arm.c (arm_get_frame_offsets): Revamp long lines.
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
66
67 void (*arm_lang_output_object_attributes_hook)(void);
68
69 struct four_ints
70 {
71 int i[4];
72 };
73
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
192
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
200
201 static void arm_file_end (void);
202 static void arm_file_start (void);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
274
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
277
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
286
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
290 \f
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
293 {
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26 bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
318
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
321 multiple times.
322 */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
334 };
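/* Editorial illustration (not part of the original file): a minimal sketch
   of how user code applies the ARM-specific attributes registered above.
   The argument strings accepted by "isr"/"interrupt" (e.g. "IRQ", "FIQ")
   are validated by arm_handle_isr_attribute below.

       extern void far_helper (void) __attribute__ ((long_call));
       extern void near_helper (void) __attribute__ ((short_call));
       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
       void bare_stub (void) __attribute__ ((naked));
*/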
335 \f
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
341
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
344
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
347
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
350
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
355
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
360
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
367
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
370
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
373
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
376
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
379
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
382
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
385
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
388
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
391
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
394
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
397
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
404
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
407
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
410
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
413
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
416
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
419
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
422
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
427
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
432
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
444
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
447
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
454
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
457
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
472
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
475
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
478
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
485
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
488
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
491
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
494
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
497
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
500
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
503
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
506
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
509
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
512
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
515
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
518
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
522
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
525
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
528
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
531
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
534
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
538
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
542
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
545
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
548
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
552
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
555
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
558
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
563
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
566
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
569
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
572
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
575
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
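/* Editorial check of the arithmetic above: an anchor then covers the byte
   offsets -4088 .. +4095, i.e. 4088 + 1 + 4095 = 8184 bytes in total, and
   8184 = 8 * 1023, so successive anchors stay naturally 8-byte aligned.  */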
581
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
584
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
587
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
594
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
599
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
602
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
605
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
608
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
611
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
614
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
617
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
620
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
623
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
626
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
629
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
632
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
635
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
639
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
642
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
646
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
650
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
654
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
658
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
664
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
668
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
671
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
674
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
677
678 struct gcc_target targetm = TARGET_INITIALIZER;
679 \f
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack;
682 static char * minipool_startobj;
683
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped = 5;
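/* Editorial illustration (not from the original source): conditionalising
   a skipped sequence replaces a short forward branch with predicated
   instructions, e.g.

       cmp   r0, #0                  cmp   r0, #0
       beq   .L1              -->    addne r1, r1, #1
       add   r1, r1, #1
     .L1:

   and is only attempted when the skipped sequence is no longer than
   max_insns_skipped instructions.  */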
687
688 extern FILE * asm_out_file;
689
690 /* True if we are currently building a constant table. */
691 int making_const_table;
692
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune = arm_none;
695
696 /* The current tuning set. */
697 const struct tune_params *current_tune;
698
699 /* Which floating point hardware to schedule for. */
700 int arm_fpu_attr;
701
702 /* Which floating point hardware to use. */
703 const struct arm_fpu_desc *arm_fpu_desc;
704
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label[14];
707 static int thumb_call_reg_needed;
708
709 /* Bit values used to identify processor capabilities. */
710 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
711 #define FL_ARCH3M (1 << 1) /* Extended multiply */
712 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
713 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
714 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
715 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
716 #define FL_THUMB (1 << 6) /* Thumb aware */
717 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
718 #define FL_STRONG (1 << 8) /* StrongARM */
719 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
720 #define FL_XSCALE (1 << 10) /* XScale */
721 /* spare (1 << 11) */
722 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
723 media instructions. */
724 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
725 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
726 Note: ARM6 & 7 derivatives only. */
727 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
728 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
729 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
730 profile. */
731 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
732 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
733 #define FL_NEON (1 << 20) /* Neon instructions. */
734 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
735 architecture. */
736 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
737 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
738 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
739 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
740
741 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
742 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
743
744 /* Flags that only affect tuning, not available instructions. */
745 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
746 | FL_CO_PROC)
747
748 #define FL_FOR_ARCH2 FL_NOTM
749 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
750 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
751 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
752 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
753 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
754 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
755 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
756 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
757 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
758 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
759 #define FL_FOR_ARCH6J FL_FOR_ARCH6
760 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
761 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
762 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
763 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
764 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
765 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
766 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
771 | FL_ARM_DIV | FL_NOTM)
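/* Editorial example, derived from the definitions above: the FL_FOR_ARCHn
   macros accumulate capability bits, so FL_FOR_ARCH4T expands to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB.  */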
772
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 static unsigned long insn_flags = 0;
776
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 static unsigned long tune_flags = 0;
780
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
784
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
787
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
790
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
793
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
796
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
799
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
802
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
805
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
808
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
811
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
814
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
817
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
820
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
823
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
826
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
829
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
832
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
835
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
838
839 /* Nonzero if tuning for XScale. */
840 int arm_tune_xscale = 0;
841
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
845
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
848
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
851
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
854
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack; it's intended to help work around
858 problems in GLD, which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
861
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
864
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
868
869 /* Nonzero if we should use Neon to handle 64-bit operations rather
870 than core registers. */
871 int prefer_neon_for_64bits = 0;
872
873 /* Nonzero if we shouldn't use literal pools. */
874 bool arm_disable_literal_pool = false;
875
876 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
877 we must report the mode of the memory reference from
878 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
879 enum machine_mode output_memory_reference_mode;
880
881 /* The register number to be used for the PIC offset register. */
882 unsigned arm_pic_register = INVALID_REGNUM;
883
884 /* Set to 1 after arm_reorg has started. Reset at the start of
885 the next function. */
886 static int after_arm_reorg = 0;
887
888 enum arm_pcs arm_pcs_default;
889
890 /* For an explanation of these variables, see final_prescan_insn below. */
891 int arm_ccfsm_state;
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc;
894
895 rtx arm_target_insn;
896 int arm_target_label;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count = 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask = 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen = 0;
904
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc = 0;
907
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes[] =
910 {
911 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
913 };
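/* Editorial note: the table above stores each condition next to its inverse
   (eq/ne, cs/cc, mi/pl, ...), so a condition code is inverted simply by
   flipping the low bit of its index; this is the "inverse function" the
   comment above refers to (cf. ARM_INVERSE_CONDITION_CODE in arm.h).  */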
914
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence[] =
917 {
918 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
919 };
920
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 \f
928 /* Initialization code. */
929
930 struct processors
931 {
932 const char *const name;
933 enum processor_type core;
934 const char *arch;
935 enum base_architecture base_arch;
936 const unsigned long flags;
937 const struct tune_params *const tune;
938 };
939
940
941 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
942 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
943 prefetch_slots, \
944 l1_size, \
945 l1_line_size
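/* Editorial note: these macros simply supply the three prefetch-related
   fields of the tune_params initialisers below, so
   ARM_PREFETCH_NOT_BENEFICIAL expands to "0, -1, -1" and
   ARM_PREFETCH_BENEFICIAL (4, 32, 32) to "4, 32, 32"
   (prefetch slots, L1 cache size, L1 line size).  */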
946
947 /* arm generic vectorizer costs. */
948 static const
949 struct cpu_vec_costs arm_default_vec_cost = {
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 1, /* vec_unalign_load_cost. */
958 1, /* vec_unalign_store_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
962 };
963
964 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
965 #include "aarch-cost-tables.h"
966
967
968
969 const struct cpu_cost_table cortexa9_extra_costs =
970 {
971 /* ALU */
972 {
973 0, /* Arith. */
974 0, /* Logical. */
975 0, /* Shift. */
976 COSTS_N_INSNS (1), /* Shift_reg. */
977 COSTS_N_INSNS (1), /* Arith_shift. */
978 COSTS_N_INSNS (2), /* Arith_shift_reg. */
979 0, /* Log_shift. */
980 COSTS_N_INSNS (1), /* Log_shift_reg. */
981 COSTS_N_INSNS (1), /* Extend. */
982 COSTS_N_INSNS (2), /* Extend_arith. */
983 COSTS_N_INSNS (1), /* Bfi. */
984 COSTS_N_INSNS (1), /* Bfx. */
985 0, /* Clz. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
988 },
989 {
990 /* MULT SImode */
991 {
992 COSTS_N_INSNS (3), /* Simple. */
993 COSTS_N_INSNS (3), /* Flag_setting. */
994 COSTS_N_INSNS (2), /* Extend. */
995 COSTS_N_INSNS (3), /* Add. */
996 COSTS_N_INSNS (2), /* Extend_add. */
997 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
998 },
999 /* MULT DImode */
1000 {
1001 0, /* Simple (N/A). */
1002 0, /* Flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* Extend. */
1004 0, /* Add (N/A). */
1005 COSTS_N_INSNS (4), /* Extend_add. */
1006 0 /* Idiv (N/A). */
1007 }
1008 },
1009 /* LD/ST */
1010 {
1011 COSTS_N_INSNS (2), /* Load. */
1012 COSTS_N_INSNS (2), /* Load_sign_extend. */
1013 COSTS_N_INSNS (2), /* Ldrd. */
1014 COSTS_N_INSNS (2), /* Ldm_1st. */
1015 1, /* Ldm_regs_per_insn_1st. */
1016 2, /* Ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* Loadf. */
1018 COSTS_N_INSNS (5), /* Loadd. */
1019 COSTS_N_INSNS (1), /* Load_unaligned. */
1020 COSTS_N_INSNS (2), /* Store. */
1021 COSTS_N_INSNS (2), /* Strd. */
1022 COSTS_N_INSNS (2), /* Stm_1st. */
1023 1, /* Stm_regs_per_insn_1st. */
1024 2, /* Stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* Storef. */
1026 COSTS_N_INSNS (1), /* Stored. */
1027 COSTS_N_INSNS (1) /* Store_unaligned. */
1028 },
1029 {
1030 /* FP SFmode */
1031 {
1032 COSTS_N_INSNS (14), /* Div. */
1033 COSTS_N_INSNS (4), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (30), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1037 COSTS_N_INSNS (1), /* Fpconst. */
1038 COSTS_N_INSNS (1), /* Neg. */
1039 COSTS_N_INSNS (3), /* Compare. */
1040 COSTS_N_INSNS (3), /* Widen. */
1041 COSTS_N_INSNS (3), /* Narrow. */
1042 COSTS_N_INSNS (3), /* Toint. */
1043 COSTS_N_INSNS (3), /* Fromint. */
1044 COSTS_N_INSNS (3) /* Roundint. */
1045 },
1046 /* FP DFmode */
1047 {
1048 COSTS_N_INSNS (24), /* Div. */
1049 COSTS_N_INSNS (5), /* Mult. */
1050 COSTS_N_INSNS (8), /* Mult_addsub. */
1051 COSTS_N_INSNS (30), /* Fma. */
1052 COSTS_N_INSNS (3), /* Addsub. */
1053 COSTS_N_INSNS (1), /* Fpconst. */
1054 COSTS_N_INSNS (1), /* Neg. */
1055 COSTS_N_INSNS (3), /* Compare. */
1056 COSTS_N_INSNS (3), /* Widen. */
1057 COSTS_N_INSNS (3), /* Narrow. */
1058 COSTS_N_INSNS (3), /* Toint. */
1059 COSTS_N_INSNS (3), /* Fromint. */
1060 COSTS_N_INSNS (3) /* Roundint. */
1061 }
1062 },
1063 /* Vector */
1064 {
1065 COSTS_N_INSNS (1) /* Alu. */
1066 }
1067 };
1068
1069
1070 const struct cpu_cost_table cortexa7_extra_costs =
1071 {
1072 /* ALU */
1073 {
1074 0, /* Arith. */
1075 0, /* Logical. */
1076 COSTS_N_INSNS (1), /* Shift. */
1077 COSTS_N_INSNS (1), /* Shift_reg. */
1078 COSTS_N_INSNS (1), /* Arith_shift. */
1079 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1080 COSTS_N_INSNS (1), /* Log_shift. */
1081 COSTS_N_INSNS (1), /* Log_shift_reg. */
1082 COSTS_N_INSNS (1), /* Extend. */
1083 COSTS_N_INSNS (1), /* Extend_arith. */
1084 COSTS_N_INSNS (1), /* Bfi. */
1085 COSTS_N_INSNS (1), /* Bfx. */
1086 COSTS_N_INSNS (1), /* Clz. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1089 },
1090
1091 {
1092 /* MULT SImode */
1093 {
1094 0, /* Simple. */
1095 COSTS_N_INSNS (1), /* Flag_setting. */
1096 COSTS_N_INSNS (1), /* Extend. */
1097 COSTS_N_INSNS (1), /* Add. */
1098 COSTS_N_INSNS (1), /* Extend_add. */
1099 COSTS_N_INSNS (7) /* Idiv. */
1100 },
1101 /* MULT DImode */
1102 {
1103 0, /* Simple (N/A). */
1104 0, /* Flag_setting (N/A). */
1105 COSTS_N_INSNS (1), /* Extend. */
1106 0, /* Add. */
1107 COSTS_N_INSNS (2), /* Extend_add. */
1108 0 /* Idiv (N/A). */
1109 }
1110 },
1111 /* LD/ST */
1112 {
1113 COSTS_N_INSNS (1), /* Load. */
1114 COSTS_N_INSNS (1), /* Load_sign_extend. */
1115 COSTS_N_INSNS (3), /* Ldrd. */
1116 COSTS_N_INSNS (1), /* Ldm_1st. */
1117 1, /* Ldm_regs_per_insn_1st. */
1118 2, /* Ldm_regs_per_insn_subsequent. */
1119 COSTS_N_INSNS (2), /* Loadf. */
1120 COSTS_N_INSNS (2), /* Loadd. */
1121 COSTS_N_INSNS (1), /* Load_unaligned. */
1122 COSTS_N_INSNS (1), /* Store. */
1123 COSTS_N_INSNS (3), /* Strd. */
1124 COSTS_N_INSNS (1), /* Stm_1st. */
1125 1, /* Stm_regs_per_insn_1st. */
1126 2, /* Stm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (2), /* Storef. */
1128 COSTS_N_INSNS (2), /* Stored. */
1129 COSTS_N_INSNS (1) /* Store_unaligned. */
1130 },
1131 {
1132 /* FP SFmode */
1133 {
1134 COSTS_N_INSNS (15), /* Div. */
1135 COSTS_N_INSNS (3), /* Mult. */
1136 COSTS_N_INSNS (7), /* Mult_addsub. */
1137 COSTS_N_INSNS (7), /* Fma. */
1138 COSTS_N_INSNS (3), /* Addsub. */
1139 COSTS_N_INSNS (3), /* Fpconst. */
1140 COSTS_N_INSNS (3), /* Neg. */
1141 COSTS_N_INSNS (3), /* Compare. */
1142 COSTS_N_INSNS (3), /* Widen. */
1143 COSTS_N_INSNS (3), /* Narrow. */
1144 COSTS_N_INSNS (3), /* Toint. */
1145 COSTS_N_INSNS (3), /* Fromint. */
1146 COSTS_N_INSNS (3) /* Roundint. */
1147 },
1148 /* FP DFmode */
1149 {
1150 COSTS_N_INSNS (30), /* Div. */
1151 COSTS_N_INSNS (6), /* Mult. */
1152 COSTS_N_INSNS (10), /* Mult_addsub. */
1153 COSTS_N_INSNS (7), /* Fma. */
1154 COSTS_N_INSNS (3), /* Addsub. */
1155 COSTS_N_INSNS (3), /* Fpconst. */
1156 COSTS_N_INSNS (3), /* Neg. */
1157 COSTS_N_INSNS (3), /* Compare. */
1158 COSTS_N_INSNS (3), /* Widen. */
1159 COSTS_N_INSNS (3), /* Narrow. */
1160 COSTS_N_INSNS (3), /* Toint. */
1161 COSTS_N_INSNS (3), /* Fromint. */
1162 COSTS_N_INSNS (3) /* Roundint. */
1163 }
1164 },
1165 /* Vector */
1166 {
1167 COSTS_N_INSNS (1) /* Alu. */
1168 }
1169 };
1170
1171 const struct cpu_cost_table cortexa12_extra_costs =
1172 {
1173 /* ALU */
1174 {
1175 0, /* Arith. */
1176 0, /* Logical. */
1177 0, /* Shift. */
1178 COSTS_N_INSNS (1), /* Shift_reg. */
1179 COSTS_N_INSNS (1), /* Arith_shift. */
1180 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1181 COSTS_N_INSNS (1), /* Log_shift. */
1182 COSTS_N_INSNS (1), /* Log_shift_reg. */
1183 0, /* Extend. */
1184 COSTS_N_INSNS (1), /* Extend_arith. */
1185 0, /* Bfi. */
1186 COSTS_N_INSNS (1), /* Bfx. */
1187 COSTS_N_INSNS (1), /* Clz. */
1188 0, /* non_exec. */
1189 true /* non_exec_costs_exec. */
1190 },
1191 /* MULT SImode */
1192 {
1193 {
1194 COSTS_N_INSNS (2), /* Simple. */
1195 COSTS_N_INSNS (3), /* Flag_setting. */
1196 COSTS_N_INSNS (2), /* Extend. */
1197 COSTS_N_INSNS (3), /* Add. */
1198 COSTS_N_INSNS (2), /* Extend_add. */
1199 COSTS_N_INSNS (18) /* Idiv. */
1200 },
1201 /* MULT DImode */
1202 {
1203 0, /* Simple (N/A). */
1204 0, /* Flag_setting (N/A). */
1205 COSTS_N_INSNS (3), /* Extend. */
1206 0, /* Add (N/A). */
1207 COSTS_N_INSNS (3), /* Extend_add. */
1208 0 /* Idiv (N/A). */
1209 }
1210 },
1211 /* LD/ST */
1212 {
1213 COSTS_N_INSNS (3), /* Load. */
1214 COSTS_N_INSNS (3), /* Load_sign_extend. */
1215 COSTS_N_INSNS (3), /* Ldrd. */
1216 COSTS_N_INSNS (3), /* Ldm_1st. */
1217 1, /* Ldm_regs_per_insn_1st. */
1218 2, /* Ldm_regs_per_insn_subsequent. */
1219 COSTS_N_INSNS (3), /* Loadf. */
1220 COSTS_N_INSNS (3), /* Loadd. */
1221 0, /* Load_unaligned. */
1222 0, /* Store. */
1223 0, /* Strd. */
1224 0, /* Stm_1st. */
1225 1, /* Stm_regs_per_insn_1st. */
1226 2, /* Stm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* Storef. */
1228 COSTS_N_INSNS (2), /* Stored. */
1229 0 /* Store_unaligned. */
1230 },
1231 {
1232 /* FP SFmode */
1233 {
1234 COSTS_N_INSNS (17), /* Div. */
1235 COSTS_N_INSNS (4), /* Mult. */
1236 COSTS_N_INSNS (8), /* Mult_addsub. */
1237 COSTS_N_INSNS (8), /* Fma. */
1238 COSTS_N_INSNS (4), /* Addsub. */
1239 COSTS_N_INSNS (2), /* Fpconst. */
1240 COSTS_N_INSNS (2), /* Neg. */
1241 COSTS_N_INSNS (2), /* Compare. */
1242 COSTS_N_INSNS (4), /* Widen. */
1243 COSTS_N_INSNS (4), /* Narrow. */
1244 COSTS_N_INSNS (4), /* Toint. */
1245 COSTS_N_INSNS (4), /* Fromint. */
1246 COSTS_N_INSNS (4) /* Roundint. */
1247 },
1248 /* FP DFmode */
1249 {
1250 COSTS_N_INSNS (31), /* Div. */
1251 COSTS_N_INSNS (4), /* Mult. */
1252 COSTS_N_INSNS (8), /* Mult_addsub. */
1253 COSTS_N_INSNS (8), /* Fma. */
1254 COSTS_N_INSNS (4), /* Addsub. */
1255 COSTS_N_INSNS (2), /* Fpconst. */
1256 COSTS_N_INSNS (2), /* Neg. */
1257 COSTS_N_INSNS (2), /* Compare. */
1258 COSTS_N_INSNS (4), /* Widen. */
1259 COSTS_N_INSNS (4), /* Narrow. */
1260 COSTS_N_INSNS (4), /* Toint. */
1261 COSTS_N_INSNS (4), /* Fromint. */
1262 COSTS_N_INSNS (4) /* Roundint. */
1263 }
1264 },
1265 /* Vector */
1266 {
1267 COSTS_N_INSNS (1) /* Alu. */
1268 }
1269 };
1270
1271 const struct cpu_cost_table cortexa15_extra_costs =
1272 {
1273 /* ALU */
1274 {
1275 0, /* Arith. */
1276 0, /* Logical. */
1277 0, /* Shift. */
1278 0, /* Shift_reg. */
1279 COSTS_N_INSNS (1), /* Arith_shift. */
1280 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1281 COSTS_N_INSNS (1), /* Log_shift. */
1282 COSTS_N_INSNS (1), /* Log_shift_reg. */
1283 0, /* Extend. */
1284 COSTS_N_INSNS (1), /* Extend_arith. */
1285 COSTS_N_INSNS (1), /* Bfi. */
1286 0, /* Bfx. */
1287 0, /* Clz. */
1288 0, /* non_exec. */
1289 true /* non_exec_costs_exec. */
1290 },
1291 /* MULT SImode */
1292 {
1293 {
1294 COSTS_N_INSNS (2), /* Simple. */
1295 COSTS_N_INSNS (3), /* Flag_setting. */
1296 COSTS_N_INSNS (2), /* Extend. */
1297 COSTS_N_INSNS (2), /* Add. */
1298 COSTS_N_INSNS (2), /* Extend_add. */
1299 COSTS_N_INSNS (18) /* Idiv. */
1300 },
1301 /* MULT DImode */
1302 {
1303 0, /* Simple (N/A). */
1304 0, /* Flag_setting (N/A). */
1305 COSTS_N_INSNS (3), /* Extend. */
1306 0, /* Add (N/A). */
1307 COSTS_N_INSNS (3), /* Extend_add. */
1308 0 /* Idiv (N/A). */
1309 }
1310 },
1311 /* LD/ST */
1312 {
1313 COSTS_N_INSNS (3), /* Load. */
1314 COSTS_N_INSNS (3), /* Load_sign_extend. */
1315 COSTS_N_INSNS (3), /* Ldrd. */
1316 COSTS_N_INSNS (4), /* Ldm_1st. */
1317 1, /* Ldm_regs_per_insn_1st. */
1318 2, /* Ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (4), /* Loadf. */
1320 COSTS_N_INSNS (4), /* Loadd. */
1321 0, /* Load_unaligned. */
1322 0, /* Store. */
1323 0, /* Strd. */
1324 COSTS_N_INSNS (1), /* Stm_1st. */
1325 1, /* Stm_regs_per_insn_1st. */
1326 2, /* Stm_regs_per_insn_subsequent. */
1327 0, /* Storef. */
1328 0, /* Stored. */
1329 0 /* Store_unaligned. */
1330 },
1331 {
1332 /* FP SFmode */
1333 {
1334 COSTS_N_INSNS (17), /* Div. */
1335 COSTS_N_INSNS (4), /* Mult. */
1336 COSTS_N_INSNS (8), /* Mult_addsub. */
1337 COSTS_N_INSNS (8), /* Fma. */
1338 COSTS_N_INSNS (4), /* Addsub. */
1339 COSTS_N_INSNS (2), /* Fpconst. */
1340 COSTS_N_INSNS (2), /* Neg. */
1341 COSTS_N_INSNS (5), /* Compare. */
1342 COSTS_N_INSNS (4), /* Widen. */
1343 COSTS_N_INSNS (4), /* Narrow. */
1344 COSTS_N_INSNS (4), /* Toint. */
1345 COSTS_N_INSNS (4), /* Fromint. */
1346 COSTS_N_INSNS (4) /* Roundint. */
1347 },
1348 /* FP DFmode */
1349 {
1350 COSTS_N_INSNS (31), /* Div. */
1351 COSTS_N_INSNS (4), /* Mult. */
1352 COSTS_N_INSNS (8), /* Mult_addsub. */
1353 COSTS_N_INSNS (8), /* Fma. */
1354 COSTS_N_INSNS (4), /* Addsub. */
1355 COSTS_N_INSNS (2), /* Fpconst. */
1356 COSTS_N_INSNS (2), /* Neg. */
1357 COSTS_N_INSNS (2), /* Compare. */
1358 COSTS_N_INSNS (4), /* Widen. */
1359 COSTS_N_INSNS (4), /* Narrow. */
1360 COSTS_N_INSNS (4), /* Toint. */
1361 COSTS_N_INSNS (4), /* Fromint. */
1362 COSTS_N_INSNS (4) /* Roundint. */
1363 }
1364 },
1365 /* Vector */
1366 {
1367 COSTS_N_INSNS (1) /* Alu. */
1368 }
1369 };
1370
1371 const struct cpu_cost_table v7m_extra_costs =
1372 {
1373 /* ALU */
1374 {
1375 0, /* Arith. */
1376 0, /* Logical. */
1377 0, /* Shift. */
1378 0, /* Shift_reg. */
1379 0, /* Arith_shift. */
1380 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1381 0, /* Log_shift. */
1382 COSTS_N_INSNS (1), /* Log_shift_reg. */
1383 0, /* Extend. */
1384 COSTS_N_INSNS (1), /* Extend_arith. */
1385 0, /* Bfi. */
1386 0, /* Bfx. */
1387 0, /* Clz. */
1388 COSTS_N_INSNS (1), /* non_exec. */
1389 false /* non_exec_costs_exec. */
1390 },
1391 {
1392 /* MULT SImode */
1393 {
1394 COSTS_N_INSNS (1), /* Simple. */
1395 COSTS_N_INSNS (1), /* Flag_setting. */
1396 COSTS_N_INSNS (2), /* Extend. */
1397 COSTS_N_INSNS (1), /* Add. */
1398 COSTS_N_INSNS (3), /* Extend_add. */
1399 COSTS_N_INSNS (8) /* Idiv. */
1400 },
1401 /* MULT DImode */
1402 {
1403 0, /* Simple (N/A). */
1404 0, /* Flag_setting (N/A). */
1405 COSTS_N_INSNS (2), /* Extend. */
1406 0, /* Add (N/A). */
1407 COSTS_N_INSNS (3), /* Extend_add. */
1408 0 /* Idiv (N/A). */
1409 }
1410 },
1411 /* LD/ST */
1412 {
1413 COSTS_N_INSNS (2), /* Load. */
1414 0, /* Load_sign_extend. */
1415 COSTS_N_INSNS (3), /* Ldrd. */
1416 COSTS_N_INSNS (2), /* Ldm_1st. */
1417 1, /* Ldm_regs_per_insn_1st. */
1418 1, /* Ldm_regs_per_insn_subsequent. */
1419 COSTS_N_INSNS (2), /* Loadf. */
1420 COSTS_N_INSNS (3), /* Loadd. */
1421 COSTS_N_INSNS (1), /* Load_unaligned. */
1422 COSTS_N_INSNS (2), /* Store. */
1423 COSTS_N_INSNS (3), /* Strd. */
1424 COSTS_N_INSNS (2), /* Stm_1st. */
1425 1, /* Stm_regs_per_insn_1st. */
1426 1, /* Stm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (2), /* Storef. */
1428 COSTS_N_INSNS (3), /* Stored. */
1429 COSTS_N_INSNS (1) /* Store_unaligned. */
1430 },
1431 {
1432 /* FP SFmode */
1433 {
1434 COSTS_N_INSNS (7), /* Div. */
1435 COSTS_N_INSNS (2), /* Mult. */
1436 COSTS_N_INSNS (5), /* Mult_addsub. */
1437 COSTS_N_INSNS (3), /* Fma. */
1438 COSTS_N_INSNS (1), /* Addsub. */
1439 0, /* Fpconst. */
1440 0, /* Neg. */
1441 0, /* Compare. */
1442 0, /* Widen. */
1443 0, /* Narrow. */
1444 0, /* Toint. */
1445 0, /* Fromint. */
1446 0 /* Roundint. */
1447 },
1448 /* FP DFmode */
1449 {
1450 COSTS_N_INSNS (15), /* Div. */
1451 COSTS_N_INSNS (5), /* Mult. */
1452 COSTS_N_INSNS (7), /* Mult_addsub. */
1453 COSTS_N_INSNS (7), /* Fma. */
1454 COSTS_N_INSNS (3), /* Addsub. */
1455 0, /* Fpconst. */
1456 0, /* Neg. */
1457 0, /* Compare. */
1458 0, /* Widen. */
1459 0, /* Narrow. */
1460 0, /* Toint. */
1461 0, /* Fromint. */
1462 0 /* Roundint. */
1463 }
1464 },
1465 /* Vector */
1466 {
1467 COSTS_N_INSNS (1) /* Alu. */
1468 }
1469 };
1470
1471 const struct tune_params arm_slowmul_tune =
1472 {
1473 arm_slowmul_rtx_costs,
1474 NULL,
1475 NULL, /* Sched adj cost. */
1476 3, /* Constant limit. */
1477 5, /* Max cond insns. */
1478 ARM_PREFETCH_NOT_BENEFICIAL,
1479 true, /* Prefer constant pool. */
1480 arm_default_branch_cost,
1481 false, /* Prefer LDRD/STRD. */
1482 {true, true}, /* Prefer non short circuit. */
1483 &arm_default_vec_cost, /* Vectorizer costs. */
1484 false /* Prefer Neon for 64-bit bitops. */
1485 };
1486
1487 const struct tune_params arm_fastmul_tune =
1488 {
1489 arm_fastmul_rtx_costs,
1490 NULL,
1491 NULL, /* Sched adj cost. */
1492 1, /* Constant limit. */
1493 5, /* Max cond insns. */
1494 ARM_PREFETCH_NOT_BENEFICIAL,
1495 true, /* Prefer constant pool. */
1496 arm_default_branch_cost,
1497 false, /* Prefer LDRD/STRD. */
1498 {true, true}, /* Prefer non short circuit. */
1499 &arm_default_vec_cost, /* Vectorizer costs. */
1500 false /* Prefer Neon for 64-bit bitops. */
1501 };
1502
1503 /* StrongARM has early execution of branches, so a sequence that is worth
1504 skipping is shorter. Set max_insns_skipped to a lower value. */
1505
1506 const struct tune_params arm_strongarm_tune =
1507 {
1508 arm_fastmul_rtx_costs,
1509 NULL,
1510 NULL, /* Sched adj cost. */
1511 1, /* Constant limit. */
1512 3, /* Max cond insns. */
1513 ARM_PREFETCH_NOT_BENEFICIAL,
1514 true, /* Prefer constant pool. */
1515 arm_default_branch_cost,
1516 false, /* Prefer LDRD/STRD. */
1517 {true, true}, /* Prefer non short circuit. */
1518 &arm_default_vec_cost, /* Vectorizer costs. */
1519 false /* Prefer Neon for 64-bit bitops. */
1520 };
1521
1522 const struct tune_params arm_xscale_tune =
1523 {
1524 arm_xscale_rtx_costs,
1525 NULL,
1526 xscale_sched_adjust_cost,
1527 2, /* Constant limit. */
1528 3, /* Max cond insns. */
1529 ARM_PREFETCH_NOT_BENEFICIAL,
1530 true, /* Prefer constant pool. */
1531 arm_default_branch_cost,
1532 false, /* Prefer LDRD/STRD. */
1533 {true, true}, /* Prefer non short circuit. */
1534 &arm_default_vec_cost, /* Vectorizer costs. */
1535 false /* Prefer Neon for 64-bit bitops. */
1536 };
1537
1538 const struct tune_params arm_9e_tune =
1539 {
1540 arm_9e_rtx_costs,
1541 NULL,
1542 NULL, /* Sched adj cost. */
1543 1, /* Constant limit. */
1544 5, /* Max cond insns. */
1545 ARM_PREFETCH_NOT_BENEFICIAL,
1546 true, /* Prefer constant pool. */
1547 arm_default_branch_cost,
1548 false, /* Prefer LDRD/STRD. */
1549 {true, true}, /* Prefer non short circuit. */
1550 &arm_default_vec_cost, /* Vectorizer costs. */
1551 false /* Prefer Neon for 64-bit bitops. */
1552 };
1553
1554 const struct tune_params arm_v6t2_tune =
1555 {
1556 arm_9e_rtx_costs,
1557 NULL,
1558 NULL, /* Sched adj cost. */
1559 1, /* Constant limit. */
1560 5, /* Max cond insns. */
1561 ARM_PREFETCH_NOT_BENEFICIAL,
1562 false, /* Prefer constant pool. */
1563 arm_default_branch_cost,
1564 false, /* Prefer LDRD/STRD. */
1565 {true, true}, /* Prefer non short circuit. */
1566 &arm_default_vec_cost, /* Vectorizer costs. */
1567 false /* Prefer Neon for 64-bit bitops. */
1568 };
1569
1570 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1571 const struct tune_params arm_cortex_tune =
1572 {
1573 arm_9e_rtx_costs,
1574 &generic_extra_costs,
1575 NULL, /* Sched adj cost. */
1576 1, /* Constant limit. */
1577 5, /* Max cond insns. */
1578 ARM_PREFETCH_NOT_BENEFICIAL,
1579 false, /* Prefer constant pool. */
1580 arm_default_branch_cost,
1581 false, /* Prefer LDRD/STRD. */
1582 {true, true}, /* Prefer non short circuit. */
1583 &arm_default_vec_cost, /* Vectorizer costs. */
1584 false /* Prefer Neon for 64-bit bitops. */
1585 };
1586
1587 const struct tune_params arm_cortex_a7_tune =
1588 {
1589 arm_9e_rtx_costs,
1590 &cortexa7_extra_costs,
1591 NULL,
1592 1, /* Constant limit. */
1593 5, /* Max cond insns. */
1594 ARM_PREFETCH_NOT_BENEFICIAL,
1595 false, /* Prefer constant pool. */
1596 arm_default_branch_cost,
1597 false, /* Prefer LDRD/STRD. */
1598 {true, true}, /* Prefer non short circuit. */
1599 &arm_default_vec_cost, /* Vectorizer costs. */
1600 false /* Prefer Neon for 64-bits bitops. */
1601 };
1602
1603 const struct tune_params arm_cortex_a15_tune =
1604 {
1605 arm_9e_rtx_costs,
1606 &cortexa15_extra_costs,
1607 NULL, /* Sched adj cost. */
1608 1, /* Constant limit. */
1609 2, /* Max cond insns. */
1610 ARM_PREFETCH_NOT_BENEFICIAL,
1611 false, /* Prefer constant pool. */
1612 arm_default_branch_cost,
1613 true, /* Prefer LDRD/STRD. */
1614 {true, true}, /* Prefer non short circuit. */
1615 &arm_default_vec_cost, /* Vectorizer costs. */
1616 false /* Prefer Neon for 64-bits bitops. */
1617 };
1618
1619 const struct tune_params arm_cortex_a53_tune =
1620 {
1621 arm_9e_rtx_costs,
1622 &cortexa53_extra_costs,
1623 NULL, /* Scheduler cost adjustment. */
1624 1, /* Constant limit. */
1625 5, /* Max cond insns. */
1626 ARM_PREFETCH_NOT_BENEFICIAL,
1627 false, /* Prefer constant pool. */
1628 arm_default_branch_cost,
1629 false, /* Prefer LDRD/STRD. */
1630 {true, true}, /* Prefer non short circuit. */
1631 &arm_default_vec_cost, /* Vectorizer costs. */
1632 false /* Prefer Neon for 64-bits bitops. */
1633 };
1634
1635 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1636 less appealing. Set max_insns_skipped to a low value. */
1637
1638 const struct tune_params arm_cortex_a5_tune =
1639 {
1640 arm_9e_rtx_costs,
1641 NULL,
1642 NULL, /* Sched adj cost. */
1643 1, /* Constant limit. */
1644 1, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL,
1646 false, /* Prefer constant pool. */
1647 arm_cortex_a5_branch_cost,
1648 false, /* Prefer LDRD/STRD. */
1649 {false, false}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost, /* Vectorizer costs. */
1651 false /* Prefer Neon for 64-bits bitops. */
1652 };
1653
1654 const struct tune_params arm_cortex_a9_tune =
1655 {
1656 arm_9e_rtx_costs,
1657 &cortexa9_extra_costs,
1658 cortex_a9_sched_adjust_cost,
1659 1, /* Constant limit. */
1660 5, /* Max cond insns. */
1661 ARM_PREFETCH_BENEFICIAL(4,32,32),
1662 false, /* Prefer constant pool. */
1663 arm_default_branch_cost,
1664 false, /* Prefer LDRD/STRD. */
1665 {true, true}, /* Prefer non short circuit. */
1666 &arm_default_vec_cost, /* Vectorizer costs. */
1667 false /* Prefer Neon for 64-bits bitops. */
1668 };
1669
1670 const struct tune_params arm_cortex_a12_tune =
1671 {
1672 arm_9e_rtx_costs,
1673 &cortexa12_extra_costs,
1674 NULL,
1675 1, /* Constant limit. */
1676 5, /* Max cond insns. */
1677 ARM_PREFETCH_BENEFICIAL(4,32,32),
1678 false, /* Prefer constant pool. */
1679 arm_default_branch_cost,
1680 true, /* Prefer LDRD/STRD. */
1681 {true, true}, /* Prefer non short circuit. */
1682 &arm_default_vec_cost, /* Vectorizer costs. */
1683 false /* Prefer Neon for 64-bits bitops. */
1684 };
1685
1686 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a
1687 single cycle, so synthesizing a 32-bit constant that way costs two cycles.
1688 An LDR from the constant pool also takes two cycles to execute, but mildly
1689 increases pipelining opportunity (consecutive loads/stores can be pipelined
1690 together, saving one cycle), and may also improve icache utilisation.
1691 Hence we prefer the constant pool for such processors. */
1692
1693 const struct tune_params arm_v7m_tune =
1694 {
1695 arm_9e_rtx_costs,
1696 &v7m_extra_costs,
1697 NULL, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 5, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL,
1701 true, /* Prefer constant pool. */
1702 arm_cortex_m_branch_cost,
1703 false, /* Prefer LDRD/STRD. */
1704 {false, false}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost, /* Vectorizer costs. */
1706 false /* Prefer Neon for 64-bits bitops. */
1707 };
1708
1709 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1710 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1711 const struct tune_params arm_v6m_tune =
1712 {
1713 arm_9e_rtx_costs,
1714 NULL,
1715 NULL, /* Sched adj cost. */
1716 1, /* Constant limit. */
1717 5, /* Max cond insns. */
1718 ARM_PREFETCH_NOT_BENEFICIAL,
1719 false, /* Prefer constant pool. */
1720 arm_default_branch_cost,
1721 false, /* Prefer LDRD/STRD. */
1722 {false, false}, /* Prefer non short circuit. */
1723 &arm_default_vec_cost, /* Vectorizer costs. */
1724 false /* Prefer Neon for 64-bits bitops. */
1725 };
1726
1727 const struct tune_params arm_fa726te_tune =
1728 {
1729 arm_9e_rtx_costs,
1730 NULL,
1731 fa726te_sched_adjust_cost,
1732 1, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 ARM_PREFETCH_NOT_BENEFICIAL,
1735 true, /* Prefer constant pool. */
1736 arm_default_branch_cost,
1737 false, /* Prefer LDRD/STRD. */
1738 {true, true}, /* Prefer non short circuit. */
1739 &arm_default_vec_cost, /* Vectorizer costs. */
1740 false /* Prefer Neon for 64-bits bitops. */
1741 };
1742
1743
1744 /* Not all of these give usefully different compilation alternatives,
1745 but there is no simple way of generalizing them. */
1746 static const struct processors all_cores[] =
1747 {
1748 /* ARM Cores */
1749 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1750 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1751 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1752 #include "arm-cores.def"
1753 #undef ARM_CORE
1754 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1755 };
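/* For illustration only (a hypothetical entry; the real ones live in
   arm-cores.def): an entry of the form
     ARM_CORE ("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
   would expand, via the ARM_CORE macro above, to
     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},  */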
1756
1757 static const struct processors all_architectures[] =
1758 {
1759 /* ARM Architectures */
1760 /* We don't specify tuning costs here as it will be figured out
1761 from the core. */
1762
1763 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1764 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1765 #include "arm-arches.def"
1766 #undef ARM_ARCH
1767 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1768 };
1769
1770
1771 /* These are populated as commandline arguments are processed, or NULL
1772 if not specified. */
1773 static const struct processors *arm_selected_arch;
1774 static const struct processors *arm_selected_cpu;
1775 static const struct processors *arm_selected_tune;
1776
1777 /* The name of the preprocessor macro to define for this architecture. */
1778
1779 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1780
1781 /* Available values for -mfpu=. */
1782
1783 static const struct arm_fpu_desc all_fpus[] =
1784 {
1785 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1786 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1787 #include "arm-fpus.def"
1788 #undef ARM_FPU
1789 };
1790
1791
1792 /* Supported TLS relocations. */
1793
1794 enum tls_reloc {
1795 TLS_GD32,
1796 TLS_LDM32,
1797 TLS_LDO32,
1798 TLS_IE32,
1799 TLS_LE32,
1800 TLS_DESCSEQ /* GNU scheme */
1801 };
1802
1803 /* The maximum number of insns to be used when loading a constant. */
1804 inline static int
1805 arm_constant_limit (bool size_p)
1806 {
1807 return size_p ? 1 : current_tune->constant_limit;
1808 }
1809
1810 /* Emit an insn that's a simple single-set. Both the operands must be known
1811 to be valid. */
1812 inline static rtx
1813 emit_set_insn (rtx x, rtx y)
1814 {
1815 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1816 }
1817
1818 /* Return the number of bits set in VALUE. */
1819 static unsigned
1820 bit_count (unsigned long value)
1821 {
1822 unsigned long count = 0;
1823
1824 while (value)
1825 {
1826 count++;
1827 value &= value - 1; /* Clear the least-significant set bit. */
1828 }
1829
1830 return count;
1831 }
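/* Worked example (illustrative): for value = 0x29 (binary 101001) the loop
   above runs three times, clearing the lowest set bit on each pass:
   101001 -> 101000 -> 100000 -> 0, so bit_count returns 3.  */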
1832
1833 typedef struct
1834 {
1835 enum machine_mode mode;
1836 const char *name;
1837 } arm_fixed_mode_set;
1838
1839 /* A small helper for setting fixed-point libfuncs. */
1840
1841 static void
1842 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1843 const char *funcname, const char *modename,
1844 int num_suffix)
1845 {
1846 char buffer[50];
1847
1848 if (num_suffix == 0)
1849 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1850 else
1851 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1852
1853 set_optab_libfunc (optable, mode, buffer);
1854 }
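/* For example (illustrative), the fixed-point setup further down yields
   names such as "__gnu_addqq3" (add_optab, QQmode, funcname "add",
   modename "qq", suffix 3) and "__gnu_negsa2" (neg_optab, SAmode,
   funcname "neg", modename "sa", suffix 2).  */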
1855
1856 static void
1857 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1858 enum machine_mode from, const char *funcname,
1859 const char *toname, const char *fromname)
1860 {
1861 char buffer[50];
1862 const char *maybe_suffix_2 = "";
1863
1864 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1865 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1866 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1867 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1868 maybe_suffix_2 = "2";
1869
1870 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1871 maybe_suffix_2);
1872
1873 set_conv_libfunc (optable, to, from, buffer);
1874 }
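/* For example (illustrative): a QQmode -> HQmode conversion (both signed
   fract modes) takes the "2" suffix, giving "__gnu_fractqqhq2", while a
   QQmode -> SFmode conversion does not, giving "__gnu_fractqqsf".  */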
1875
1876 /* Set up library functions unique to ARM. */
1877
1878 static void
1879 arm_init_libfuncs (void)
1880 {
1881 /* For Linux, we have access to kernel support for atomic operations. */
1882 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1883 init_sync_libfuncs (2 * UNITS_PER_WORD);
1884
1885 /* There are no special library functions unless we are using the
1886 ARM BPABI. */
1887 if (!TARGET_BPABI)
1888 return;
1889
1890 /* The functions below are described in Section 4 of the "Run-Time
1891 ABI for the ARM architecture", Version 1.0. */
1892
1893 /* Double-precision floating-point arithmetic. Table 2. */
1894 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1895 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1896 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1897 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1898 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1899
1900 /* Double-precision comparisons. Table 3. */
1901 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1902 set_optab_libfunc (ne_optab, DFmode, NULL);
1903 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1904 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1905 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1906 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1907 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1908
1909 /* Single-precision floating-point arithmetic. Table 4. */
1910 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1911 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1912 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1913 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1914 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1915
1916 /* Single-precision comparisons. Table 5. */
1917 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1918 set_optab_libfunc (ne_optab, SFmode, NULL);
1919 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1920 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1921 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1922 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1923 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1924
1925 /* Floating-point to integer conversions. Table 6. */
1926 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1927 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1928 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1929 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1930 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1931 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1932 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1933 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1934
1935 /* Conversions between floating types. Table 7. */
1936 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1937 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1938
1939 /* Integer to floating-point conversions. Table 8. */
1940 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1941 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1942 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1943 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1944 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1945 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1946 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1947 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1948
1949 /* Long long. Table 9. */
1950 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1951 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1952 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1953 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1954 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1955 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1956 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1957 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1958
1959 /* Integer (32/32->32) division. \S 4.3.1. */
1960 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1961 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1962
1963 /* The divmod functions are designed so that they can be used for
1964 plain division, even though they return both the quotient and the
1965 remainder. The quotient is returned in the usual location (i.e.,
1966 r0 for SImode, {r0, r1} for DImode), just as would be expected
1967 for an ordinary division routine. Because the AAPCS calling
1968 conventions specify that all of { r0, r1, r2, r3 } are
1969 call-clobbered registers, there is no need to tell the compiler
1970 explicitly that those registers are clobbered by these
1971 routines. */
1972 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1973 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1974
1975 /* For SImode division the ABI provides div-without-mod routines,
1976 which are faster. */
1977 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1978 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1979
1980 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1981 divmod libcalls instead. */
1982 set_optab_libfunc (smod_optab, DImode, NULL);
1983 set_optab_libfunc (umod_optab, DImode, NULL);
1984 set_optab_libfunc (smod_optab, SImode, NULL);
1985 set_optab_libfunc (umod_optab, SImode, NULL);
1986
1987 /* Half-precision float operations. The compiler handles all operations
1988 with NULL libfuncs by converting to SFmode. */
1989 switch (arm_fp16_format)
1990 {
1991 case ARM_FP16_FORMAT_IEEE:
1992 case ARM_FP16_FORMAT_ALTERNATIVE:
1993
1994 /* Conversions. */
1995 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1996 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1997 ? "__gnu_f2h_ieee"
1998 : "__gnu_f2h_alternative"));
1999 set_conv_libfunc (sext_optab, SFmode, HFmode,
2000 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2001 ? "__gnu_h2f_ieee"
2002 : "__gnu_h2f_alternative"));
2003
2004 /* Arithmetic. */
2005 set_optab_libfunc (add_optab, HFmode, NULL);
2006 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2007 set_optab_libfunc (smul_optab, HFmode, NULL);
2008 set_optab_libfunc (neg_optab, HFmode, NULL);
2009 set_optab_libfunc (sub_optab, HFmode, NULL);
2010
2011 /* Comparisons. */
2012 set_optab_libfunc (eq_optab, HFmode, NULL);
2013 set_optab_libfunc (ne_optab, HFmode, NULL);
2014 set_optab_libfunc (lt_optab, HFmode, NULL);
2015 set_optab_libfunc (le_optab, HFmode, NULL);
2016 set_optab_libfunc (ge_optab, HFmode, NULL);
2017 set_optab_libfunc (gt_optab, HFmode, NULL);
2018 set_optab_libfunc (unord_optab, HFmode, NULL);
2019 break;
2020
2021 default:
2022 break;
2023 }
2024
2025 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2026 {
2027 const arm_fixed_mode_set fixed_arith_modes[] =
2028 {
2029 { QQmode, "qq" },
2030 { UQQmode, "uqq" },
2031 { HQmode, "hq" },
2032 { UHQmode, "uhq" },
2033 { SQmode, "sq" },
2034 { USQmode, "usq" },
2035 { DQmode, "dq" },
2036 { UDQmode, "udq" },
2037 { TQmode, "tq" },
2038 { UTQmode, "utq" },
2039 { HAmode, "ha" },
2040 { UHAmode, "uha" },
2041 { SAmode, "sa" },
2042 { USAmode, "usa" },
2043 { DAmode, "da" },
2044 { UDAmode, "uda" },
2045 { TAmode, "ta" },
2046 { UTAmode, "uta" }
2047 };
2048 const arm_fixed_mode_set fixed_conv_modes[] =
2049 {
2050 { QQmode, "qq" },
2051 { UQQmode, "uqq" },
2052 { HQmode, "hq" },
2053 { UHQmode, "uhq" },
2054 { SQmode, "sq" },
2055 { USQmode, "usq" },
2056 { DQmode, "dq" },
2057 { UDQmode, "udq" },
2058 { TQmode, "tq" },
2059 { UTQmode, "utq" },
2060 { HAmode, "ha" },
2061 { UHAmode, "uha" },
2062 { SAmode, "sa" },
2063 { USAmode, "usa" },
2064 { DAmode, "da" },
2065 { UDAmode, "uda" },
2066 { TAmode, "ta" },
2067 { UTAmode, "uta" },
2068 { QImode, "qi" },
2069 { HImode, "hi" },
2070 { SImode, "si" },
2071 { DImode, "di" },
2072 { TImode, "ti" },
2073 { SFmode, "sf" },
2074 { DFmode, "df" }
2075 };
2076 unsigned int i, j;
2077
2078 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2079 {
2080 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2081 "add", fixed_arith_modes[i].name, 3);
2082 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2083 "ssadd", fixed_arith_modes[i].name, 3);
2084 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2085 "usadd", fixed_arith_modes[i].name, 3);
2086 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2087 "sub", fixed_arith_modes[i].name, 3);
2088 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2089 "sssub", fixed_arith_modes[i].name, 3);
2090 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2091 "ussub", fixed_arith_modes[i].name, 3);
2092 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2093 "mul", fixed_arith_modes[i].name, 3);
2094 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2095 "ssmul", fixed_arith_modes[i].name, 3);
2096 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2097 "usmul", fixed_arith_modes[i].name, 3);
2098 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2099 "div", fixed_arith_modes[i].name, 3);
2100 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2101 "udiv", fixed_arith_modes[i].name, 3);
2102 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2103 "ssdiv", fixed_arith_modes[i].name, 3);
2104 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2105 "usdiv", fixed_arith_modes[i].name, 3);
2106 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2107 "neg", fixed_arith_modes[i].name, 2);
2108 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2109 "ssneg", fixed_arith_modes[i].name, 2);
2110 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2111 "usneg", fixed_arith_modes[i].name, 2);
2112 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2113 "ashl", fixed_arith_modes[i].name, 3);
2114 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2115 "ashr", fixed_arith_modes[i].name, 3);
2116 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2117 "lshr", fixed_arith_modes[i].name, 3);
2118 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2119 "ssashl", fixed_arith_modes[i].name, 3);
2120 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2121 "usashl", fixed_arith_modes[i].name, 3);
2122 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2123 "cmp", fixed_arith_modes[i].name, 2);
2124 }
2125
2126 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2127 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2128 {
2129 if (i == j
2130 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2131 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2132 continue;
2133
2134 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2135 fixed_conv_modes[j].mode, "fract",
2136 fixed_conv_modes[i].name,
2137 fixed_conv_modes[j].name);
2138 arm_set_fixed_conv_libfunc (satfract_optab,
2139 fixed_conv_modes[i].mode,
2140 fixed_conv_modes[j].mode, "satfract",
2141 fixed_conv_modes[i].name,
2142 fixed_conv_modes[j].name);
2143 arm_set_fixed_conv_libfunc (fractuns_optab,
2144 fixed_conv_modes[i].mode,
2145 fixed_conv_modes[j].mode, "fractuns",
2146 fixed_conv_modes[i].name,
2147 fixed_conv_modes[j].name);
2148 arm_set_fixed_conv_libfunc (satfractuns_optab,
2149 fixed_conv_modes[i].mode,
2150 fixed_conv_modes[j].mode, "satfractuns",
2151 fixed_conv_modes[i].name,
2152 fixed_conv_modes[j].name);
2153 }
2154 }
2155
2156 if (TARGET_AAPCS_BASED)
2157 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2158 }
2159
2160 /* On AAPCS systems, this is the "struct __va_list". */
2161 static GTY(()) tree va_list_type;
2162
2163 /* Return the type to use as __builtin_va_list. */
2164 static tree
2165 arm_build_builtin_va_list (void)
2166 {
2167 tree va_list_name;
2168 tree ap_field;
2169
2170 if (!TARGET_AAPCS_BASED)
2171 return std_build_builtin_va_list ();
2172
2173 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2174 defined as:
2175
2176 struct __va_list
2177 {
2178 void *__ap;
2179 };
2180
2181 The C Library ABI further reinforces this definition in \S
2182 4.1.
2183
2184 We must follow this definition exactly. The structure tag
2185 name is visible in C++ mangled names, and thus forms a part
2186 of the ABI. The field name may be used by people who
2187 #include <stdarg.h>. */
2188 /* Create the type. */
2189 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2190 /* Give it the required name. */
2191 va_list_name = build_decl (BUILTINS_LOCATION,
2192 TYPE_DECL,
2193 get_identifier ("__va_list"),
2194 va_list_type);
2195 DECL_ARTIFICIAL (va_list_name) = 1;
2196 TYPE_NAME (va_list_type) = va_list_name;
2197 TYPE_STUB_DECL (va_list_type) = va_list_name;
2198 /* Create the __ap field. */
2199 ap_field = build_decl (BUILTINS_LOCATION,
2200 FIELD_DECL,
2201 get_identifier ("__ap"),
2202 ptr_type_node);
2203 DECL_ARTIFICIAL (ap_field) = 1;
2204 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2205 TYPE_FIELDS (va_list_type) = ap_field;
2206 /* Compute its layout. */
2207 layout_type (va_list_type);
2208
2209 return va_list_type;
2210 }
2211
2212 /* Return an expression of type "void *" pointing to the next
2213 available argument in a variable-argument list. VALIST is the
2214 user-level va_list object, of type __builtin_va_list. */
2215 static tree
2216 arm_extract_valist_ptr (tree valist)
2217 {
2218 if (TREE_TYPE (valist) == error_mark_node)
2219 return error_mark_node;
2220
2221 /* On an AAPCS target, the pointer is stored within "struct
2222 __va_list". */
2223 if (TARGET_AAPCS_BASED)
2224 {
2225 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2226 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2227 valist, ap_field, NULL_TREE);
2228 }
2229
2230 return valist;
2231 }
2232
2233 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2234 static void
2235 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2236 {
2237 valist = arm_extract_valist_ptr (valist);
2238 std_expand_builtin_va_start (valist, nextarg);
2239 }
2240
2241 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2242 static tree
2243 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2244 gimple_seq *post_p)
2245 {
2246 valist = arm_extract_valist_ptr (valist);
2247 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2248 }
2249
2250 /* Fix up any incompatible options that the user has specified. */
2251 static void
2252 arm_option_override (void)
2253 {
2254 if (global_options_set.x_arm_arch_option)
2255 arm_selected_arch = &all_architectures[arm_arch_option];
2256
2257 if (global_options_set.x_arm_cpu_option)
2258 {
2259 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2260 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2261 }
2262
2263 if (global_options_set.x_arm_tune_option)
2264 arm_selected_tune = &all_cores[(int) arm_tune_option];
2265
2266 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2267 SUBTARGET_OVERRIDE_OPTIONS;
2268 #endif
2269
2270 if (arm_selected_arch)
2271 {
2272 if (arm_selected_cpu)
2273 {
2274 /* Check for conflict between mcpu and march. */
2275 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2276 {
2277 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2278 arm_selected_cpu->name, arm_selected_arch->name);
2279 /* -march wins for code generation.
2280 -mcpu wins for default tuning. */
2281 if (!arm_selected_tune)
2282 arm_selected_tune = arm_selected_cpu;
2283
2284 arm_selected_cpu = arm_selected_arch;
2285 }
2286 else
2287 /* -mcpu wins. */
2288 arm_selected_arch = NULL;
2289 }
2290 else
2291 /* Pick a CPU based on the architecture. */
2292 arm_selected_cpu = arm_selected_arch;
2293 }
2294
2295 /* If the user did not specify a processor, choose one for them. */
2296 if (!arm_selected_cpu)
2297 {
2298 const struct processors * sel;
2299 unsigned int sought;
2300
2301 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2302 if (!arm_selected_cpu->name)
2303 {
2304 #ifdef SUBTARGET_CPU_DEFAULT
2305 /* Use the subtarget default CPU if none was specified by
2306 configure. */
2307 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2308 #endif
2309 /* Default to ARM6. */
2310 if (!arm_selected_cpu->name)
2311 arm_selected_cpu = &all_cores[arm6];
2312 }
2313
2314 sel = arm_selected_cpu;
2315 insn_flags = sel->flags;
2316
2317 /* Now check to see if the user has specified some command line
2318 switches that require certain abilities from the cpu. */
2319 sought = 0;
2320
2321 if (TARGET_INTERWORK || TARGET_THUMB)
2322 {
2323 sought |= (FL_THUMB | FL_MODE32);
2324
2325 /* There are no ARM processors that support both APCS-26 and
2326 interworking. Therefore we force FL_MODE26 to be removed
2327 from insn_flags here (if it was set), so that the search
2328 below will always be able to find a compatible processor. */
2329 insn_flags &= ~FL_MODE26;
2330 }
2331
2332 if (sought != 0 && ((sought & insn_flags) != sought))
2333 {
2334 /* Try to locate a CPU type that supports all of the abilities
2335 of the default CPU, plus the extra abilities requested by
2336 the user. */
2337 for (sel = all_cores; sel->name != NULL; sel++)
2338 if ((sel->flags & sought) == (sought | insn_flags))
2339 break;
2340
2341 if (sel->name == NULL)
2342 {
2343 unsigned current_bit_count = 0;
2344 const struct processors * best_fit = NULL;
2345
2346 /* Ideally we would like to issue an error message here
2347 saying that it was not possible to find a CPU compatible
2348 with the default CPU, but which also supports the command
2349 line options specified by the programmer, and so they
2350 ought to use the -mcpu=<name> command line option to
2351 override the default CPU type.
2352
2353 If we cannot find a cpu that has both the
2354 characteristics of the default cpu and the given
2355 command line options we scan the array again looking
2356 for a best match. */
2357 for (sel = all_cores; sel->name != NULL; sel++)
2358 if ((sel->flags & sought) == sought)
2359 {
2360 unsigned count;
2361
2362 count = bit_count (sel->flags & insn_flags);
2363
2364 if (count >= current_bit_count)
2365 {
2366 best_fit = sel;
2367 current_bit_count = count;
2368 }
2369 }
2370
2371 gcc_assert (best_fit);
2372 sel = best_fit;
2373 }
2374
2375 arm_selected_cpu = sel;
2376 }
2377 }
2378
2379 gcc_assert (arm_selected_cpu);
2380 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2381 if (!arm_selected_tune)
2382 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2383
2384 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2385 insn_flags = arm_selected_cpu->flags;
2386 arm_base_arch = arm_selected_cpu->base_arch;
2387
2388 arm_tune = arm_selected_tune->core;
2389 tune_flags = arm_selected_tune->flags;
2390 current_tune = arm_selected_tune->tune;
2391
2392 /* Make sure that the processor choice does not conflict with any of the
2393 other command line choices. */
2394 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2395 error ("target CPU does not support ARM mode");
2396
2397 /* BPABI targets use linker tricks to allow interworking on cores
2398 without thumb support. */
2399 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2400 {
2401 warning (0, "target CPU does not support interworking" );
2402 target_flags &= ~MASK_INTERWORK;
2403 }
2404
2405 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2406 {
2407 warning (0, "target CPU does not support THUMB instructions");
2408 target_flags &= ~MASK_THUMB;
2409 }
2410
2411 if (TARGET_APCS_FRAME && TARGET_THUMB)
2412 {
2413 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2414 target_flags &= ~MASK_APCS_FRAME;
2415 }
2416
2417 /* Callee super interworking implies thumb interworking. Adding
2418 this to the flags here simplifies the logic elsewhere. */
2419 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2420 target_flags |= MASK_INTERWORK;
2421
2422 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2423 from here where no function is being compiled currently. */
2424 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2425 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2426
2427 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2428 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2429
2430 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2431 {
2432 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2433 target_flags |= MASK_APCS_FRAME;
2434 }
2435
2436 if (TARGET_POKE_FUNCTION_NAME)
2437 target_flags |= MASK_APCS_FRAME;
2438
2439 if (TARGET_APCS_REENT && flag_pic)
2440 error ("-fpic and -mapcs-reent are incompatible");
2441
2442 if (TARGET_APCS_REENT)
2443 warning (0, "APCS reentrant code not supported. Ignored");
2444
2445 /* If this target is normally configured to use APCS frames, warn if they
2446 are turned off and debugging is turned on. */
2447 if (TARGET_ARM
2448 && write_symbols != NO_DEBUG
2449 && !TARGET_APCS_FRAME
2450 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2451 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2452
2453 if (TARGET_APCS_FLOAT)
2454 warning (0, "passing floating point arguments in fp regs not yet supported");
2455
2456 if (TARGET_LITTLE_WORDS)
2457 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2458 "will be removed in a future release");
2459
2460 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2461 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2462 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2463 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2464 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2465 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2466 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2467 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2468 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2469 arm_arch6m = arm_arch6 && !arm_arch_notm;
2470 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2471 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2472 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2473 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2474 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2475
2476 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2477 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2478 thumb_code = TARGET_ARM == 0;
2479 thumb1_code = TARGET_THUMB1 != 0;
2480 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2481 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2482 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2483 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2484 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2485 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2486 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2487 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2488 if (arm_restrict_it == 2)
2489 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2490
2491 if (!TARGET_THUMB2)
2492 arm_restrict_it = 0;
2493
2494 /* If we are not using the default (ARM mode) section anchor offset
2495 ranges, then set the correct ranges now. */
2496 if (TARGET_THUMB1)
2497 {
2498 /* Thumb-1 LDR instructions cannot have negative offsets.
2499 Permissible positive offset ranges are 5-bit (for byte loads),
2500 6-bit (for halfword loads), or 7-bit (for word loads).
2501 Empirical results suggest a 7-bit anchor range gives the best
2502 overall code size. */
2503 targetm.min_anchor_offset = 0;
2504 targetm.max_anchor_offset = 127;
2505 }
2506 else if (TARGET_THUMB2)
2507 {
2508 /* The minimum is set such that the total size of the block
2509 for a particular anchor is 248 + 1 + 4095 bytes, which is
2510 divisible by eight, ensuring natural spacing of anchors. */
2511 targetm.min_anchor_offset = -248;
2512 targetm.max_anchor_offset = 4095;
2513 }
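  /* Illustrative arithmetic behind the ranges above: a Thumb-1 LDRB has a
     5-bit immediate offset (0..31), LDRH a 5-bit offset scaled by 2 (0..62)
     and LDR a 5-bit offset scaled by 4 (0..124), i.e. at most 7 bits of
     byte offset.  For Thumb-2, 248 + 1 + 4095 = 4344 = 8 * 543, which is
     what gives the natural eight-byte spacing of anchors noted above.  */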
2514
2515 /* V5 code we generate is completely interworking capable, so we turn off
2516 TARGET_INTERWORK here to avoid many tests later on. */
2517
2518 /* XXX However, we must pass the right pre-processor defines to CPP
2519 or GLD can get confused. This is a hack. */
2520 if (TARGET_INTERWORK)
2521 arm_cpp_interwork = 1;
2522
2523 if (arm_arch5)
2524 target_flags &= ~MASK_INTERWORK;
2525
2526 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2527 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2528
2529 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2530 error ("iwmmxt abi requires an iwmmxt capable cpu");
2531
2532 if (!global_options_set.x_arm_fpu_index)
2533 {
2534 const char *target_fpu_name;
2535 bool ok;
2536
2537 #ifdef FPUTYPE_DEFAULT
2538 target_fpu_name = FPUTYPE_DEFAULT;
2539 #else
2540 target_fpu_name = "vfp";
2541 #endif
2542
2543 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2544 CL_TARGET);
2545 gcc_assert (ok);
2546 }
2547
2548 arm_fpu_desc = &all_fpus[arm_fpu_index];
2549
2550 switch (arm_fpu_desc->model)
2551 {
2552 case ARM_FP_MODEL_VFP:
2553 arm_fpu_attr = FPU_VFP;
2554 break;
2555
2556 default:
2557 gcc_unreachable();
2558 }
2559
2560 if (TARGET_AAPCS_BASED)
2561 {
2562 if (TARGET_CALLER_INTERWORKING)
2563 error ("AAPCS does not support -mcaller-super-interworking");
2564 else
2565 if (TARGET_CALLEE_INTERWORKING)
2566 error ("AAPCS does not support -mcallee-super-interworking");
2567 }
2568
2569 /* iWMMXt and NEON are incompatible. */
2570 if (TARGET_IWMMXT && TARGET_NEON)
2571 error ("iWMMXt and NEON are incompatible");
2572
2573 /* iWMMXt unsupported under Thumb mode. */
2574 if (TARGET_THUMB && TARGET_IWMMXT)
2575 error ("iWMMXt unsupported under Thumb mode");
2576
2577 /* __fp16 support currently assumes the core has ldrh. */
2578 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2579 sorry ("__fp16 and no ldrh");
2580
2581 /* If soft-float is specified then don't use FPU. */
2582 if (TARGET_SOFT_FLOAT)
2583 arm_fpu_attr = FPU_NONE;
2584
2585 if (TARGET_AAPCS_BASED)
2586 {
2587 if (arm_abi == ARM_ABI_IWMMXT)
2588 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2589 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2590 && TARGET_HARD_FLOAT
2591 && TARGET_VFP)
2592 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2593 else
2594 arm_pcs_default = ARM_PCS_AAPCS;
2595 }
2596 else
2597 {
2598 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2599 sorry ("-mfloat-abi=hard and VFP");
2600
2601 if (arm_abi == ARM_ABI_APCS)
2602 arm_pcs_default = ARM_PCS_APCS;
2603 else
2604 arm_pcs_default = ARM_PCS_ATPCS;
2605 }
2606
2607 /* For arm2/3 there is no need to do any scheduling if we are doing
2608 software floating-point. */
2609 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2610 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2611
2612 /* Use the cp15 method if it is available. */
2613 if (target_thread_pointer == TP_AUTO)
2614 {
2615 if (arm_arch6k && !TARGET_THUMB1)
2616 target_thread_pointer = TP_CP15;
2617 else
2618 target_thread_pointer = TP_SOFT;
2619 }
2620
2621 if (TARGET_HARD_TP && TARGET_THUMB1)
2622 error ("can not use -mtp=cp15 with 16-bit Thumb");
2623
2624 /* Override the default structure alignment for AAPCS ABI. */
2625 if (!global_options_set.x_arm_structure_size_boundary)
2626 {
2627 if (TARGET_AAPCS_BASED)
2628 arm_structure_size_boundary = 8;
2629 }
2630 else
2631 {
2632 if (arm_structure_size_boundary != 8
2633 && arm_structure_size_boundary != 32
2634 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2635 {
2636 if (ARM_DOUBLEWORD_ALIGN)
2637 warning (0,
2638 "structure size boundary can only be set to 8, 32 or 64");
2639 else
2640 warning (0, "structure size boundary can only be set to 8 or 32");
2641 arm_structure_size_boundary
2642 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2643 }
2644 }
2645
2646 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2647 {
2648 error ("RTP PIC is incompatible with Thumb");
2649 flag_pic = 0;
2650 }
2651
2652 /* If stack checking is disabled, we can use r10 as the PIC register,
2653 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2654 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2655 {
2656 if (TARGET_VXWORKS_RTP)
2657 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2658 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2659 }
2660
2661 if (flag_pic && TARGET_VXWORKS_RTP)
2662 arm_pic_register = 9;
2663
2664 if (arm_pic_register_string != NULL)
2665 {
2666 int pic_register = decode_reg_name (arm_pic_register_string);
2667
2668 if (!flag_pic)
2669 warning (0, "-mpic-register= is useless without -fpic");
2670
2671 /* Prevent the user from choosing an obviously stupid PIC register. */
2672 else if (pic_register < 0 || call_used_regs[pic_register]
2673 || pic_register == HARD_FRAME_POINTER_REGNUM
2674 || pic_register == STACK_POINTER_REGNUM
2675 || pic_register >= PC_REGNUM
2676 || (TARGET_VXWORKS_RTP
2677 && (unsigned int) pic_register != arm_pic_register))
2678 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2679 else
2680 arm_pic_register = pic_register;
2681 }
2682
2683 if (TARGET_VXWORKS_RTP
2684 && !global_options_set.x_arm_pic_data_is_text_relative)
2685 arm_pic_data_is_text_relative = 0;
2686
2687 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2688 if (fix_cm3_ldrd == 2)
2689 {
2690 if (arm_selected_cpu->core == cortexm3)
2691 fix_cm3_ldrd = 1;
2692 else
2693 fix_cm3_ldrd = 0;
2694 }
2695
2696 /* Enable -munaligned-access by default for
2697 - all ARMv6 architecture-based processors
2698 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2699 - ARMv8 architecture-based processors.
2700
2701 Disable -munaligned-access by default for
2702 - all pre-ARMv6 architecture-based processors
2703 - ARMv6-M architecture-based processors. */
2704
2705 if (unaligned_access == 2)
2706 {
2707 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2708 unaligned_access = 1;
2709 else
2710 unaligned_access = 0;
2711 }
2712 else if (unaligned_access == 1
2713 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2714 {
2715 warning (0, "target CPU does not support unaligned accesses");
2716 unaligned_access = 0;
2717 }
2718
2719 if (TARGET_THUMB1 && flag_schedule_insns)
2720 {
2721 /* Don't warn since it's on by default in -O2. */
2722 flag_schedule_insns = 0;
2723 }
2724
2725 if (optimize_size)
2726 {
2727 /* If optimizing for size, bump the number of instructions that we
2728 are prepared to conditionally execute (even on a StrongARM). */
2729 max_insns_skipped = 6;
2730 }
2731 else
2732 max_insns_skipped = current_tune->max_insns_skipped;
2733
2734 /* Hot/Cold partitioning is not currently supported, since we can't
2735 handle literal pool placement in that case. */
2736 if (flag_reorder_blocks_and_partition)
2737 {
2738 inform (input_location,
2739 "-freorder-blocks-and-partition not supported on this architecture");
2740 flag_reorder_blocks_and_partition = 0;
2741 flag_reorder_blocks = 1;
2742 }
2743
2744 if (flag_pic)
2745 /* Hoisting PIC address calculations more aggressively provides a small,
2746 but measurable, size reduction for PIC code. Therefore, we decrease
2747 the bar for unrestricted expression hoisting to the cost of PIC address
2748 calculation, which is 2 instructions. */
2749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2750 global_options.x_param_values,
2751 global_options_set.x_param_values);
2752
2753 /* ARM EABI defaults to strict volatile bitfields. */
2754 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2755 && abi_version_at_least(2))
2756 flag_strict_volatile_bitfields = 1;
2757
2758 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
2759 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2760 if (flag_prefetch_loop_arrays < 0
2761 && HAVE_prefetch
2762 && optimize >= 3
2763 && current_tune->num_prefetch_slots > 0)
2764 flag_prefetch_loop_arrays = 1;
2765
2766 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2767 defaults unless we are tuning for a core we have researched values for. */
2768 if (current_tune->num_prefetch_slots > 0)
2769 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2770 current_tune->num_prefetch_slots,
2771 global_options.x_param_values,
2772 global_options_set.x_param_values);
2773 if (current_tune->l1_cache_line_size >= 0)
2774 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2775 current_tune->l1_cache_line_size,
2776 global_options.x_param_values,
2777 global_options_set.x_param_values);
2778 if (current_tune->l1_cache_size >= 0)
2779 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2780 current_tune->l1_cache_size,
2781 global_options.x_param_values,
2782 global_options_set.x_param_values);
2783
2784 /* Use Neon to perform 64-bit operations rather than core
2785 registers. */
2786 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2787 if (use_neon_for_64bits == 1)
2788 prefer_neon_for_64bits = true;
2789
2790 /* Use the alternative scheduling-pressure algorithm by default. */
2791 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2792 global_options.x_param_values,
2793 global_options_set.x_param_values);
2794
2795 /* Disable shrink-wrap when optimizing function for size, since it tends to
2796 generate additional returns. */
2797 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2798 flag_shrink_wrap = false;
2799 /* TBD: Dwarf info for apcs frame is not handled yet. */
2800 if (TARGET_APCS_FRAME)
2801 flag_shrink_wrap = false;
2802
2803 /* We only support -mslow-flash-data on armv7-m targets. */
2804 if (target_slow_flash_data
2805 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2806 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2807 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2808
2809 /* Currently, for slow flash data, we just disable literal pools. */
2810 if (target_slow_flash_data)
2811 arm_disable_literal_pool = true;
2812
2813 /* Register global variables with the garbage collector. */
2814 arm_add_gc_roots ();
2815 }
2816
2817 static void
2818 arm_add_gc_roots (void)
2819 {
2820 gcc_obstack_init(&minipool_obstack);
2821 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2822 }
2823 \f
2824 /* A table of known ARM exception types.
2825 For use with the interrupt function attribute. */
2826
2827 typedef struct
2828 {
2829 const char *const arg;
2830 const unsigned long return_value;
2831 }
2832 isr_attribute_arg;
2833
2834 static const isr_attribute_arg isr_attribute_args [] =
2835 {
2836 { "IRQ", ARM_FT_ISR },
2837 { "irq", ARM_FT_ISR },
2838 { "FIQ", ARM_FT_FIQ },
2839 { "fiq", ARM_FT_FIQ },
2840 { "ABORT", ARM_FT_ISR },
2841 { "abort", ARM_FT_ISR },
2842 { "ABORT", ARM_FT_ISR },
2843 { "abort", ARM_FT_ISR },
2844 { "UNDEF", ARM_FT_EXCEPTION },
2845 { "undef", ARM_FT_EXCEPTION },
2846 { "SWI", ARM_FT_EXCEPTION },
2847 { "swi", ARM_FT_EXCEPTION },
2848 { NULL, ARM_FT_NORMAL }
2849 };
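/* Typical usage (illustrative):
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
   The "isr" spelling of the attribute is also accepted, and omitting the
   argument defaults to IRQ (see arm_isr_value below).  */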
2850
2851 /* Returns the (interrupt) function type of the current
2852 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2853
2854 static unsigned long
2855 arm_isr_value (tree argument)
2856 {
2857 const isr_attribute_arg * ptr;
2858 const char * arg;
2859
2860 if (!arm_arch_notm)
2861 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2862
2863 /* No argument - default to IRQ. */
2864 if (argument == NULL_TREE)
2865 return ARM_FT_ISR;
2866
2867 /* Get the value of the argument. */
2868 if (TREE_VALUE (argument) == NULL_TREE
2869 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2870 return ARM_FT_UNKNOWN;
2871
2872 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2873
2874 /* Check it against the list of known arguments. */
2875 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2876 if (streq (arg, ptr->arg))
2877 return ptr->return_value;
2878
2879 /* An unrecognized interrupt type. */
2880 return ARM_FT_UNKNOWN;
2881 }
2882
2883 /* Computes the type of the current function. */
2884
2885 static unsigned long
2886 arm_compute_func_type (void)
2887 {
2888 unsigned long type = ARM_FT_UNKNOWN;
2889 tree a;
2890 tree attr;
2891
2892 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2893
2894 /* Decide if the current function is volatile. Such functions
2895 never return, and many memory cycles can be saved by not storing
2896 register values that will never be needed again. This optimization
2897 was added to speed up context switching in a kernel application. */
2898 if (optimize > 0
2899 && (TREE_NOTHROW (current_function_decl)
2900 || !(flag_unwind_tables
2901 || (flag_exceptions
2902 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2903 && TREE_THIS_VOLATILE (current_function_decl))
2904 type |= ARM_FT_VOLATILE;
2905
2906 if (cfun->static_chain_decl != NULL)
2907 type |= ARM_FT_NESTED;
2908
2909 attr = DECL_ATTRIBUTES (current_function_decl);
2910
2911 a = lookup_attribute ("naked", attr);
2912 if (a != NULL_TREE)
2913 type |= ARM_FT_NAKED;
2914
2915 a = lookup_attribute ("isr", attr);
2916 if (a == NULL_TREE)
2917 a = lookup_attribute ("interrupt", attr);
2918
2919 if (a == NULL_TREE)
2920 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2921 else
2922 type |= arm_isr_value (TREE_VALUE (a));
2923
2924 return type;
2925 }
2926
2927 /* Returns the type of the current function. */
2928
2929 unsigned long
2930 arm_current_func_type (void)
2931 {
2932 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2933 cfun->machine->func_type = arm_compute_func_type ();
2934
2935 return cfun->machine->func_type;
2936 }
2937
2938 bool
2939 arm_allocate_stack_slots_for_args (void)
2940 {
2941 /* Naked functions should not allocate stack slots for arguments. */
2942 return !IS_NAKED (arm_current_func_type ());
2943 }
2944
2945 static bool
2946 arm_warn_func_return (tree decl)
2947 {
2948 /* Naked functions are implemented entirely in assembly, including the
2949 return sequence, so suppress warnings about this. */
2950 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2951 }
2952
2953 \f
2954 /* Output assembler code for a block containing the constant parts
2955 of a trampoline, leaving space for the variable parts.
2956
2957 On the ARM, (if r8 is the static chain regnum, and remembering that
2958 referencing pc adds an offset of 8) the trampoline looks like:
2959 ldr r8, [pc, #0]
2960 ldr pc, [pc]
2961 .word static chain value
2962 .word function's address
2963 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2964
2965 static void
2966 arm_asm_trampoline_template (FILE *f)
2967 {
2968 if (TARGET_ARM)
2969 {
2970 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2971 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2972 }
2973 else if (TARGET_THUMB2)
2974 {
2975 /* The Thumb-2 trampoline is similar to the arm implementation.
2976 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2977 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2978 STATIC_CHAIN_REGNUM, PC_REGNUM);
2979 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2980 }
2981 else
2982 {
2983 ASM_OUTPUT_ALIGN (f, 2);
2984 fprintf (f, "\t.code\t16\n");
2985 fprintf (f, ".Ltrampoline_start:\n");
2986 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2987 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2988 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2989 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2990 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2991 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2992 }
2993 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2994 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2995 }
2996
2997 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2998
2999 static void
3000 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3001 {
3002 rtx fnaddr, mem, a_tramp;
3003
3004 emit_block_move (m_tramp, assemble_trampoline_template (),
3005 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3006
3007 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3008 emit_move_insn (mem, chain_value);
3009
3010 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3011 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3012 emit_move_insn (mem, fnaddr);
3013
3014 a_tramp = XEXP (m_tramp, 0);
3015 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3016 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3017 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3018 }
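/* Note on the offsets used above (illustrative): in the ARM and Thumb-2
   templates two 4-byte instructions precede the literal words, so the
   static chain and function address live at offsets 8 and 12; the 16-bit
   Thumb template uses 12 bytes of code, placing them at 12 and 16.  */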
3019
3020 /* Thumb trampolines should be entered in thumb mode, so set
3021 the bottom bit of the address. */
3022
3023 static rtx
3024 arm_trampoline_adjust_address (rtx addr)
3025 {
3026 if (TARGET_THUMB)
3027 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3028 NULL, 0, OPTAB_LIB_WIDEN);
3029 return addr;
3030 }
3031 \f
3032 /* Return 1 if it is possible to return using a single instruction.
3033 If SIBLING is non-null, this is a test for a return before a sibling
3034 call. SIBLING is the call insn, so we can examine its register usage. */
3035
3036 int
3037 use_return_insn (int iscond, rtx sibling)
3038 {
3039 int regno;
3040 unsigned int func_type;
3041 unsigned long saved_int_regs;
3042 unsigned HOST_WIDE_INT stack_adjust;
3043 arm_stack_offsets *offsets;
3044
3045 /* Never use a return instruction before reload has run. */
3046 if (!reload_completed)
3047 return 0;
3048
3049 func_type = arm_current_func_type ();
3050
3051 /* Naked, volatile and stack alignment functions need special
3052 consideration. */
3053 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3054 return 0;
3055
3056 /* So do interrupt functions that use the frame pointer and Thumb
3057 interrupt functions. */
3058 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3059 return 0;
3060
3061 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3062 && !optimize_function_for_size_p (cfun))
3063 return 0;
3064
3065 offsets = arm_get_frame_offsets ();
3066 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3067
3068 /* As do variadic functions. */
3069 if (crtl->args.pretend_args_size
3070 || cfun->machine->uses_anonymous_args
3071 /* Or if the function calls __builtin_eh_return () */
3072 || crtl->calls_eh_return
3073 /* Or if the function calls alloca */
3074 || cfun->calls_alloca
3075 /* Or if there is a stack adjustment. However, if the stack pointer
3076 is saved on the stack, we can use a pre-incrementing stack load. */
3077 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3078 && stack_adjust == 4)))
3079 return 0;
3080
3081 saved_int_regs = offsets->saved_regs_mask;
3082
3083 /* Unfortunately, the insn
3084
3085 ldmib sp, {..., sp, ...}
3086
3087 triggers a bug on most SA-110 based devices, such that the stack
3088 pointer won't be correctly restored if the instruction takes a
3089 page fault. We work around this problem by popping r3 along with
3090 the other registers, since that is never slower than executing
3091 another instruction.
3092
3093 We test for !arm_arch5 here, because code for any architecture
3094 less than this could potentially be run on one of the buggy
3095 chips. */
3096 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3097 {
3098 /* Validate that r3 is a call-clobbered register (always true in
3099 the default abi) ... */
3100 if (!call_used_regs[3])
3101 return 0;
3102
3103 /* ... that it isn't being used for a return value ... */
3104 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3105 return 0;
3106
3107 /* ... or for a tail-call argument ... */
3108 if (sibling)
3109 {
3110 gcc_assert (CALL_P (sibling));
3111
3112 if (find_regno_fusage (sibling, USE, 3))
3113 return 0;
3114 }
3115
3116 /* ... and that there are no call-saved registers in r0-r2
3117 (always true in the default ABI). */
3118 if (saved_int_regs & 0x7)
3119 return 0;
3120 }
3121
3122 /* Can't be done if interworking with Thumb, and any registers have been
3123 stacked. */
3124 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3125 return 0;
3126
3127 /* On StrongARM, conditional returns are expensive if they aren't
3128 taken and multiple registers have been stacked. */
3129 if (iscond && arm_tune_strongarm)
3130 {
3131 /* Conditional return when just the LR is stored is a simple
3132 conditional-load instruction, that's not expensive. */
3133 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3134 return 0;
3135
3136 if (flag_pic
3137 && arm_pic_register != INVALID_REGNUM
3138 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3139 return 0;
3140 }
3141
3142 /* If there are saved registers but the LR isn't saved, then we need
3143 two instructions for the return. */
3144 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3145 return 0;
3146
3147 /* Can't be done if any of the VFP regs are pushed,
3148 since this also requires an insn. */
3149 if (TARGET_HARD_FLOAT && TARGET_VFP)
3150 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3151 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3152 return 0;
3153
3154 if (TARGET_REALLY_IWMMXT)
3155 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3156 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3157 return 0;
3158
3159 return 1;
3160 }
3161
3162 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3163 shrink-wrapping if possible. This is the case if we need to emit a
3164 prologue, which we can test by looking at the offsets. */
3165 bool
3166 use_simple_return_p (void)
3167 {
3168 arm_stack_offsets *offsets;
3169
3170 offsets = arm_get_frame_offsets ();
3171 return offsets->outgoing_args != 0;
3172 }
3173
3174 /* Return TRUE if int I is a valid immediate ARM constant. */
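/* Illustrative examples for ARM (not Thumb-2) mode, where a valid immediate
   is an 8-bit value rotated right by an even amount: 0xff, 0xff0
   (0xff << 4) and 0xf000000f (0xff rotated right by 4) are all accepted,
   whereas 0x1fe (an odd shift of 0xff) and 0x101 (more than 8 significant
   bits) are not.  */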
3175
3176 int
3177 const_ok_for_arm (HOST_WIDE_INT i)
3178 {
3179 int lowbit;
3180
3181 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3182 be all zero, or all one. */
3183 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3184 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3185 != ((~(unsigned HOST_WIDE_INT) 0)
3186 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3187 return FALSE;
3188
3189 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3190
3191 /* Fast return for 0 and small values. We must do this for zero, since
3192 the code below can't handle that one case. */
3193 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3194 return TRUE;
3195
3196 /* Get the number of trailing zeros. */
3197 lowbit = ffs((int) i) - 1;
3198
3199 /* Only even shifts are allowed in ARM mode so round down to the
3200 nearest even number. */
3201 if (TARGET_ARM)
3202 lowbit &= ~1;
3203
3204 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3205 return TRUE;
3206
3207 if (TARGET_ARM)
3208 {
3209 /* Allow rotated constants in ARM mode. */
3210 if (lowbit <= 4
3211 && ((i & ~0xc000003f) == 0
3212 || (i & ~0xf000000f) == 0
3213 || (i & ~0xfc000003) == 0))
3214 return TRUE;
3215 }
3216 else
3217 {
3218 HOST_WIDE_INT v;
3219
3220 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3221 v = i & 0xff;
3222 v |= v << 16;
3223 if (i == v || i == (v | (v << 8)))
3224 return TRUE;
3225
3226 /* Allow repeated pattern 0xXY00XY00. */
3227 v = i & 0xff00;
3228 v |= v << 16;
3229 if (i == v)
3230 return TRUE;
3231 }
3232
3233 return FALSE;
3234 }
3235
3236 /* Return true if I is a valid constant for the operation CODE. */
3237 int
3238 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3239 {
3240 if (const_ok_for_arm (i))
3241 return 1;
3242
3243 switch (code)
3244 {
3245 case SET:
3246 /* See if we can use movw. */
3247 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3248 return 1;
3249 else
3250 /* Otherwise, try mvn. */
3251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3252
3253 case PLUS:
3254 /* See if we can use addw or subw. */
3255 if (TARGET_THUMB2
3256 && ((i & 0xfffff000) == 0
3257 || ((-i) & 0xfffff000) == 0))
3258 return 1;
3259 /* else fall through. */
3260
3261 case COMPARE:
3262 case EQ:
3263 case NE:
3264 case GT:
3265 case LE:
3266 case LT:
3267 case GE:
3268 case GEU:
3269 case LTU:
3270 case GTU:
3271 case LEU:
3272 case UNORDERED:
3273 case ORDERED:
3274 case UNEQ:
3275 case UNGE:
3276 case UNLT:
3277 case UNGT:
3278 case UNLE:
3279 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3280
3281 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3282 case XOR:
3283 return 0;
3284
3285 case IOR:
3286 if (TARGET_THUMB2)
3287 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3288 return 0;
3289
3290 case AND:
3291 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3292
3293 default:
3294 gcc_unreachable ();
3295 }
3296 }
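
/* For illustration: with CODE == SET the constant 0xffffff00 is not a
   valid ARM immediate, but its complement 0xff is, so the test above
   succeeds and the value can be loaded with a single MVN.  Similarly,
   for CODE == PLUS the constant -1 is accepted because its negation,
   1, is a valid immediate (the addition becomes a subtraction).  */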
3297
3298 /* Return true if I is a valid di mode constant for the operation CODE. */
3299 int
3300 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3301 {
3302 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3303 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3304 rtx hi = GEN_INT (hi_val);
3305 rtx lo = GEN_INT (lo_val);
3306
3307 if (TARGET_THUMB1)
3308 return 0;
3309
3310 switch (code)
3311 {
3312 case AND:
3313 case IOR:
3314 case XOR:
3315 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3316 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3317 case PLUS:
3318 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3319
3320 default:
3321 return 0;
3322 }
3323 }
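
/* For illustration: an IOR with the DImode constant 0x00000000ffffffff
   is accepted because the high word (0) is valid as-is and the low
   word hits the special-cased all-ones value, whereas a low word such
   as 0x12345678 would make the whole constant invalid.  */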
3324
3325 /* Emit a sequence of insns to handle a large constant.
3326 CODE is the code of the operation required, it can be any of SET, PLUS,
3327 IOR, AND, XOR, MINUS;
3328 MODE is the mode in which the operation is being performed;
3329 VAL is the integer to operate on;
3330 SOURCE is the other operand (a register, or a null-pointer for SET);
3331 SUBTARGETS means it is safe to create scratch registers if that will
3332 either produce a simpler sequence, or we will want to cse the values.
3333 Return value is the number of insns emitted. */
3334
3335 /* ??? Tweak this for thumb2. */
3336 int
3337 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3338 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3339 {
3340 rtx cond;
3341
3342 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3343 cond = COND_EXEC_TEST (PATTERN (insn));
3344 else
3345 cond = NULL_RTX;
3346
3347 if (subtargets || code == SET
3348 || (REG_P (target) && REG_P (source)
3349 && REGNO (target) != REGNO (source)))
3350 {
3351 /* After arm_reorg has been called, we can't fix up expensive
3352 constants by pushing them into memory so we must synthesize
3353 them in-line, regardless of the cost. This is only likely to
3354 be more costly on chips that have load delay slots and we are
3355 compiling without running the scheduler (so no splitting
3356 occurred before the final instruction emission).
3357
3358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3359 */
3360 if (!after_arm_reorg
3361 && !cond
3362 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3363 1, 0)
3364 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3365 + (code != SET))))
3366 {
3367 if (code == SET)
3368 {
3369 /* Currently SET is the only monadic value for CODE; all
3370 the rest are dyadic. */
3371 if (TARGET_USE_MOVT)
3372 arm_emit_movpair (target, GEN_INT (val));
3373 else
3374 emit_set_insn (target, GEN_INT (val));
3375
3376 return 1;
3377 }
3378 else
3379 {
3380 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3381
3382 if (TARGET_USE_MOVT)
3383 arm_emit_movpair (temp, GEN_INT (val));
3384 else
3385 emit_set_insn (temp, GEN_INT (val));
3386
3387 /* For MINUS, the value is subtracted from, since we never
3388 have subtraction of a constant. */
3389 if (code == MINUS)
3390 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3391 else
3392 emit_set_insn (target,
3393 gen_rtx_fmt_ee (code, mode, source, temp));
3394 return 2;
3395 }
3396 }
3397 }
3398
3399 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3400 1);
3401 }
3402
3403 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3404 ARM/THUMB2 immediates and add up to VAL.
3405 The function return value gives the number of insns required. */
3406 static int
3407 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3408 struct four_ints *return_sequence)
3409 {
3410 int best_consecutive_zeros = 0;
3411 int i;
3412 int best_start = 0;
3413 int insns1, insns2;
3414 struct four_ints tmp_sequence;
3415
3416 /* If we aren't targeting ARM, the best place to start is always at
3417 the bottom, otherwise look more closely. */
3418 if (TARGET_ARM)
3419 {
3420 for (i = 0; i < 32; i += 2)
3421 {
3422 int consecutive_zeros = 0;
3423
3424 if (!(val & (3 << i)))
3425 {
3426 while ((i < 32) && !(val & (3 << i)))
3427 {
3428 consecutive_zeros += 2;
3429 i += 2;
3430 }
3431 if (consecutive_zeros > best_consecutive_zeros)
3432 {
3433 best_consecutive_zeros = consecutive_zeros;
3434 best_start = i - consecutive_zeros;
3435 }
3436 i -= 2;
3437 }
3438 }
3439 }
3440
3441 /* So long as it won't require any more insns to do so, it's
3442 desirable to emit a small constant (in bits 0...9) in the last
3443 insn. This way there is more chance that it can be combined with
3444 a later addressing insn to form a pre-indexed load or store
3445 operation. Consider:
3446
3447 *((volatile int *)0xe0000100) = 1;
3448 *((volatile int *)0xe0000110) = 2;
3449
3450 We want this to wind up as:
3451
3452 mov rA, #0xe0000000
3453 mov rB, #1
3454 str rB, [rA, #0x100]
3455 mov rB, #2
3456 str rB, [rA, #0x110]
3457
3458 rather than having to synthesize both large constants from scratch.
3459
3460 Therefore, we calculate how many insns would be required to emit
3461 the constant starting from `best_start', and also starting from
3462 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3463 yield a shorter sequence, we may as well use zero. */
3464 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3465 if (best_start != 0
3466 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3467 {
3468 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3469 if (insns2 <= insns1)
3470 {
3471 *return_sequence = tmp_sequence;
3472 insns1 = insns2;
3473 }
3474 }
3475
3476 return insns1;
3477 }
3478
3479 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3480 static int
3481 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3482 struct four_ints *return_sequence, int i)
3483 {
3484 int remainder = val & 0xffffffff;
3485 int insns = 0;
3486
3487 /* Try and find a way of doing the job in either two or three
3488 instructions.
3489
3490 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3491 location. We start at position I. This may be the MSB, or
3492 optimal_immediate_sequence may have positioned it at the largest block
3493 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3494 wrapping around to the top of the word when we drop off the bottom.
3495 In the worst case this code should produce no more than four insns.
3496
3497 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3498 constants, shifted to any arbitrary location. We should always start
3499 at the MSB. */
3500 do
3501 {
3502 int end;
3503 unsigned int b1, b2, b3, b4;
3504 unsigned HOST_WIDE_INT result;
3505 int loc;
3506
3507 gcc_assert (insns < 4);
3508
3509 if (i <= 0)
3510 i += 32;
3511
3512 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3513 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3514 {
3515 loc = i;
3516 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3517 /* We can use addw/subw for the last 12 bits. */
3518 result = remainder;
3519 else
3520 {
3521 /* Use an 8-bit shifted/rotated immediate. */
3522 end = i - 8;
3523 if (end < 0)
3524 end += 32;
3525 result = remainder & ((0x0ff << end)
3526 | ((i < end) ? (0xff >> (32 - end))
3527 : 0));
3528 i -= 8;
3529 }
3530 }
3531 else
3532 {
3533 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3534 arbitrary shifts. */
3535 i -= TARGET_ARM ? 2 : 1;
3536 continue;
3537 }
3538
3539 /* Next, see if we can do a better job with a thumb2 replicated
3540 constant.
3541
3542 We do it this way around to catch the cases like 0x01F001E0 where
3543 two 8-bit immediates would work, but a replicated constant would
3544 make it worse.
3545
3546 TODO: 16-bit constants that don't clear all the bits, but still win.
3547 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3548 if (TARGET_THUMB2)
3549 {
3550 b1 = (remainder & 0xff000000) >> 24;
3551 b2 = (remainder & 0x00ff0000) >> 16;
3552 b3 = (remainder & 0x0000ff00) >> 8;
3553 b4 = remainder & 0xff;
3554
3555 if (loc > 24)
3556 {
3557 /* The 8-bit immediate already found clears b1 (and maybe b2),
3558 but must leave b3 and b4 alone. */
3559
3560 /* First try to find a 32-bit replicated constant that clears
3561 almost everything. We can assume that we can't do it in one,
3562 or else we wouldn't be here. */
3563 unsigned int tmp = b1 & b2 & b3 & b4;
3564 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3565 + (tmp << 24);
3566 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3567 + (tmp == b3) + (tmp == b4);
3568 if (tmp
3569 && (matching_bytes >= 3
3570 || (matching_bytes == 2
3571 && const_ok_for_op (remainder & ~tmp2, code))))
3572 {
3573 /* At least 3 of the bytes match, and the fourth has at
3574 least as many bits set, or two of the bytes match
3575 and it will only require one more insn to finish. */
3576 result = tmp2;
3577 i = tmp != b1 ? 32
3578 : tmp != b2 ? 24
3579 : tmp != b3 ? 16
3580 : 8;
3581 }
3582
3583 /* Second, try to find a 16-bit replicated constant that can
3584 leave three of the bytes clear. If b2 or b4 is already
3585 zero, then we can. If the 8-bit from above would not
3586 clear b2 anyway, then we still win. */
3587 else if (b1 == b3 && (!b2 || !b4
3588 || (remainder & 0x00ff0000 & ~result)))
3589 {
3590 result = remainder & 0xff00ff00;
3591 i = 24;
3592 }
3593 }
3594 else if (loc > 16)
3595 {
3596 /* The 8-bit immediate already found clears b2 (and maybe b3)
3597 and we don't get here unless b1 is already clear, but it will
3598 leave b4 unchanged. */
3599
3600 /* If we can clear b2 and b4 at once, then we win, since the
3601 8-bits couldn't possibly reach that far. */
3602 if (b2 == b4)
3603 {
3604 result = remainder & 0x00ff00ff;
3605 i = 16;
3606 }
3607 }
3608 }
3609
3610 return_sequence->i[insns++] = result;
3611 remainder &= ~result;
3612
3613 if (code == SET || code == MINUS)
3614 code = PLUS;
3615 }
3616 while (remainder);
3617
3618 return insns;
3619 }
3620
3621 /* Emit an instruction with the indicated PATTERN. If COND is
3622 non-NULL, conditionalize the execution of the instruction on COND
3623 being true. */
3624
3625 static void
3626 emit_constant_insn (rtx cond, rtx pattern)
3627 {
3628 if (cond)
3629 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3630 emit_insn (pattern);
3631 }
3632
3633 /* As above, but extra parameter GENERATE which, if clear, suppresses
3634 RTL generation. */
3635
3636 static int
3637 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3638 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3639 int generate)
3640 {
3641 int can_invert = 0;
3642 int can_negate = 0;
3643 int final_invert = 0;
3644 int i;
3645 int set_sign_bit_copies = 0;
3646 int clear_sign_bit_copies = 0;
3647 int clear_zero_bit_copies = 0;
3648 int set_zero_bit_copies = 0;
3649 int insns = 0, neg_insns, inv_insns;
3650 unsigned HOST_WIDE_INT temp1, temp2;
3651 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3652 struct four_ints *immediates;
3653 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3654
3655 /* Find out which operations are safe for a given CODE. Also do a quick
3656 check for degenerate cases; these can occur when DImode operations
3657 are split. */
3658 switch (code)
3659 {
3660 case SET:
3661 can_invert = 1;
3662 break;
3663
3664 case PLUS:
3665 can_negate = 1;
3666 break;
3667
3668 case IOR:
3669 if (remainder == 0xffffffff)
3670 {
3671 if (generate)
3672 emit_constant_insn (cond,
3673 gen_rtx_SET (VOIDmode, target,
3674 GEN_INT (ARM_SIGN_EXTEND (val))));
3675 return 1;
3676 }
3677
3678 if (remainder == 0)
3679 {
3680 if (reload_completed && rtx_equal_p (target, source))
3681 return 0;
3682
3683 if (generate)
3684 emit_constant_insn (cond,
3685 gen_rtx_SET (VOIDmode, target, source));
3686 return 1;
3687 }
3688 break;
3689
3690 case AND:
3691 if (remainder == 0)
3692 {
3693 if (generate)
3694 emit_constant_insn (cond,
3695 gen_rtx_SET (VOIDmode, target, const0_rtx));
3696 return 1;
3697 }
3698 if (remainder == 0xffffffff)
3699 {
3700 if (reload_completed && rtx_equal_p (target, source))
3701 return 0;
3702 if (generate)
3703 emit_constant_insn (cond,
3704 gen_rtx_SET (VOIDmode, target, source));
3705 return 1;
3706 }
3707 can_invert = 1;
3708 break;
3709
3710 case XOR:
3711 if (remainder == 0)
3712 {
3713 if (reload_completed && rtx_equal_p (target, source))
3714 return 0;
3715 if (generate)
3716 emit_constant_insn (cond,
3717 gen_rtx_SET (VOIDmode, target, source));
3718 return 1;
3719 }
3720
3721 if (remainder == 0xffffffff)
3722 {
3723 if (generate)
3724 emit_constant_insn (cond,
3725 gen_rtx_SET (VOIDmode, target,
3726 gen_rtx_NOT (mode, source)));
3727 return 1;
3728 }
3729 final_invert = 1;
3730 break;
3731
3732 case MINUS:
3733 /* We treat MINUS as (val - source), since (source - val) is always
3734 passed as (source + (-val)). */
3735 if (remainder == 0)
3736 {
3737 if (generate)
3738 emit_constant_insn (cond,
3739 gen_rtx_SET (VOIDmode, target,
3740 gen_rtx_NEG (mode, source)));
3741 return 1;
3742 }
3743 if (const_ok_for_arm (val))
3744 {
3745 if (generate)
3746 emit_constant_insn (cond,
3747 gen_rtx_SET (VOIDmode, target,
3748 gen_rtx_MINUS (mode, GEN_INT (val),
3749 source)));
3750 return 1;
3751 }
3752
3753 break;
3754
3755 default:
3756 gcc_unreachable ();
3757 }
3758
3759 /* If we can do it in one insn get out quickly. */
3760 if (const_ok_for_op (val, code))
3761 {
3762 if (generate)
3763 emit_constant_insn (cond,
3764 gen_rtx_SET (VOIDmode, target,
3765 (source
3766 ? gen_rtx_fmt_ee (code, mode, source,
3767 GEN_INT (val))
3768 : GEN_INT (val))));
3769 return 1;
3770 }
3771
3772 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3773 insn. */
3774 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3775 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3776 {
3777 if (generate)
3778 {
3779 if (mode == SImode && i == 16)
3780 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3781 smaller insn. */
3782 emit_constant_insn (cond,
3783 gen_zero_extendhisi2
3784 (target, gen_lowpart (HImode, source)));
3785 else
3786 /* Extz only supports SImode, but we can coerce the operands
3787 into that mode. */
3788 emit_constant_insn (cond,
3789 gen_extzv_t2 (gen_lowpart (SImode, target),
3790 gen_lowpart (SImode, source),
3791 GEN_INT (i), const0_rtx));
3792 }
3793
3794 return 1;
3795 }
3796
3797 /* Calculate a few attributes that may be useful for specific
3798 optimizations. */
3799 /* Count number of leading zeros. */
3800 for (i = 31; i >= 0; i--)
3801 {
3802 if ((remainder & (1 << i)) == 0)
3803 clear_sign_bit_copies++;
3804 else
3805 break;
3806 }
3807
3808 /* Count number of leading 1's. */
3809 for (i = 31; i >= 0; i--)
3810 {
3811 if ((remainder & (1 << i)) != 0)
3812 set_sign_bit_copies++;
3813 else
3814 break;
3815 }
3816
3817 /* Count number of trailing zero's. */
3818 for (i = 0; i <= 31; i++)
3819 {
3820 if ((remainder & (1 << i)) == 0)
3821 clear_zero_bit_copies++;
3822 else
3823 break;
3824 }
3825
3826 /* Count number of trailing 1's. */
3827 for (i = 0; i <= 31; i++)
3828 {
3829 if ((remainder & (1 << i)) != 0)
3830 set_zero_bit_copies++;
3831 else
3832 break;
3833 }
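
  /* For illustration: with remainder == 0x0003fc00 the loops above give
     clear_sign_bit_copies == 14 and clear_zero_bit_copies == 10, while
     both of the "set" counts remain zero.  */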
3834
3835 switch (code)
3836 {
3837 case SET:
3838 /* See if we can do this by sign_extending a constant that is known
3839 to be negative. This is a good way of doing it, since the shift
3840 may well merge into a subsequent insn. */
3841 if (set_sign_bit_copies > 1)
3842 {
3843 if (const_ok_for_arm
3844 (temp1 = ARM_SIGN_EXTEND (remainder
3845 << (set_sign_bit_copies - 1))))
3846 {
3847 if (generate)
3848 {
3849 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3850 emit_constant_insn (cond,
3851 gen_rtx_SET (VOIDmode, new_src,
3852 GEN_INT (temp1)));
3853 emit_constant_insn (cond,
3854 gen_ashrsi3 (target, new_src,
3855 GEN_INT (set_sign_bit_copies - 1)));
3856 }
3857 return 2;
3858 }
3859 /* For an inverted constant, we will need to set the low bits,
3860 these will be shifted out of harm's way. */
3861 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3862 if (const_ok_for_arm (~temp1))
3863 {
3864 if (generate)
3865 {
3866 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3867 emit_constant_insn (cond,
3868 gen_rtx_SET (VOIDmode, new_src,
3869 GEN_INT (temp1)));
3870 emit_constant_insn (cond,
3871 gen_ashrsi3 (target, new_src,
3872 GEN_INT (set_sign_bit_copies - 1)));
3873 }
3874 return 2;
3875 }
3876 }
3877
3878 /* See if we can calculate the value as the difference between two
3879 valid immediates. */
3880 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3881 {
3882 int topshift = clear_sign_bit_copies & ~1;
3883
3884 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3885 & (0xff000000 >> topshift));
3886
3887 /* If temp1 is zero, then that means the 9 most significant
3888 bits of remainder were 1 and we've caused it to overflow.
3889 When topshift is 0 we don't need to do anything since we
3890 can borrow from 'bit 32'. */
3891 if (temp1 == 0 && topshift != 0)
3892 temp1 = 0x80000000 >> (topshift - 1);
3893
3894 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3895
3896 if (const_ok_for_arm (temp2))
3897 {
3898 if (generate)
3899 {
3900 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3901 emit_constant_insn (cond,
3902 gen_rtx_SET (VOIDmode, new_src,
3903 GEN_INT (temp1)));
3904 emit_constant_insn (cond,
3905 gen_addsi3 (target, new_src,
3906 GEN_INT (-temp2)));
3907 }
3908
3909 return 2;
3910 }
3911 }
3912
3913 /* See if we can generate this by setting the bottom (or the top)
3914 16 bits, and then shifting these into the other half of the
3915 word. We only look for the simplest cases, to do more would cost
3916 too much. Be careful, however, not to generate this when the
3917 alternative would take fewer insns. */
3918 if (val & 0xffff0000)
3919 {
3920 temp1 = remainder & 0xffff0000;
3921 temp2 = remainder & 0x0000ffff;
3922
3923 /* Overlaps outside this range are best done using other methods. */
3924 for (i = 9; i < 24; i++)
3925 {
3926 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3927 && !const_ok_for_arm (temp2))
3928 {
3929 rtx new_src = (subtargets
3930 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3931 : target);
3932 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3933 source, subtargets, generate);
3934 source = new_src;
3935 if (generate)
3936 emit_constant_insn
3937 (cond,
3938 gen_rtx_SET
3939 (VOIDmode, target,
3940 gen_rtx_IOR (mode,
3941 gen_rtx_ASHIFT (mode, source,
3942 GEN_INT (i)),
3943 source)));
3944 return insns + 1;
3945 }
3946 }
3947
3948 /* Don't duplicate cases already considered. */
3949 for (i = 17; i < 24; i++)
3950 {
3951 if (((temp1 | (temp1 >> i)) == remainder)
3952 && !const_ok_for_arm (temp1))
3953 {
3954 rtx new_src = (subtargets
3955 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3956 : target);
3957 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3958 source, subtargets, generate);
3959 source = new_src;
3960 if (generate)
3961 emit_constant_insn
3962 (cond,
3963 gen_rtx_SET (VOIDmode, target,
3964 gen_rtx_IOR
3965 (mode,
3966 gen_rtx_LSHIFTRT (mode, source,
3967 GEN_INT (i)),
3968 source)));
3969 return insns + 1;
3970 }
3971 }
3972 }
3973 break;
3974
3975 case IOR:
3976 case XOR:
3977 /* If we have IOR or XOR, and the constant can be loaded in a
3978 single instruction, and we can find a temporary to put it in,
3979 then this can be done in two instructions instead of 3-4. */
3980 if (subtargets
3981 /* TARGET can't be NULL if SUBTARGETS is 0 */
3982 || (reload_completed && !reg_mentioned_p (target, source)))
3983 {
3984 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3985 {
3986 if (generate)
3987 {
3988 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3989
3990 emit_constant_insn (cond,
3991 gen_rtx_SET (VOIDmode, sub,
3992 GEN_INT (val)));
3993 emit_constant_insn (cond,
3994 gen_rtx_SET (VOIDmode, target,
3995 gen_rtx_fmt_ee (code, mode,
3996 source, sub)));
3997 }
3998 return 2;
3999 }
4000 }
4001
4002 if (code == XOR)
4003 break;
4004
4005 /* Convert.
4006 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4007 and the remainder 0s for e.g. 0xfff00000)
4008 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4009
4010 This can be done in 2 instructions by using shifts with mov or mvn.
4011 e.g. for
4012 x = x | 0xfff00000;
4013 we generate.
4014 mvn r0, r0, asl #12
4015 mvn r0, r0, lsr #12 */
4016 if (set_sign_bit_copies > 8
4017 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4018 {
4019 if (generate)
4020 {
4021 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4022 rtx shift = GEN_INT (set_sign_bit_copies);
4023
4024 emit_constant_insn
4025 (cond,
4026 gen_rtx_SET (VOIDmode, sub,
4027 gen_rtx_NOT (mode,
4028 gen_rtx_ASHIFT (mode,
4029 source,
4030 shift))));
4031 emit_constant_insn
4032 (cond,
4033 gen_rtx_SET (VOIDmode, target,
4034 gen_rtx_NOT (mode,
4035 gen_rtx_LSHIFTRT (mode, sub,
4036 shift))));
4037 }
4038 return 2;
4039 }
4040
4041 /* Convert
4042 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4043 to
4044 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4045
4046 E.g. for r0 = r0 | 0xfff
4047 mvn r0, r0, lsr #12
4048 mvn r0, r0, asl #12
4049
4050 */
4051 if (set_zero_bit_copies > 8
4052 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4053 {
4054 if (generate)
4055 {
4056 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4057 rtx shift = GEN_INT (set_zero_bit_copies);
4058
4059 emit_constant_insn
4060 (cond,
4061 gen_rtx_SET (VOIDmode, sub,
4062 gen_rtx_NOT (mode,
4063 gen_rtx_LSHIFTRT (mode,
4064 source,
4065 shift))));
4066 emit_constant_insn
4067 (cond,
4068 gen_rtx_SET (VOIDmode, target,
4069 gen_rtx_NOT (mode,
4070 gen_rtx_ASHIFT (mode, sub,
4071 shift))));
4072 }
4073 return 2;
4074 }
4075
4076 /* This will never be reached for Thumb2 because orn is a valid
4077 instruction. This is for Thumb1 and the ARM 32 bit cases.
4078
4079 x = y | constant (such that ~constant is a valid constant)
4080 Transform this to
4081 x = ~(~y & ~constant).
4082 */
4083 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4084 {
4085 if (generate)
4086 {
4087 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4088 emit_constant_insn (cond,
4089 gen_rtx_SET (VOIDmode, sub,
4090 gen_rtx_NOT (mode, source)));
4091 source = sub;
4092 if (subtargets)
4093 sub = gen_reg_rtx (mode);
4094 emit_constant_insn (cond,
4095 gen_rtx_SET (VOIDmode, sub,
4096 gen_rtx_AND (mode, source,
4097 GEN_INT (temp1))));
4098 emit_constant_insn (cond,
4099 gen_rtx_SET (VOIDmode, target,
4100 gen_rtx_NOT (mode, sub)));
4101 }
4102 return 3;
4103 }
4104 break;
4105
4106 case AND:
4107 /* See if two shifts will do 2 or more insn's worth of work. */
4108 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4109 {
4110 HOST_WIDE_INT shift_mask = ((0xffffffff
4111 << (32 - clear_sign_bit_copies))
4112 & 0xffffffff);
4113
4114 if ((remainder | shift_mask) != 0xffffffff)
4115 {
4116 if (generate)
4117 {
4118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4119 insns = arm_gen_constant (AND, mode, cond,
4120 remainder | shift_mask,
4121 new_src, source, subtargets, 1);
4122 source = new_src;
4123 }
4124 else
4125 {
4126 rtx targ = subtargets ? NULL_RTX : target;
4127 insns = arm_gen_constant (AND, mode, cond,
4128 remainder | shift_mask,
4129 targ, source, subtargets, 0);
4130 }
4131 }
4132
4133 if (generate)
4134 {
4135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4136 rtx shift = GEN_INT (clear_sign_bit_copies);
4137
4138 emit_insn (gen_ashlsi3 (new_src, source, shift));
4139 emit_insn (gen_lshrsi3 (target, new_src, shift));
4140 }
4141
4142 return insns + 2;
4143 }
4144
4145 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4146 {
4147 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4148
4149 if ((remainder | shift_mask) != 0xffffffff)
4150 {
4151 if (generate)
4152 {
4153 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4154
4155 insns = arm_gen_constant (AND, mode, cond,
4156 remainder | shift_mask,
4157 new_src, source, subtargets, 1);
4158 source = new_src;
4159 }
4160 else
4161 {
4162 rtx targ = subtargets ? NULL_RTX : target;
4163
4164 insns = arm_gen_constant (AND, mode, cond,
4165 remainder | shift_mask,
4166 targ, source, subtargets, 0);
4167 }
4168 }
4169
4170 if (generate)
4171 {
4172 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4173 rtx shift = GEN_INT (clear_zero_bit_copies);
4174
4175 emit_insn (gen_lshrsi3 (new_src, source, shift));
4176 emit_insn (gen_ashlsi3 (target, new_src, shift));
4177 }
4178
4179 return insns + 2;
4180 }
4181
4182 break;
4183
4184 default:
4185 break;
4186 }
4187
4188 /* Calculate what the instruction sequences would be if we generated it
4189 normally, negated, or inverted. */
4190 if (code == AND)
4191 /* AND cannot be split into multiple insns, so invert and use BIC. */
4192 insns = 99;
4193 else
4194 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4195
4196 if (can_negate)
4197 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4198 &neg_immediates);
4199 else
4200 neg_insns = 99;
4201
4202 if (can_invert || final_invert)
4203 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4204 &inv_immediates);
4205 else
4206 inv_insns = 99;
4207
4208 immediates = &pos_immediates;
4209
4210 /* Is the negated immediate sequence more efficient? */
4211 if (neg_insns < insns && neg_insns <= inv_insns)
4212 {
4213 insns = neg_insns;
4214 immediates = &neg_immediates;
4215 }
4216 else
4217 can_negate = 0;
4218
4219 /* Is the inverted immediate sequence more efficient?
4220 We must allow for an extra NOT instruction for XOR operations, although
4221 there is some chance that the final 'mvn' will get optimized later. */
4222 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4223 {
4224 insns = inv_insns;
4225 immediates = &inv_immediates;
4226 }
4227 else
4228 {
4229 can_invert = 0;
4230 final_invert = 0;
4231 }
4232
4233 /* Now output the chosen sequence as instructions. */
4234 if (generate)
4235 {
4236 for (i = 0; i < insns; i++)
4237 {
4238 rtx new_src, temp1_rtx;
4239
4240 temp1 = immediates->i[i];
4241
4242 if (code == SET || code == MINUS)
4243 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4244 else if ((final_invert || i < (insns - 1)) && subtargets)
4245 new_src = gen_reg_rtx (mode);
4246 else
4247 new_src = target;
4248
4249 if (can_invert)
4250 temp1 = ~temp1;
4251 else if (can_negate)
4252 temp1 = -temp1;
4253
4254 temp1 = trunc_int_for_mode (temp1, mode);
4255 temp1_rtx = GEN_INT (temp1);
4256
4257 if (code == SET)
4258 ;
4259 else if (code == MINUS)
4260 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4261 else
4262 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4263
4264 emit_constant_insn (cond,
4265 gen_rtx_SET (VOIDmode, new_src,
4266 temp1_rtx));
4267 source = new_src;
4268
4269 if (code == SET)
4270 {
4271 can_negate = can_invert;
4272 can_invert = 0;
4273 code = PLUS;
4274 }
4275 else if (code == MINUS)
4276 code = PLUS;
4277 }
4278 }
4279
4280 if (final_invert)
4281 {
4282 if (generate)
4283 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4284 gen_rtx_NOT (mode, source)));
4285 insns++;
4286 }
4287
4288 return insns;
4289 }
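
/* For illustration (the exact sequence chosen depends on the target and
   options): a SET of 0x12345678 has no single-insn encoding, so without
   MOVW/MOVT it is typically synthesized one byte at a time, e.g.

	mov	r0, #0x12000000
	orr	r0, r0, #0x00340000
	orr	r0, r0, #0x00005600
	orr	r0, r0, #0x00000078

   whereas arm_split_constant emits a movw/movt pair when TARGET_USE_MOVT
   holds.  */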
4290
4291 /* Canonicalize a comparison so that we are more likely to recognize it.
4292 This can be done for a few constant compares, where we can make the
4293 immediate value easier to load. */
4294
4295 static void
4296 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4297 bool op0_preserve_value)
4298 {
4299 enum machine_mode mode;
4300 unsigned HOST_WIDE_INT i, maxval;
4301
4302 mode = GET_MODE (*op0);
4303 if (mode == VOIDmode)
4304 mode = GET_MODE (*op1);
4305
4306 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4307
4308 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4309 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4310 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4311 for GTU/LEU in Thumb mode. */
4312 if (mode == DImode)
4313 {
4314 rtx tem;
4315
4316 if (*code == GT || *code == LE
4317 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4318 {
4319 /* Missing comparison. First try to use an available
4320 comparison. */
4321 if (CONST_INT_P (*op1))
4322 {
4323 i = INTVAL (*op1);
4324 switch (*code)
4325 {
4326 case GT:
4327 case LE:
4328 if (i != maxval
4329 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4330 {
4331 *op1 = GEN_INT (i + 1);
4332 *code = *code == GT ? GE : LT;
4333 return;
4334 }
4335 break;
4336 case GTU:
4337 case LEU:
4338 if (i != ~((unsigned HOST_WIDE_INT) 0)
4339 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4340 {
4341 *op1 = GEN_INT (i + 1);
4342 *code = *code == GTU ? GEU : LTU;
4343 return;
4344 }
4345 break;
4346 default:
4347 gcc_unreachable ();
4348 }
4349 }
4350
4351 /* If that did not work, reverse the condition. */
4352 if (!op0_preserve_value)
4353 {
4354 tem = *op0;
4355 *op0 = *op1;
4356 *op1 = tem;
4357 *code = (int)swap_condition ((enum rtx_code)*code);
4358 }
4359 }
4360 return;
4361 }
4362
4363 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4364 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4365 to facilitate possible combining with a cmp into 'ands'. */
4366 if (mode == SImode
4367 && GET_CODE (*op0) == ZERO_EXTEND
4368 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4369 && GET_MODE (XEXP (*op0, 0)) == QImode
4370 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4371 && subreg_lowpart_p (XEXP (*op0, 0))
4372 && *op1 == const0_rtx)
4373 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4374 GEN_INT (255));
4375
4376 /* Comparisons smaller than DImode. Only adjust comparisons against
4377 an out-of-range constant. */
4378 if (!CONST_INT_P (*op1)
4379 || const_ok_for_arm (INTVAL (*op1))
4380 || const_ok_for_arm (- INTVAL (*op1)))
4381 return;
4382
4383 i = INTVAL (*op1);
4384
4385 switch (*code)
4386 {
4387 case EQ:
4388 case NE:
4389 return;
4390
4391 case GT:
4392 case LE:
4393 if (i != maxval
4394 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4395 {
4396 *op1 = GEN_INT (i + 1);
4397 *code = *code == GT ? GE : LT;
4398 return;
4399 }
4400 break;
4401
4402 case GE:
4403 case LT:
4404 if (i != ~maxval
4405 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4406 {
4407 *op1 = GEN_INT (i - 1);
4408 *code = *code == GE ? GT : LE;
4409 return;
4410 }
4411 break;
4412
4413 case GTU:
4414 case LEU:
4415 if (i != ~((unsigned HOST_WIDE_INT) 0)
4416 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4417 {
4418 *op1 = GEN_INT (i + 1);
4419 *code = *code == GTU ? GEU : LTU;
4420 return;
4421 }
4422 break;
4423
4424 case GEU:
4425 case LTU:
4426 if (i != 0
4427 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4428 {
4429 *op1 = GEN_INT (i - 1);
4430 *code = *code == GEU ? GTU : LEU;
4431 return;
4432 }
4433 break;
4434
4435 default:
4436 gcc_unreachable ();
4437 }
4438 }
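
/* For illustration: a comparison (reg > 0xfff) uses a constant that no
   ARM data-processing insn can encode, so the GT/LE case above rewrites
   it as (reg >= 0x1000), whose operand is a valid immediate.  */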
4439
4440
4441 /* Define how to find the value returned by a function. */
4442
4443 static rtx
4444 arm_function_value(const_tree type, const_tree func,
4445 bool outgoing ATTRIBUTE_UNUSED)
4446 {
4447 enum machine_mode mode;
4448 int unsignedp ATTRIBUTE_UNUSED;
4449 rtx r ATTRIBUTE_UNUSED;
4450
4451 mode = TYPE_MODE (type);
4452
4453 if (TARGET_AAPCS_BASED)
4454 return aapcs_allocate_return_reg (mode, type, func);
4455
4456 /* Promote integer types. */
4457 if (INTEGRAL_TYPE_P (type))
4458 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4459
4460 /* Promotes small structs returned in a register to full-word size
4461 for big-endian AAPCS. */
4462 if (arm_return_in_msb (type))
4463 {
4464 HOST_WIDE_INT size = int_size_in_bytes (type);
4465 if (size % UNITS_PER_WORD != 0)
4466 {
4467 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4468 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4469 }
4470 }
4471
4472 return arm_libcall_value_1 (mode);
4473 }
4474
4475 /* libcall hashtable helpers. */
4476
4477 struct libcall_hasher : typed_noop_remove <rtx_def>
4478 {
4479 typedef rtx_def value_type;
4480 typedef rtx_def compare_type;
4481 static inline hashval_t hash (const value_type *);
4482 static inline bool equal (const value_type *, const compare_type *);
4483 static inline void remove (value_type *);
4484 };
4485
4486 inline bool
4487 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4488 {
4489 return rtx_equal_p (p1, p2);
4490 }
4491
4492 inline hashval_t
4493 libcall_hasher::hash (const value_type *p1)
4494 {
4495 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4496 }
4497
4498 typedef hash_table <libcall_hasher> libcall_table_type;
4499
4500 static void
4501 add_libcall (libcall_table_type htab, rtx libcall)
4502 {
4503 *htab.find_slot (libcall, INSERT) = libcall;
4504 }
4505
4506 static bool
4507 arm_libcall_uses_aapcs_base (const_rtx libcall)
4508 {
4509 static bool init_done = false;
4510 static libcall_table_type libcall_htab;
4511
4512 if (!init_done)
4513 {
4514 init_done = true;
4515
4516 libcall_htab.create (31);
4517 add_libcall (libcall_htab,
4518 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4519 add_libcall (libcall_htab,
4520 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4521 add_libcall (libcall_htab,
4522 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4523 add_libcall (libcall_htab,
4524 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4525
4526 add_libcall (libcall_htab,
4527 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4528 add_libcall (libcall_htab,
4529 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4530 add_libcall (libcall_htab,
4531 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4532 add_libcall (libcall_htab,
4533 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4534
4535 add_libcall (libcall_htab,
4536 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4537 add_libcall (libcall_htab,
4538 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4539 add_libcall (libcall_htab,
4540 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4541 add_libcall (libcall_htab,
4542 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4543 add_libcall (libcall_htab,
4544 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4545 add_libcall (libcall_htab,
4546 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4547 add_libcall (libcall_htab,
4548 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4549 add_libcall (libcall_htab,
4550 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4551
4552 /* Values from double-precision helper functions are returned in core
4553 registers if the selected core only supports single-precision
4554 arithmetic, even if we are using the hard-float ABI. The same is
4555 true for single-precision helpers, but we will never be using the
4556 hard-float ABI on a CPU which doesn't support single-precision
4557 operations in hardware. */
4558 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4559 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4560 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4561 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4562 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4563 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4564 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4565 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4566 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4567 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4568 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4569 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4570 SFmode));
4571 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4572 DFmode));
4573 }
4574
4575 return libcall && libcall_htab.find (libcall) != NULL;
4576 }
4577
4578 static rtx
4579 arm_libcall_value_1 (enum machine_mode mode)
4580 {
4581 if (TARGET_AAPCS_BASED)
4582 return aapcs_libcall_value (mode);
4583 else if (TARGET_IWMMXT_ABI
4584 && arm_vector_mode_supported_p (mode))
4585 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4586 else
4587 return gen_rtx_REG (mode, ARG_REGISTER (1));
4588 }
4589
4590 /* Define how to find the value returned by a library function
4591 assuming the value has mode MODE. */
4592
4593 static rtx
4594 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4595 {
4596 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4597 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4598 {
4599 /* The following libcalls return their result in integer registers,
4600 even though they return a floating point value. */
4601 if (arm_libcall_uses_aapcs_base (libcall))
4602 return gen_rtx_REG (mode, ARG_REGISTER(1));
4603
4604 }
4605
4606 return arm_libcall_value_1 (mode);
4607 }
4608
4609 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4610
4611 static bool
4612 arm_function_value_regno_p (const unsigned int regno)
4613 {
4614 if (regno == ARG_REGISTER (1)
4615 || (TARGET_32BIT
4616 && TARGET_AAPCS_BASED
4617 && TARGET_VFP
4618 && TARGET_HARD_FLOAT
4619 && regno == FIRST_VFP_REGNUM)
4620 || (TARGET_IWMMXT_ABI
4621 && regno == FIRST_IWMMXT_REGNUM))
4622 return true;
4623
4624 return false;
4625 }
4626
4627 /* Determine the amount of memory needed to store the possible return
4628 registers of an untyped call. */
4629 int
4630 arm_apply_result_size (void)
4631 {
4632 int size = 16;
4633
4634 if (TARGET_32BIT)
4635 {
4636 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4637 size += 32;
4638 if (TARGET_IWMMXT_ABI)
4639 size += 8;
4640 }
4641
4642 return size;
4643 }
4644
4645 /* Decide whether TYPE should be returned in memory (true)
4646 or in a register (false). FNTYPE is the type of the function making
4647 the call. */
4648 static bool
4649 arm_return_in_memory (const_tree type, const_tree fntype)
4650 {
4651 HOST_WIDE_INT size;
4652
4653 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4654
4655 if (TARGET_AAPCS_BASED)
4656 {
4657 /* Simple, non-aggregate types (ie not including vectors and
4658 complex) are always returned in a register (or registers).
4659 We don't care about which register here, so we can short-cut
4660 some of the detail. */
4661 if (!AGGREGATE_TYPE_P (type)
4662 && TREE_CODE (type) != VECTOR_TYPE
4663 && TREE_CODE (type) != COMPLEX_TYPE)
4664 return false;
4665
4666 /* Any return value that is no larger than one word can be
4667 returned in r0. */
4668 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4669 return false;
4670
4671 /* Check any available co-processors to see if they accept the
4672 type as a register candidate (VFP, for example, can return
4673 some aggregates in consecutive registers). These aren't
4674 available if the call is variadic. */
4675 if (aapcs_select_return_coproc (type, fntype) >= 0)
4676 return false;
4677
4678 /* Vector values should be returned using ARM registers, not
4679 memory (unless they're over 16 bytes, which will break since
4680 we only have four call-clobbered registers to play with). */
4681 if (TREE_CODE (type) == VECTOR_TYPE)
4682 return (size < 0 || size > (4 * UNITS_PER_WORD));
4683
4684 /* The rest go in memory. */
4685 return true;
4686 }
4687
4688 if (TREE_CODE (type) == VECTOR_TYPE)
4689 return (size < 0 || size > (4 * UNITS_PER_WORD));
4690
4691 if (!AGGREGATE_TYPE_P (type) &&
4692 (TREE_CODE (type) != VECTOR_TYPE))
4693 /* All simple types are returned in registers. */
4694 return false;
4695
4696 if (arm_abi != ARM_ABI_APCS)
4697 {
4698 /* ATPCS and later return aggregate types in memory only if they are
4699 larger than a word (or are variable size). */
4700 return (size < 0 || size > UNITS_PER_WORD);
4701 }
4702
4703 /* For the arm-wince targets we choose to be compatible with Microsoft's
4704 ARM and Thumb compilers, which always return aggregates in memory. */
4705 #ifndef ARM_WINCE
4706 /* All structures/unions bigger than one word are returned in memory.
4707 Also catch the case where int_size_in_bytes returns -1. In this case
4708 the aggregate is either huge or of variable size, and in either case
4709 we will want to return it via memory and not in a register. */
4710 if (size < 0 || size > UNITS_PER_WORD)
4711 return true;
4712
4713 if (TREE_CODE (type) == RECORD_TYPE)
4714 {
4715 tree field;
4716
4717 /* For a struct the APCS says that we only return in a register
4718 if the type is 'integer like' and every addressable element
4719 has an offset of zero. For practical purposes this means
4720 that the structure can have at most one non bit-field element
4721 and that this element must be the first one in the structure. */
4722
4723 /* Find the first field, ignoring non FIELD_DECL things which will
4724 have been created by C++. */
4725 for (field = TYPE_FIELDS (type);
4726 field && TREE_CODE (field) != FIELD_DECL;
4727 field = DECL_CHAIN (field))
4728 continue;
4729
4730 if (field == NULL)
4731 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4732
4733 /* Check that the first field is valid for returning in a register. */
4734
4735 /* ... Floats are not allowed */
4736 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4737 return true;
4738
4739 /* ... Aggregates that are not themselves valid for returning in
4740 a register are not allowed. */
4741 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4742 return true;
4743
4744 /* Now check the remaining fields, if any. Only bitfields are allowed,
4745 since they are not addressable. */
4746 for (field = DECL_CHAIN (field);
4747 field;
4748 field = DECL_CHAIN (field))
4749 {
4750 if (TREE_CODE (field) != FIELD_DECL)
4751 continue;
4752
4753 if (!DECL_BIT_FIELD_TYPE (field))
4754 return true;
4755 }
4756
4757 return false;
4758 }
4759
4760 if (TREE_CODE (type) == UNION_TYPE)
4761 {
4762 tree field;
4763
4764 /* Unions can be returned in registers if every element is
4765 integral, or can be returned in an integer register. */
4766 for (field = TYPE_FIELDS (type);
4767 field;
4768 field = DECL_CHAIN (field))
4769 {
4770 if (TREE_CODE (field) != FIELD_DECL)
4771 continue;
4772
4773 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4774 return true;
4775
4776 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4777 return true;
4778 }
4779
4780 return false;
4781 }
4782 #endif /* not ARM_WINCE */
4783
4784 /* Return all other types in memory. */
4785 return true;
4786 }
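
/* For illustration of the APCS rules above: struct { int i; } is
   'integer like' and is returned in r0, while struct { float f; } and
   any aggregate larger than one word are returned in memory.  */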
4787
4788 const struct pcs_attribute_arg
4789 {
4790 const char *arg;
4791 enum arm_pcs value;
4792 } pcs_attribute_args[] =
4793 {
4794 {"aapcs", ARM_PCS_AAPCS},
4795 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4796 #if 0
4797 /* We could recognize these, but changes would be needed elsewhere
4798 * to implement them. */
4799 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4800 {"atpcs", ARM_PCS_ATPCS},
4801 {"apcs", ARM_PCS_APCS},
4802 #endif
4803 {NULL, ARM_PCS_UNKNOWN}
4804 };
4805
4806 static enum arm_pcs
4807 arm_pcs_from_attribute (tree attr)
4808 {
4809 const struct pcs_attribute_arg *ptr;
4810 const char *arg;
4811
4812 /* Get the value of the argument. */
4813 if (TREE_VALUE (attr) == NULL_TREE
4814 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4815 return ARM_PCS_UNKNOWN;
4816
4817 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4818
4819 /* Check it against the list of known arguments. */
4820 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4821 if (streq (arg, ptr->arg))
4822 return ptr->value;
4823
4824 /* An unrecognized PCS attribute argument. */
4825 return ARM_PCS_UNKNOWN;
4826 }
4827
4828 /* Get the PCS variant to use for this call. TYPE is the function's type
4829 specification, DECL is the specific declaration. DECL may be null if
4830 the call could be indirect or if this is a library call. */
4831 static enum arm_pcs
4832 arm_get_pcs_model (const_tree type, const_tree decl)
4833 {
4834 bool user_convention = false;
4835 enum arm_pcs user_pcs = arm_pcs_default;
4836 tree attr;
4837
4838 gcc_assert (type);
4839
4840 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4841 if (attr)
4842 {
4843 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4844 user_convention = true;
4845 }
4846
4847 if (TARGET_AAPCS_BASED)
4848 {
4849 /* Detect varargs functions. These always use the base rules
4850 (no argument is ever a candidate for a co-processor
4851 register). */
4852 bool base_rules = stdarg_p (type);
4853
4854 if (user_convention)
4855 {
4856 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4857 sorry ("non-AAPCS derived PCS variant");
4858 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4859 error ("variadic functions must use the base AAPCS variant");
4860 }
4861
4862 if (base_rules)
4863 return ARM_PCS_AAPCS;
4864 else if (user_convention)
4865 return user_pcs;
4866 else if (decl && flag_unit_at_a_time)
4867 {
4868 /* Local functions never leak outside this compilation unit,
4869 so we are free to use whatever conventions are
4870 appropriate. */
4871 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4872 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4873 if (i && i->local)
4874 return ARM_PCS_AAPCS_LOCAL;
4875 }
4876 }
4877 else if (user_convention && user_pcs != arm_pcs_default)
4878 sorry ("PCS variant");
4879
4880 /* For everything else we use the target's default. */
4881 return arm_pcs_default;
4882 }
4883
4884
4885 static void
4886 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4887 const_tree fntype ATTRIBUTE_UNUSED,
4888 rtx libcall ATTRIBUTE_UNUSED,
4889 const_tree fndecl ATTRIBUTE_UNUSED)
4890 {
4891 /* Record the unallocated VFP registers. */
4892 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4893 pcum->aapcs_vfp_reg_alloc = 0;
4894 }
4895
4896 /* Walk down the type tree of TYPE counting consecutive base elements.
4897 If *MODEP is VOIDmode, then set it to the first valid floating point
4898 type. If a non-floating point type is found, or if a floating point
4899 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4900 otherwise return the count in the sub-tree. */
4901 static int
4902 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4903 {
4904 enum machine_mode mode;
4905 HOST_WIDE_INT size;
4906
4907 switch (TREE_CODE (type))
4908 {
4909 case REAL_TYPE:
4910 mode = TYPE_MODE (type);
4911 if (mode != DFmode && mode != SFmode)
4912 return -1;
4913
4914 if (*modep == VOIDmode)
4915 *modep = mode;
4916
4917 if (*modep == mode)
4918 return 1;
4919
4920 break;
4921
4922 case COMPLEX_TYPE:
4923 mode = TYPE_MODE (TREE_TYPE (type));
4924 if (mode != DFmode && mode != SFmode)
4925 return -1;
4926
4927 if (*modep == VOIDmode)
4928 *modep = mode;
4929
4930 if (*modep == mode)
4931 return 2;
4932
4933 break;
4934
4935 case VECTOR_TYPE:
4936 /* Use V2SImode and V4SImode as representatives of all 64-bit
4937 and 128-bit vector types, whether or not those modes are
4938 supported with the present options. */
4939 size = int_size_in_bytes (type);
4940 switch (size)
4941 {
4942 case 8:
4943 mode = V2SImode;
4944 break;
4945 case 16:
4946 mode = V4SImode;
4947 break;
4948 default:
4949 return -1;
4950 }
4951
4952 if (*modep == VOIDmode)
4953 *modep = mode;
4954
4955 /* Vector modes are considered to be opaque: two vectors are
4956 equivalent for the purposes of being homogeneous aggregates
4957 if they are the same size. */
4958 if (*modep == mode)
4959 return 1;
4960
4961 break;
4962
4963 case ARRAY_TYPE:
4964 {
4965 int count;
4966 tree index = TYPE_DOMAIN (type);
4967
4968 /* Can't handle incomplete types. */
4969 if (!COMPLETE_TYPE_P (type))
4970 return -1;
4971
4972 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4973 if (count == -1
4974 || !index
4975 || !TYPE_MAX_VALUE (index)
4976 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4977 || !TYPE_MIN_VALUE (index)
4978 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4979 || count < 0)
4980 return -1;
4981
4982 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4983 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
4984
4985 /* There must be no padding. */
4986 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4987 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4988 != count * GET_MODE_BITSIZE (*modep)))
4989 return -1;
4990
4991 return count;
4992 }
4993
4994 case RECORD_TYPE:
4995 {
4996 int count = 0;
4997 int sub_count;
4998 tree field;
4999
5000 /* Can't handle incomplete types. */
5001 if (!COMPLETE_TYPE_P (type))
5002 return -1;
5003
5004 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5005 {
5006 if (TREE_CODE (field) != FIELD_DECL)
5007 continue;
5008
5009 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5010 if (sub_count < 0)
5011 return -1;
5012 count += sub_count;
5013 }
5014
5015 /* There must be no padding. */
5016 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5017 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5018 != count * GET_MODE_BITSIZE (*modep)))
5019 return -1;
5020
5021 return count;
5022 }
5023
5024 case UNION_TYPE:
5025 case QUAL_UNION_TYPE:
5026 {
5027 /* These aren't very interesting except in a degenerate case. */
5028 int count = 0;
5029 int sub_count;
5030 tree field;
5031
5032 /* Can't handle incomplete types. */
5033 if (!COMPLETE_TYPE_P (type))
5034 return -1;
5035
5036 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5037 {
5038 if (TREE_CODE (field) != FIELD_DECL)
5039 continue;
5040
5041 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5042 if (sub_count < 0)
5043 return -1;
5044 count = count > sub_count ? count : sub_count;
5045 }
5046
5047 /* There must be no padding. */
5048 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5049 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5050 != count * GET_MODE_BITSIZE (*modep)))
5051 return -1;
5052
5053 return count;
5054 }
5055
5056 default:
5057 break;
5058 }
5059
5060 return -1;
5061 }
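
/* For illustration: struct { double x, y, z; } yields *MODEP == DFmode
   and a count of 3 (a homogeneous aggregate candidate), double _Complex
   yields DFmode with a count of 2, while struct { double d; float f; }
   mixes element modes and yields -1.  */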
5062
5063 /* Return true if PCS_VARIANT should use VFP registers. */
5064 static bool
5065 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5066 {
5067 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5068 {
5069 static bool seen_thumb1_vfp = false;
5070
5071 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5072 {
5073 sorry ("Thumb-1 hard-float VFP ABI");
5074 /* sorry() is not immediately fatal, so only display this once. */
5075 seen_thumb1_vfp = true;
5076 }
5077
5078 return true;
5079 }
5080
5081 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5082 return false;
5083
5084 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5085 (TARGET_VFP_DOUBLE || !is_double));
5086 }
5087
5088 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5089 suitable for passing or returning in VFP registers for the PCS
5090 variant selected. If it is, then *BASE_MODE is updated to contain
5091 a machine mode describing each element of the argument's type and
5092 *COUNT to hold the number of such elements. */
5093 static bool
5094 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5095 enum machine_mode mode, const_tree type,
5096 enum machine_mode *base_mode, int *count)
5097 {
5098 enum machine_mode new_mode = VOIDmode;
5099
5100 /* If we have the type information, prefer that to working things
5101 out from the mode. */
5102 if (type)
5103 {
5104 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5105
5106 if (ag_count > 0 && ag_count <= 4)
5107 *count = ag_count;
5108 else
5109 return false;
5110 }
5111 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5112 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5113 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5114 {
5115 *count = 1;
5116 new_mode = mode;
5117 }
5118 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5119 {
5120 *count = 2;
5121 new_mode = (mode == DCmode ? DFmode : SFmode);
5122 }
5123 else
5124 return false;
5125
5126
5127 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5128 return false;
5129
5130 *base_mode = new_mode;
5131 return true;
5132 }
5133
5134 static bool
5135 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5136 enum machine_mode mode, const_tree type)
5137 {
5138 int count ATTRIBUTE_UNUSED;
5139 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5140
5141 if (!use_vfp_abi (pcs_variant, false))
5142 return false;
5143 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5144 &ag_mode, &count);
5145 }
5146
5147 static bool
5148 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5149 const_tree type)
5150 {
5151 if (!use_vfp_abi (pcum->pcs_variant, false))
5152 return false;
5153
5154 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5155 &pcum->aapcs_vfp_rmode,
5156 &pcum->aapcs_vfp_rcount);
5157 }
5158
5159 static bool
5160 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5161 const_tree type ATTRIBUTE_UNUSED)
5162 {
5163 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5164 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5165 int regno;
5166
5167 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5168 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5169 {
5170 pcum->aapcs_vfp_reg_alloc = mask << regno;
5171 if (mode == BLKmode
5172 || (mode == TImode && ! TARGET_NEON)
5173 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5174 {
5175 int i;
5176 int rcount = pcum->aapcs_vfp_rcount;
5177 int rshift = shift;
5178 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5179 rtx par;
5180 if (!TARGET_NEON)
5181 {
5182 /* Avoid using unsupported vector modes. */
5183 if (rmode == V2SImode)
5184 rmode = DImode;
5185 else if (rmode == V4SImode)
5186 {
5187 rmode = DImode;
5188 rcount *= 2;
5189 rshift /= 2;
5190 }
5191 }
5192 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5193 for (i = 0; i < rcount; i++)
5194 {
5195 rtx tmp = gen_rtx_REG (rmode,
5196 FIRST_VFP_REGNUM + regno + i * rshift);
5197 tmp = gen_rtx_EXPR_LIST
5198 (VOIDmode, tmp,
5199 GEN_INT (i * GET_MODE_SIZE (rmode)));
5200 XVECEXP (par, 0, i) = tmp;
5201 }
5202
5203 pcum->aapcs_reg = par;
5204 }
5205 else
5206 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5207 return true;
5208 }
5209 return false;
5210 }
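
/* For illustration: a single DFmode argument gives SHIFT == 2 and
   MASK == 3, so it is allocated to the first free even-numbered pair of
   single-precision registers (s0/s1, i.e. d0, when those are free).  */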
5211
5212 static rtx
5213 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5214 enum machine_mode mode,
5215 const_tree type ATTRIBUTE_UNUSED)
5216 {
5217 if (!use_vfp_abi (pcs_variant, false))
5218 return NULL;
5219
5220 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5221 {
5222 int count;
5223 enum machine_mode ag_mode;
5224 int i;
5225 rtx par;
5226 int shift;
5227
5228 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5229 &ag_mode, &count);
5230
5231 if (!TARGET_NEON)
5232 {
5233 if (ag_mode == V2SImode)
5234 ag_mode = DImode;
5235 else if (ag_mode == V4SImode)
5236 {
5237 ag_mode = DImode;
5238 count *= 2;
5239 }
5240 }
5241 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5242 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5243 for (i = 0; i < count; i++)
5244 {
5245 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5246 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5247 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5248 XVECEXP (par, 0, i) = tmp;
5249 }
5250
5251 return par;
5252 }
5253
5254 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5255 }
5256
5257 static void
5258 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5259 enum machine_mode mode ATTRIBUTE_UNUSED,
5260 const_tree type ATTRIBUTE_UNUSED)
5261 {
5262 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5263 pcum->aapcs_vfp_reg_alloc = 0;
5264 return;
5265 }
5266
5267 #define AAPCS_CP(X) \
5268 { \
5269 aapcs_ ## X ## _cum_init, \
5270 aapcs_ ## X ## _is_call_candidate, \
5271 aapcs_ ## X ## _allocate, \
5272 aapcs_ ## X ## _is_return_candidate, \
5273 aapcs_ ## X ## _allocate_return_reg, \
5274 aapcs_ ## X ## _advance \
5275 }
5276
5277 /* Table of co-processors that can be used to pass arguments in
5278 registers. Ideally no argument should be a candidate for more than
5279 one co-processor table entry, but the table is processed in order
5280 and stops after the first match. If that entry then fails to put
5281 the argument into a co-processor register, the argument will go on
5282 the stack. */
5283 static struct
5284 {
5285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5286 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5287
5288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5289 BLKmode) is a candidate for this co-processor's registers; this
5290 function should ignore any position-dependent state in
5291 CUMULATIVE_ARGS and only use call-type dependent information. */
5292 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5293
5294 /* Return true if the argument does get a co-processor register; it
5295 should set aapcs_reg to an RTX for the allocated register, in the
5296 form required as the return value of FUNCTION_ARG. */
5297 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5298
5299 /* Return true if a result of mode MODE (or type TYPE if MODE is
5300 BLKmode) can be returned in this co-processor's registers. */
5301 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5302
5303 /* Allocate and return an RTX element to hold the return value of a
5304 call; this routine must not fail and will only be called if
5305 is_return_candidate returned true with the same parameters. */
5306 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5307
5308 /* Finish processing this argument and prepare to start processing
5309 the next one. */
5310 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5311 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5312 {
5313 AAPCS_CP(vfp)
5314 };
5315
5316 #undef AAPCS_CP
5317
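/* Return the index of the first co-processor slot whose registers are a
   candidate for passing an argument of mode MODE and type TYPE under
   PCUM's calling convention, or -1 if there is none.  */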
5318 static int
5319 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5320 const_tree type)
5321 {
5322 int i;
5323
5324 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5325 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5326 return i;
5327
5328 return -1;
5329 }
5330
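/* Return the index of the co-processor slot whose registers are a
   candidate for returning a value of type TYPE from a function of type
   (or decl) FNTYPE, or -1 if no co-processor slot applies.  */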
5331 static int
5332 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5333 {
5334 /* We aren't passed a decl, so we can't check that a call is local.
5335 However, it isn't clear that that would be a win anyway, since it
5336 might limit some tail-calling opportunities. */
5337 enum arm_pcs pcs_variant;
5338
5339 if (fntype)
5340 {
5341 const_tree fndecl = NULL_TREE;
5342
5343 if (TREE_CODE (fntype) == FUNCTION_DECL)
5344 {
5345 fndecl = fntype;
5346 fntype = TREE_TYPE (fntype);
5347 }
5348
5349 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5350 }
5351 else
5352 pcs_variant = arm_pcs_default;
5353
5354 if (pcs_variant != ARM_PCS_AAPCS)
5355 {
5356 int i;
5357
5358 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5359 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5360 TYPE_MODE (type),
5361 type))
5362 return i;
5363 }
5364 return -1;
5365 }
5366
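/* Return an RTX describing where a value of mode MODE and type TYPE is
   returned for a call to a function of type (or decl) FNTYPE under the
   AAPCS rules.  */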
5367 static rtx
5368 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5369 const_tree fntype)
5370 {
5371 /* We aren't passed a decl, so we can't check that a call is local.
5372 However, it isn't clear that that would be a win anyway, since it
5373 might limit some tail-calling opportunities. */
5374 enum arm_pcs pcs_variant;
5375 int unsignedp ATTRIBUTE_UNUSED;
5376
5377 if (fntype)
5378 {
5379 const_tree fndecl = NULL_TREE;
5380
5381 if (TREE_CODE (fntype) == FUNCTION_DECL)
5382 {
5383 fndecl = fntype;
5384 fntype = TREE_TYPE (fntype);
5385 }
5386
5387 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5388 }
5389 else
5390 pcs_variant = arm_pcs_default;
5391
5392 /* Promote integer types. */
5393 if (type && INTEGRAL_TYPE_P (type))
5394 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5395
5396 if (pcs_variant != ARM_PCS_AAPCS)
5397 {
5398 int i;
5399
5400 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5401 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5402 type))
5403 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5404 mode, type);
5405 }
5406
5407 /* Promote small structs returned in a register to full-word size
5408 for big-endian AAPCS. */
5409 if (type && arm_return_in_msb (type))
5410 {
5411 HOST_WIDE_INT size = int_size_in_bytes (type);
5412 if (size % UNITS_PER_WORD != 0)
5413 {
5414 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5415 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5416 }
5417 }
5418
5419 return gen_rtx_REG (mode, R0_REGNUM);
5420 }
5421
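/* Return an RTX describing where a library call returns a value of mode
   MODE under the AAPCS.  */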
5422 static rtx
5423 aapcs_libcall_value (enum machine_mode mode)
5424 {
5425 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5426 && GET_MODE_SIZE (mode) <= 4)
5427 mode = SImode;
5428
5429 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5430 }
5431
5432 /* Lay out a function argument using the AAPCS rules. The rule
5433 numbers referred to here are those in the AAPCS. */
5434 static void
5435 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5436 const_tree type, bool named)
5437 {
5438 int nregs, nregs2;
5439 int ncrn;
5440
5441 /* We only need to do this once per argument. */
5442 if (pcum->aapcs_arg_processed)
5443 return;
5444
5445 pcum->aapcs_arg_processed = true;
5446
5447 /* Special case: if named is false then we are handling an incoming
5448 anonymous argument which is on the stack. */
5449 if (!named)
5450 return;
5451
5452 /* Is this a potential co-processor register candidate? */
5453 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5454 {
5455 int slot = aapcs_select_call_coproc (pcum, mode, type);
5456 pcum->aapcs_cprc_slot = slot;
5457
5458 /* We don't have to apply any of the rules from part B of the
5459 preparation phase; these are handled elsewhere in the
5460 compiler. */
5461
5462 if (slot >= 0)
5463 {
5464 /* A Co-processor register candidate goes either in its own
5465 class of registers or on the stack. */
5466 if (!pcum->aapcs_cprc_failed[slot])
5467 {
5468 /* C1.cp - Try to allocate the argument to co-processor
5469 registers. */
5470 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5471 return;
5472
5473 /* C2.cp - Put the argument on the stack and note that we
5474 can't assign any more candidates in this slot. We also
5475 need to note that we have allocated stack space, so that
5476 we won't later try to split a non-cprc candidate between
5477 core registers and the stack. */
5478 pcum->aapcs_cprc_failed[slot] = true;
5479 pcum->can_split = false;
5480 }
5481
5482 /* We didn't get a register, so this argument goes on the
5483 stack. */
5484 gcc_assert (pcum->can_split == false);
5485 return;
5486 }
5487 }
5488
5489 /* C3 - For double-word aligned arguments, round the NCRN up to the
5490 next even number. */
5491 ncrn = pcum->aapcs_ncrn;
5492 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5493 ncrn++;
5494
5495 nregs = ARM_NUM_REGS2 (mode, type);
5496
5497 /* Sigh, this test should really assert that nregs > 0, but a GCC
5498 extension allows empty structs and then gives them empty size; it
5499 then allows such a structure to be passed by value. For some of
5500 the code below we have to pretend that such an argument has
5501 non-zero size so that we 'locate' it correctly either in
5502 registers or on the stack. */
5503 gcc_assert (nregs >= 0);
5504
5505 nregs2 = nregs ? nregs : 1;
5506
5507 /* C4 - Argument fits entirely in core registers. */
5508 if (ncrn + nregs2 <= NUM_ARG_REGS)
5509 {
5510 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5511 pcum->aapcs_next_ncrn = ncrn + nregs;
5512 return;
5513 }
5514
5515 /* C5 - Some core registers left and there are no arguments already
5516 on the stack: split this argument between the remaining core
5517 registers and the stack. */
5518 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5519 {
5520 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5522 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5523 return;
5524 }
5525
5526 /* C6 - NCRN is set to 4. */
5527 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5528
5529 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5530 return;
5531 }
5532
5533 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5534 for a call to a function whose data type is FNTYPE.
5535 For a library call, FNTYPE is NULL. */
5536 void
5537 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5538 rtx libname,
5539 tree fndecl ATTRIBUTE_UNUSED)
5540 {
5541 /* Long call handling. */
5542 if (fntype)
5543 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5544 else
5545 pcum->pcs_variant = arm_pcs_default;
5546
5547 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5548 {
5549 if (arm_libcall_uses_aapcs_base (libname))
5550 pcum->pcs_variant = ARM_PCS_AAPCS;
5551
5552 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5553 pcum->aapcs_reg = NULL_RTX;
5554 pcum->aapcs_partial = 0;
5555 pcum->aapcs_arg_processed = false;
5556 pcum->aapcs_cprc_slot = -1;
5557 pcum->can_split = true;
5558
5559 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5560 {
5561 int i;
5562
5563 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5564 {
5565 pcum->aapcs_cprc_failed[i] = false;
5566 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5567 }
5568 }
5569 return;
5570 }
5571
5572 /* Legacy ABIs */
5573
5574 /* On the ARM, the offset starts at 0. */
5575 pcum->nregs = 0;
5576 pcum->iwmmxt_nregs = 0;
5577 pcum->can_split = true;
5578
5579 /* Varargs vectors are treated the same as long long.
5580 named_count avoids having to change the way arm handles 'named'. */
5581 pcum->named_count = 0;
5582 pcum->nargs = 0;
5583
5584 if (TARGET_REALLY_IWMMXT && fntype)
5585 {
5586 tree fn_arg;
5587
5588 for (fn_arg = TYPE_ARG_TYPES (fntype);
5589 fn_arg;
5590 fn_arg = TREE_CHAIN (fn_arg))
5591 pcum->named_count += 1;
5592
5593 if (! pcum->named_count)
5594 pcum->named_count = INT_MAX;
5595 }
5596 }
5597
5598 /* Return true if we use LRA instead of reload pass. */
5599 static bool
5600 arm_lra_p (void)
5601 {
5602 return arm_lra_flag;
5603 }
5604
5605 /* Return true if mode/type need doubleword alignment. */
5606 static bool
5607 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5608 {
5609 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5610 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5611 }
5612
5613
5614 /* Determine where to put an argument to a function.
5615 Value is zero to push the argument on the stack,
5616 or a hard register in which to store the argument.
5617
5618 MODE is the argument's machine mode.
5619 TYPE is the data type of the argument (as a tree).
5620 This is null for libcalls where that information may
5621 not be available.
5622 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5623 the preceding args and about the function being called.
5624 NAMED is nonzero if this argument is a named parameter
5625 (otherwise it is an extra parameter matching an ellipsis).
5626
5627 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5628 other arguments are passed on the stack. If (NAMED == 0) (which happens
5629 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5630 defined), say it is passed on the stack (function_prologue will
5631 indeed make it pass on the stack if necessary). */
5632
5633 static rtx
5634 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5635 const_tree type, bool named)
5636 {
5637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5638 int nregs;
5639
5640 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5641 a call insn (op3 of a call_value insn). */
5642 if (mode == VOIDmode)
5643 return const0_rtx;
5644
5645 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5646 {
5647 aapcs_layout_arg (pcum, mode, type, named);
5648 return pcum->aapcs_reg;
5649 }
5650
5651 /* Varargs vectors are treated the same as long long.
5652 named_count avoids having to change the way arm handles 'named'. */
5653 if (TARGET_IWMMXT_ABI
5654 && arm_vector_mode_supported_p (mode)
5655 && pcum->named_count > pcum->nargs + 1)
5656 {
5657 if (pcum->iwmmxt_nregs <= 9)
5658 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5659 else
5660 {
5661 pcum->can_split = false;
5662 return NULL_RTX;
5663 }
5664 }
5665
5666 /* Put doubleword aligned quantities in even register pairs. */
5667 if (pcum->nregs & 1
5668 && ARM_DOUBLEWORD_ALIGN
5669 && arm_needs_doubleword_align (mode, type))
5670 pcum->nregs++;
5671
5672 /* Only allow splitting an arg between regs and memory if all preceding
5673 args were allocated to regs. For args passed by reference we only count
5674 the reference pointer. */
5675 if (pcum->can_split)
5676 nregs = 1;
5677 else
5678 nregs = ARM_NUM_REGS2 (mode, type);
5679
5680 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5681 return NULL_RTX;
5682
5683 return gen_rtx_REG (mode, pcum->nregs);
5684 }
5685
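/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Return, in bits, the alignment
   required when passing an argument of mode MODE and type TYPE.  */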
5686 static unsigned int
5687 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5688 {
5689 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5690 ? DOUBLEWORD_ALIGNMENT
5691 : PARM_BOUNDARY);
5692 }
5693
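/* Implement TARGET_ARG_PARTIAL_BYTES.  Return the number of bytes of an
   argument of mode MODE and type TYPE that are passed in registers when
   the argument is split between registers and the stack.  */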
5694 static int
5695 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5696 tree type, bool named)
5697 {
5698 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5699 int nregs = pcum->nregs;
5700
5701 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5702 {
5703 aapcs_layout_arg (pcum, mode, type, named);
5704 return pcum->aapcs_partial;
5705 }
5706
5707 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5708 return 0;
5709
5710 if (NUM_ARG_REGS > nregs
5711 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5712 && pcum->can_split)
5713 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5714
5715 return 0;
5716 }
5717
5718 /* Update the data in PCUM to advance over an argument
5719 of mode MODE and data type TYPE.
5720 (TYPE is null for libcalls where that information may not be available.) */
5721
5722 static void
5723 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5724 const_tree type, bool named)
5725 {
5726 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5727
5728 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5729 {
5730 aapcs_layout_arg (pcum, mode, type, named);
5731
5732 if (pcum->aapcs_cprc_slot >= 0)
5733 {
5734 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5735 type);
5736 pcum->aapcs_cprc_slot = -1;
5737 }
5738
5739 /* Generic stuff. */
5740 pcum->aapcs_arg_processed = false;
5741 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5742 pcum->aapcs_reg = NULL_RTX;
5743 pcum->aapcs_partial = 0;
5744 }
5745 else
5746 {
5747 pcum->nargs += 1;
5748 if (arm_vector_mode_supported_p (mode)
5749 && pcum->named_count > pcum->nargs
5750 && TARGET_IWMMXT_ABI)
5751 pcum->iwmmxt_nregs += 1;
5752 else
5753 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5754 }
5755 }
5756
5757 /* Variable sized types are passed by reference. This is a GCC
5758 extension to the ARM ABI. */
5759
5760 static bool
5761 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5762 enum machine_mode mode ATTRIBUTE_UNUSED,
5763 const_tree type, bool named ATTRIBUTE_UNUSED)
5764 {
5765 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5766 }
5767 \f
5768 /* Encode the current state of the #pragma [no_]long_calls. */
5769 typedef enum
5770 {
5771 OFF, /* No #pragma [no_]long_calls is in effect. */
5772 LONG, /* #pragma long_calls is in effect. */
5773 SHORT /* #pragma no_long_calls is in effect. */
5774 } arm_pragma_enum;
5775
5776 static arm_pragma_enum arm_pragma_long_calls = OFF;
5777
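/* Handlers for the #pragma long_calls, #pragma no_long_calls and
   #pragma long_calls_off directives.  */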
5778 void
5779 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5780 {
5781 arm_pragma_long_calls = LONG;
5782 }
5783
5784 void
5785 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5786 {
5787 arm_pragma_long_calls = SHORT;
5788 }
5789
5790 void
5791 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5792 {
5793 arm_pragma_long_calls = OFF;
5794 }
5795 \f
5796 /* Handle an attribute requiring a FUNCTION_DECL;
5797 arguments as in struct attribute_spec.handler. */
5798 static tree
5799 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5800 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5801 {
5802 if (TREE_CODE (*node) != FUNCTION_DECL)
5803 {
5804 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5805 name);
5806 *no_add_attrs = true;
5807 }
5808
5809 return NULL_TREE;
5810 }
5811
5812 /* Handle an "interrupt" or "isr" attribute;
5813 arguments as in struct attribute_spec.handler. */
5814 static tree
5815 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5816 bool *no_add_attrs)
5817 {
5818 if (DECL_P (*node))
5819 {
5820 if (TREE_CODE (*node) != FUNCTION_DECL)
5821 {
5822 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5823 name);
5824 *no_add_attrs = true;
5825 }
5826 /* FIXME: the argument if any is checked for type attributes;
5827 should it be checked for decl ones? */
5828 }
5829 else
5830 {
5831 if (TREE_CODE (*node) == FUNCTION_TYPE
5832 || TREE_CODE (*node) == METHOD_TYPE)
5833 {
5834 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5835 {
5836 warning (OPT_Wattributes, "%qE attribute ignored",
5837 name);
5838 *no_add_attrs = true;
5839 }
5840 }
5841 else if (TREE_CODE (*node) == POINTER_TYPE
5842 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5843 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5844 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5845 {
5846 *node = build_variant_type_copy (*node);
5847 TREE_TYPE (*node) = build_type_attribute_variant
5848 (TREE_TYPE (*node),
5849 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5850 *no_add_attrs = true;
5851 }
5852 else
5853 {
5854 /* Possibly pass this attribute on from the type to a decl. */
5855 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5856 | (int) ATTR_FLAG_FUNCTION_NEXT
5857 | (int) ATTR_FLAG_ARRAY_NEXT))
5858 {
5859 *no_add_attrs = true;
5860 return tree_cons (name, args, NULL_TREE);
5861 }
5862 else
5863 {
5864 warning (OPT_Wattributes, "%qE attribute ignored",
5865 name);
5866 }
5867 }
5868 }
5869
5870 return NULL_TREE;
5871 }
5872
5873 /* Handle a "pcs" attribute; arguments as in struct
5874 attribute_spec.handler. */
5875 static tree
5876 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5877 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5878 {
5879 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5880 {
5881 warning (OPT_Wattributes, "%qE attribute ignored", name);
5882 *no_add_attrs = true;
5883 }
5884 return NULL_TREE;
5885 }
5886
5887 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5888 /* Handle the "notshared" attribute. This attribute is another way of
5889 requesting hidden visibility. ARM's compiler supports
5890 "__declspec(notshared)"; we support the same thing via an
5891 attribute. */
5892
5893 static tree
5894 arm_handle_notshared_attribute (tree *node,
5895 tree name ATTRIBUTE_UNUSED,
5896 tree args ATTRIBUTE_UNUSED,
5897 int flags ATTRIBUTE_UNUSED,
5898 bool *no_add_attrs)
5899 {
5900 tree decl = TYPE_NAME (*node);
5901
5902 if (decl)
5903 {
5904 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5905 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5906 *no_add_attrs = false;
5907 }
5908 return NULL_TREE;
5909 }
5910 #endif
5911
5912 /* Return 0 if the attributes for two types are incompatible, 1 if they
5913 are compatible, and 2 if they are nearly compatible (which causes a
5914 warning to be generated). */
5915 static int
5916 arm_comp_type_attributes (const_tree type1, const_tree type2)
5917 {
5918 int l1, l2, s1, s2;
5919
5920 /* Check for mismatch of non-default calling convention. */
5921 if (TREE_CODE (type1) != FUNCTION_TYPE)
5922 return 1;
5923
5924 /* Check for mismatched call attributes. */
5925 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5926 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5927 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5928 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5929
5930 /* Only bother to check if an attribute is defined. */
5931 if (l1 | l2 | s1 | s2)
5932 {
5933 /* If one type has an attribute, the other must have the same attribute. */
5934 if ((l1 != l2) || (s1 != s2))
5935 return 0;
5936
5937 /* Disallow mixed attributes. */
5938 if ((l1 & s2) || (l2 & s1))
5939 return 0;
5940 }
5941
5942 /* Check for mismatched ISR attribute. */
5943 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5944 if (! l1)
5945 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5946 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5947 if (! l2)
5948 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5949 if (l1 != l2)
5950 return 0;
5951
5952 return 1;
5953 }
5954
5955 /* Assigns default attributes to newly defined type. This is used to
5956 set short_call/long_call attributes for function types of
5957 functions defined inside corresponding #pragma scopes. */
5958 static void
5959 arm_set_default_type_attributes (tree type)
5960 {
5961 /* Add __attribute__ ((long_call)) to all functions when inside
5962 #pragma long_calls, or __attribute__ ((short_call)) when inside
5963 #pragma no_long_calls. */
5964 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5965 {
5966 tree type_attr_list, attr_name;
5967 type_attr_list = TYPE_ATTRIBUTES (type);
5968
5969 if (arm_pragma_long_calls == LONG)
5970 attr_name = get_identifier ("long_call");
5971 else if (arm_pragma_long_calls == SHORT)
5972 attr_name = get_identifier ("short_call");
5973 else
5974 return;
5975
5976 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5977 TYPE_ATTRIBUTES (type) = type_attr_list;
5978 }
5979 }
5980 \f
5981 /* Return true if DECL is known to be linked into section SECTION. */
5982
5983 static bool
5984 arm_function_in_section_p (tree decl, section *section)
5985 {
5986 /* We can only be certain about functions defined in the same
5987 compilation unit. */
5988 if (!TREE_STATIC (decl))
5989 return false;
5990
5991 /* Make sure that SYMBOL always binds to the definition in this
5992 compilation unit. */
5993 if (!targetm.binds_local_p (decl))
5994 return false;
5995
5996 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5997 if (!DECL_SECTION_NAME (decl))
5998 {
5999 /* Make sure that we will not create a unique section for DECL. */
6000 if (flag_function_sections || DECL_ONE_ONLY (decl))
6001 return false;
6002 }
6003
6004 return function_section (decl) == section;
6005 }
6006
6007 /* Return nonzero if a 32-bit "long_call" should be generated for
6008 a call from the current function to DECL. We generate a long_call
6009 if the function:
6010
6011 a. has an __attribute__((long_call))
6012 or b. is within the scope of a #pragma long_calls
6013 or c. the -mlong-calls command line switch has been specified
6014
6015 However we do not generate a long call if the function:
6016
6017 d. has an __attribute__ ((short_call))
6018 or e. is inside the scope of a #pragma no_long_calls
6019 or f. is defined in the same section as the current function. */
6020
6021 bool
6022 arm_is_long_call_p (tree decl)
6023 {
6024 tree attrs;
6025
6026 if (!decl)
6027 return TARGET_LONG_CALLS;
6028
6029 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6030 if (lookup_attribute ("short_call", attrs))
6031 return false;
6032
6033 /* For "f", be conservative, and only cater for cases in which the
6034 whole of the current function is placed in the same section. */
6035 if (!flag_reorder_blocks_and_partition
6036 && TREE_CODE (decl) == FUNCTION_DECL
6037 && arm_function_in_section_p (decl, current_function_section ()))
6038 return false;
6039
6040 if (lookup_attribute ("long_call", attrs))
6041 return true;
6042
6043 return TARGET_LONG_CALLS;
6044 }
6045
6046 /* Return nonzero if it is ok to make a tail-call to DECL. */
6047 static bool
6048 arm_function_ok_for_sibcall (tree decl, tree exp)
6049 {
6050 unsigned long func_type;
6051
6052 if (cfun->machine->sibcall_blocked)
6053 return false;
6054
6055 /* Never tailcall something if we are generating code for Thumb-1. */
6056 if (TARGET_THUMB1)
6057 return false;
6058
6059 /* The PIC register is live on entry to VxWorks PLT entries, so we
6060 must make the call before restoring the PIC register. */
6061 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6062 return false;
6063
6064 /* Cannot tail-call to long calls, since these are out of range of
6065 a branch instruction. */
6066 if (decl && arm_is_long_call_p (decl))
6067 return false;
6068
6069 /* If we are interworking and the function is not declared static
6070 then we can't tail-call it unless we know that it exists in this
6071 compilation unit (since it might be a Thumb routine). */
6072 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6073 && !TREE_ASM_WRITTEN (decl))
6074 return false;
6075
6076 func_type = arm_current_func_type ();
6077 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6078 if (IS_INTERRUPT (func_type))
6079 return false;
6080
6081 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6082 {
6083 /* Check that the return value locations are the same. For
6084 example that we aren't returning a value from the sibling in
6085 a VFP register but then need to transfer it to a core
6086 register. */
6087 rtx a, b;
6088
6089 a = arm_function_value (TREE_TYPE (exp), decl, false);
6090 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6091 cfun->decl, false);
6092 if (!rtx_equal_p (a, b))
6093 return false;
6094 }
6095
6096 /* Never tailcall if function may be called with a misaligned SP. */
6097 if (IS_STACKALIGN (func_type))
6098 return false;
6099
6100 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6101 references should become a NOP. Don't convert such calls into
6102 sibling calls. */
6103 if (TARGET_AAPCS_BASED
6104 && arm_abi == ARM_ABI_AAPCS
6105 && decl
6106 && DECL_WEAK (decl))
6107 return false;
6108
6109 /* Everything else is ok. */
6110 return true;
6111 }
6112
6113 \f
6114 /* Addressing mode support functions. */
6115
6116 /* Return nonzero if X is a legitimate immediate operand when compiling
6117 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6118 int
6119 legitimate_pic_operand_p (rtx x)
6120 {
6121 if (GET_CODE (x) == SYMBOL_REF
6122 || (GET_CODE (x) == CONST
6123 && GET_CODE (XEXP (x, 0)) == PLUS
6124 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6125 return 0;
6126
6127 return 1;
6128 }
6129
6130 /* Record that the current function needs a PIC register. Initialize
6131 cfun->machine->pic_reg if we have not already done so. */
6132
6133 static void
6134 require_pic_register (void)
6135 {
6136 /* A lot of the logic here is made obscure by the fact that this
6137 routine gets called as part of the rtx cost estimation process.
6138 We don't want those calls to affect any assumptions about the real
6139 function; and further, we can't call entry_of_function() until we
6140 start the real expansion process. */
6141 if (!crtl->uses_pic_offset_table)
6142 {
6143 gcc_assert (can_create_pseudo_p ());
6144 if (arm_pic_register != INVALID_REGNUM
6145 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6146 {
6147 if (!cfun->machine->pic_reg)
6148 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6149
6150 /* Play games to avoid marking the function as needing pic
6151 if we are being called as part of the cost-estimation
6152 process. */
6153 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6154 crtl->uses_pic_offset_table = 1;
6155 }
6156 else
6157 {
6158 rtx seq, insn;
6159
6160 if (!cfun->machine->pic_reg)
6161 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6162
6163 /* Play games to avoid marking the function as needing pic
6164 if we are being called as part of the cost-estimation
6165 process. */
6166 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6167 {
6168 crtl->uses_pic_offset_table = 1;
6169 start_sequence ();
6170
6171 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6172 && arm_pic_register > LAST_LO_REGNUM)
6173 emit_move_insn (cfun->machine->pic_reg,
6174 gen_rtx_REG (Pmode, arm_pic_register));
6175 else
6176 arm_load_pic_register (0UL);
6177
6178 seq = get_insns ();
6179 end_sequence ();
6180
6181 for (insn = seq; insn; insn = NEXT_INSN (insn))
6182 if (INSN_P (insn))
6183 INSN_LOCATION (insn) = prologue_location;
6184
6185 /* We can be called during expansion of PHI nodes, where
6186 we can't yet emit instructions directly in the final
6187 insn stream. Queue the insns on the entry edge, they will
6188 be committed after everything else is expanded. */
6189 insert_insn_on_edge (seq,
6190 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6191 }
6192 }
6193 }
6194 }
6195
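/* Legitimize ORIG as a PIC address of mode MODE, using REG as the
   destination where one is needed (a new pseudo is created if REG is
   zero).  Return an rtx for the legitimized address.  */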
6196 rtx
6197 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6198 {
6199 if (GET_CODE (orig) == SYMBOL_REF
6200 || GET_CODE (orig) == LABEL_REF)
6201 {
6202 rtx insn;
6203
6204 if (reg == 0)
6205 {
6206 gcc_assert (can_create_pseudo_p ());
6207 reg = gen_reg_rtx (Pmode);
6208 }
6209
6210 /* VxWorks does not impose a fixed gap between segments; the run-time
6211 gap can be different from the object-file gap. We therefore can't
6212 use GOTOFF unless we are absolutely sure that the symbol is in the
6213 same segment as the GOT. Unfortunately, the flexibility of linker
6214 scripts means that we can't be sure of that in general, so assume
6215 that GOTOFF is never valid on VxWorks. */
6216 if ((GET_CODE (orig) == LABEL_REF
6217 || (GET_CODE (orig) == SYMBOL_REF
6218 && SYMBOL_REF_LOCAL_P (orig)))
6219 && NEED_GOT_RELOC
6220 && arm_pic_data_is_text_relative)
6221 insn = arm_pic_static_addr (orig, reg);
6222 else
6223 {
6224 rtx pat;
6225 rtx mem;
6226
6227 /* If this function doesn't have a pic register, create one now. */
6228 require_pic_register ();
6229
6230 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6231
6232 /* Make the MEM as close to a constant as possible. */
6233 mem = SET_SRC (pat);
6234 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6235 MEM_READONLY_P (mem) = 1;
6236 MEM_NOTRAP_P (mem) = 1;
6237
6238 insn = emit_insn (pat);
6239 }
6240
6241 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6242 by loop. */
6243 set_unique_reg_note (insn, REG_EQUAL, orig);
6244
6245 return reg;
6246 }
6247 else if (GET_CODE (orig) == CONST)
6248 {
6249 rtx base, offset;
6250
6251 if (GET_CODE (XEXP (orig, 0)) == PLUS
6252 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6253 return orig;
6254
6255 /* Handle the case where we have: const (UNSPEC_TLS). */
6256 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6257 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6258 return orig;
6259
6260 /* Handle the case where we have:
6261 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6262 CONST_INT. */
6263 if (GET_CODE (XEXP (orig, 0)) == PLUS
6264 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6265 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6266 {
6267 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6268 return orig;
6269 }
6270
6271 if (reg == 0)
6272 {
6273 gcc_assert (can_create_pseudo_p ());
6274 reg = gen_reg_rtx (Pmode);
6275 }
6276
6277 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6278
6279 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6280 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6281 base == reg ? 0 : reg);
6282
6283 if (CONST_INT_P (offset))
6284 {
6285 /* The base register doesn't really matter, we only want to
6286 test the index for the appropriate mode. */
6287 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6288 {
6289 gcc_assert (can_create_pseudo_p ());
6290 offset = force_reg (Pmode, offset);
6291 }
6292
6293 if (CONST_INT_P (offset))
6294 return plus_constant (Pmode, base, INTVAL (offset));
6295 }
6296
6297 if (GET_MODE_SIZE (mode) > 4
6298 && (GET_MODE_CLASS (mode) == MODE_INT
6299 || TARGET_SOFT_FLOAT))
6300 {
6301 emit_insn (gen_addsi3 (reg, base, offset));
6302 return reg;
6303 }
6304
6305 return gen_rtx_PLUS (Pmode, base, offset);
6306 }
6307
6308 return orig;
6309 }
6310
6311
6312 /* Find a spare register to use during the prolog of a function. */
6313
6314 static int
6315 thumb_find_work_register (unsigned long pushed_regs_mask)
6316 {
6317 int reg;
6318
6319 /* Check the argument registers first as these are call-used. The
6320 register allocation order means that sometimes r3 might be used
6321 but earlier argument registers might not, so check them all. */
6322 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6323 if (!df_regs_ever_live_p (reg))
6324 return reg;
6325
6326 /* Before going on to check the call-saved registers we can try a couple
6327 more ways of deducing that r3 is available. The first is when we are
6328 pushing anonymous arguments onto the stack and we have less than 4
6329 registers worth of fixed arguments(*). In this case r3 will be part of
6330 the variable argument list and so we can be sure that it will be
6331 pushed right at the start of the function. Hence it will be available
6332 for the rest of the prologue.
6333 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6334 if (cfun->machine->uses_anonymous_args
6335 && crtl->args.pretend_args_size > 0)
6336 return LAST_ARG_REGNUM;
6337
6338 /* The other case is when we have fixed arguments but less than 4 registers
6339 worth. In this case r3 might be used in the body of the function, but
6340 it is not being used to convey an argument into the function. In theory
6341 we could just check crtl->args.size to see how many bytes are
6342 being passed in argument registers, but it seems that it is unreliable.
6343 Sometimes it will have the value 0 when in fact arguments are being
6344 passed. (See testcase execute/20021111-1.c for an example). So we also
6345 check the args_info.nregs field. The problem with this field is
6346 that it makes no allowances for arguments that are passed to the
6347 function but which are not used. Hence we could miss an opportunity
6348 when a function has an unused argument in r3. But it is better to be
6349 safe than to be sorry. */
6350 if (! cfun->machine->uses_anonymous_args
6351 && crtl->args.size >= 0
6352 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6353 && (TARGET_AAPCS_BASED
6354 ? crtl->args.info.aapcs_ncrn < 4
6355 : crtl->args.info.nregs < 4))
6356 return LAST_ARG_REGNUM;
6357
6358 /* Otherwise look for a call-saved register that is going to be pushed. */
6359 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6360 if (pushed_regs_mask & (1 << reg))
6361 return reg;
6362
6363 if (TARGET_THUMB2)
6364 {
6365 /* Thumb-2 can use high regs. */
6366 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6367 if (pushed_regs_mask & (1 << reg))
6368 return reg;
6369 }
6370 /* Something went wrong - thumb_compute_save_reg_mask()
6371 should have arranged for a suitable register to be pushed. */
6372 gcc_unreachable ();
6373 }
6374
6375 static GTY(()) int pic_labelno;
6376
6377 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6378 low register. */
6379
6380 void
6381 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6382 {
6383 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6384
6385 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6386 return;
6387
6388 gcc_assert (flag_pic);
6389
6390 pic_reg = cfun->machine->pic_reg;
6391 if (TARGET_VXWORKS_RTP)
6392 {
6393 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6394 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6395 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6396
6397 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6398
6399 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6400 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6401 }
6402 else
6403 {
6404 /* We use an UNSPEC rather than a LABEL_REF because this label
6405 never appears in the code stream. */
6406
6407 labelno = GEN_INT (pic_labelno++);
6408 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6409 l1 = gen_rtx_CONST (VOIDmode, l1);
6410
6411 /* On the ARM the PC register contains 'dot + 8' at the time of the
6412 addition, on the Thumb it is 'dot + 4'. */
6413 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6414 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6415 UNSPEC_GOTSYM_OFF);
6416 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6417
6418 if (TARGET_32BIT)
6419 {
6420 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6421 }
6422 else /* TARGET_THUMB1 */
6423 {
6424 if (arm_pic_register != INVALID_REGNUM
6425 && REGNO (pic_reg) > LAST_LO_REGNUM)
6426 {
6427 /* We will have pushed the pic register, so we should always be
6428 able to find a work register. */
6429 pic_tmp = gen_rtx_REG (SImode,
6430 thumb_find_work_register (saved_regs));
6431 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6432 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6433 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6434 }
6435 else if (arm_pic_register != INVALID_REGNUM
6436 && arm_pic_register > LAST_LO_REGNUM
6437 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6438 {
6439 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6440 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6441 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6442 }
6443 else
6444 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6445 }
6446 }
6447
6448 /* Need to emit this whether or not we obey regdecls,
6449 since setjmp/longjmp can cause life info to screw up. */
6450 emit_use (pic_reg);
6451 }
6452
6453 /* Generate code to load the address of a static var when flag_pic is set. */
6454 static rtx
6455 arm_pic_static_addr (rtx orig, rtx reg)
6456 {
6457 rtx l1, labelno, offset_rtx, insn;
6458
6459 gcc_assert (flag_pic);
6460
6461 /* We use an UNSPEC rather than a LABEL_REF because this label
6462 never appears in the code stream. */
6463 labelno = GEN_INT (pic_labelno++);
6464 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6465 l1 = gen_rtx_CONST (VOIDmode, l1);
6466
6467 /* On the ARM the PC register contains 'dot + 8' at the time of the
6468 addition, on the Thumb it is 'dot + 4'. */
6469 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6470 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6471 UNSPEC_SYMBOL_OFFSET);
6472 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6473
6474 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6475 return insn;
6476 }
6477
6478 /* Return nonzero if X is valid as an ARM state addressing register. */
6479 static int
6480 arm_address_register_rtx_p (rtx x, int strict_p)
6481 {
6482 int regno;
6483
6484 if (!REG_P (x))
6485 return 0;
6486
6487 regno = REGNO (x);
6488
6489 if (strict_p)
6490 return ARM_REGNO_OK_FOR_BASE_P (regno);
6491
6492 return (regno <= LAST_ARM_REGNUM
6493 || regno >= FIRST_PSEUDO_REGISTER
6494 || regno == FRAME_POINTER_REGNUM
6495 || regno == ARG_POINTER_REGNUM);
6496 }
6497
6498 /* Return TRUE if this rtx is the difference of a symbol and a label,
6499 and will reduce to a PC-relative relocation in the object file.
6500 Expressions like this can be left alone when generating PIC, rather
6501 than forced through the GOT. */
6502 static int
6503 pcrel_constant_p (rtx x)
6504 {
6505 if (GET_CODE (x) == MINUS)
6506 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6507
6508 return FALSE;
6509 }
6510
6511 /* Return true if X will surely end up in an index register after next
6512 splitting pass. */
6513 static bool
6514 will_be_in_index_register (const_rtx x)
6515 {
6516 /* arm.md: calculate_pic_address will split this into a register. */
6517 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6518 }
6519
6520 /* Return nonzero if X is a valid ARM state address operand. */
6521 int
6522 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6523 int strict_p)
6524 {
6525 bool use_ldrd;
6526 enum rtx_code code = GET_CODE (x);
6527
6528 if (arm_address_register_rtx_p (x, strict_p))
6529 return 1;
6530
6531 use_ldrd = (TARGET_LDRD
6532 && (mode == DImode
6533 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6534
6535 if (code == POST_INC || code == PRE_DEC
6536 || ((code == PRE_INC || code == POST_DEC)
6537 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6538 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6539
6540 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6541 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6542 && GET_CODE (XEXP (x, 1)) == PLUS
6543 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6544 {
6545 rtx addend = XEXP (XEXP (x, 1), 1);
6546
6547 /* Don't allow ldrd post increment by register because it's hard
6548 to fixup invalid register choices. */
6549 if (use_ldrd
6550 && GET_CODE (x) == POST_MODIFY
6551 && REG_P (addend))
6552 return 0;
6553
6554 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6555 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6556 }
6557
6558 /* After reload constants split into minipools will have addresses
6559 from a LABEL_REF. */
6560 else if (reload_completed
6561 && (code == LABEL_REF
6562 || (code == CONST
6563 && GET_CODE (XEXP (x, 0)) == PLUS
6564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6565 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6566 return 1;
6567
6568 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6569 return 0;
6570
6571 else if (code == PLUS)
6572 {
6573 rtx xop0 = XEXP (x, 0);
6574 rtx xop1 = XEXP (x, 1);
6575
6576 return ((arm_address_register_rtx_p (xop0, strict_p)
6577 && ((CONST_INT_P (xop1)
6578 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6579 || (!strict_p && will_be_in_index_register (xop1))))
6580 || (arm_address_register_rtx_p (xop1, strict_p)
6581 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6582 }
6583
6584 #if 0
6585 /* Reload currently can't handle MINUS, so disable this for now */
6586 else if (GET_CODE (x) == MINUS)
6587 {
6588 rtx xop0 = XEXP (x, 0);
6589 rtx xop1 = XEXP (x, 1);
6590
6591 return (arm_address_register_rtx_p (xop0, strict_p)
6592 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6593 }
6594 #endif
6595
6596 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6597 && code == SYMBOL_REF
6598 && CONSTANT_POOL_ADDRESS_P (x)
6599 && ! (flag_pic
6600 && symbol_mentioned_p (get_pool_constant (x))
6601 && ! pcrel_constant_p (get_pool_constant (x))))
6602 return 1;
6603
6604 return 0;
6605 }
6606
6607 /* Return nonzero if X is a valid Thumb-2 address operand. */
6608 static int
6609 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6610 {
6611 bool use_ldrd;
6612 enum rtx_code code = GET_CODE (x);
6613
6614 if (arm_address_register_rtx_p (x, strict_p))
6615 return 1;
6616
6617 use_ldrd = (TARGET_LDRD
6618 && (mode == DImode
6619 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6620
6621 if (code == POST_INC || code == PRE_DEC
6622 || ((code == PRE_INC || code == POST_DEC)
6623 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6624 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6625
6626 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6627 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6628 && GET_CODE (XEXP (x, 1)) == PLUS
6629 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6630 {
6631 /* Thumb-2 only has autoincrement by constant. */
6632 rtx addend = XEXP (XEXP (x, 1), 1);
6633 HOST_WIDE_INT offset;
6634
6635 if (!CONST_INT_P (addend))
6636 return 0;
6637
6638 offset = INTVAL (addend);
6639 if (GET_MODE_SIZE (mode) <= 4)
6640 return (offset > -256 && offset < 256);
6641
6642 return (use_ldrd && offset > -1024 && offset < 1024
6643 && (offset & 3) == 0);
6644 }
6645
6646 /* After reload constants split into minipools will have addresses
6647 from a LABEL_REF. */
6648 else if (reload_completed
6649 && (code == LABEL_REF
6650 || (code == CONST
6651 && GET_CODE (XEXP (x, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6653 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6654 return 1;
6655
6656 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6657 return 0;
6658
6659 else if (code == PLUS)
6660 {
6661 rtx xop0 = XEXP (x, 0);
6662 rtx xop1 = XEXP (x, 1);
6663
6664 return ((arm_address_register_rtx_p (xop0, strict_p)
6665 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6666 || (!strict_p && will_be_in_index_register (xop1))))
6667 || (arm_address_register_rtx_p (xop1, strict_p)
6668 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6669 }
6670
6671 /* Normally we can assign constant values to target registers without
6672 the help of a constant pool. But there are cases where we have to use the
6673 constant pool, for example:
6674 1) assigning a label to a register.
6675 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6676
6677 A constant pool access of the form
6678 (set (reg r0) (mem (symbol_ref (".LC0"))))
6679 will cause the use of the literal pool (later, in arm_reorg).
6680 So here we mark such a form as invalid; the compiler will then
6681 adjust it into:
6682 (set (reg r0) (symbol_ref (".LC0")))
6683 (set (reg r0) (mem (reg r0))).
6684 No extra register is required, and (mem (reg r0)) won't cause the use
6685 of literal pools. */
6686 else if (arm_disable_literal_pool && code == SYMBOL_REF
6687 && CONSTANT_POOL_ADDRESS_P (x))
6688 return 0;
6689
6690 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6691 && code == SYMBOL_REF
6692 && CONSTANT_POOL_ADDRESS_P (x)
6693 && ! (flag_pic
6694 && symbol_mentioned_p (get_pool_constant (x))
6695 && ! pcrel_constant_p (get_pool_constant (x))))
6696 return 1;
6697
6698 return 0;
6699 }
6700
6701 /* Return nonzero if INDEX is valid for an address index operand in
6702 ARM state. */
6703 static int
6704 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6705 int strict_p)
6706 {
6707 HOST_WIDE_INT range;
6708 enum rtx_code code = GET_CODE (index);
6709
6710 /* Standard coprocessor addressing modes. */
6711 if (TARGET_HARD_FLOAT
6712 && TARGET_VFP
6713 && (mode == SFmode || mode == DFmode))
6714 return (code == CONST_INT && INTVAL (index) < 1024
6715 && INTVAL (index) > -1024
6716 && (INTVAL (index) & 3) == 0);
6717
6718 /* For quad modes, we restrict the constant offset to be slightly less
6719 than what the instruction format permits. We do this because for
6720 quad mode moves, we will actually decompose them into two separate
6721 double-mode reads or writes. INDEX must therefore be a valid
6722 (double-mode) offset and so should INDEX+8. */
6723 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6724 return (code == CONST_INT
6725 && INTVAL (index) < 1016
6726 && INTVAL (index) > -1024
6727 && (INTVAL (index) & 3) == 0);
6728
6729 /* We have no such constraint on double mode offsets, so we permit the
6730 full range of the instruction format. */
6731 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6732 return (code == CONST_INT
6733 && INTVAL (index) < 1024
6734 && INTVAL (index) > -1024
6735 && (INTVAL (index) & 3) == 0);
6736
6737 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6738 return (code == CONST_INT
6739 && INTVAL (index) < 1024
6740 && INTVAL (index) > -1024
6741 && (INTVAL (index) & 3) == 0);
6742
6743 if (arm_address_register_rtx_p (index, strict_p)
6744 && (GET_MODE_SIZE (mode) <= 4))
6745 return 1;
6746
6747 if (mode == DImode || mode == DFmode)
6748 {
6749 if (code == CONST_INT)
6750 {
6751 HOST_WIDE_INT val = INTVAL (index);
6752
6753 if (TARGET_LDRD)
6754 return val > -256 && val < 256;
6755 else
6756 return val > -4096 && val < 4092;
6757 }
6758
6759 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6760 }
6761
6762 if (GET_MODE_SIZE (mode) <= 4
6763 && ! (arm_arch4
6764 && (mode == HImode
6765 || mode == HFmode
6766 || (mode == QImode && outer == SIGN_EXTEND))))
6767 {
6768 if (code == MULT)
6769 {
6770 rtx xiop0 = XEXP (index, 0);
6771 rtx xiop1 = XEXP (index, 1);
6772
6773 return ((arm_address_register_rtx_p (xiop0, strict_p)
6774 && power_of_two_operand (xiop1, SImode))
6775 || (arm_address_register_rtx_p (xiop1, strict_p)
6776 && power_of_two_operand (xiop0, SImode)));
6777 }
6778 else if (code == LSHIFTRT || code == ASHIFTRT
6779 || code == ASHIFT || code == ROTATERT)
6780 {
6781 rtx op = XEXP (index, 1);
6782
6783 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6784 && CONST_INT_P (op)
6785 && INTVAL (op) > 0
6786 && INTVAL (op) <= 31);
6787 }
6788 }
6789
6790 /* For ARM v4 we may be doing a sign-extend operation during the
6791 load. */
6792 if (arm_arch4)
6793 {
6794 if (mode == HImode
6795 || mode == HFmode
6796 || (outer == SIGN_EXTEND && mode == QImode))
6797 range = 256;
6798 else
6799 range = 4096;
6800 }
6801 else
6802 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6803
6804 return (code == CONST_INT
6805 && INTVAL (index) < range
6806 && INTVAL (index) > -range);
6807 }
6808
6809 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6810 index operand. i.e. 1, 2, 4 or 8. */
6811 static bool
6812 thumb2_index_mul_operand (rtx op)
6813 {
6814 HOST_WIDE_INT val;
6815
6816 if (!CONST_INT_P (op))
6817 return false;
6818
6819 val = INTVAL (op);
6820 return (val == 1 || val == 2 || val == 4 || val == 8);
6821 }
6822
6823 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6824 static int
6825 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6826 {
6827 enum rtx_code code = GET_CODE (index);
6828
6829 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6830 /* Standard coprocessor addressing modes. */
6831 if (TARGET_HARD_FLOAT
6832 && TARGET_VFP
6833 && (mode == SFmode || mode == DFmode))
6834 return (code == CONST_INT && INTVAL (index) < 1024
6835 /* Thumb-2 allows only > -256 index range for its core register
6836 load/stores. Since we allow SF/DF in core registers, we have
6837 to use the intersection between -256~4096 (core) and -1024~1024
6838 (coprocessor). */
6839 && INTVAL (index) > -256
6840 && (INTVAL (index) & 3) == 0);
6841
6842 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6843 {
6844 /* For DImode assume values will usually live in core regs
6845 and only allow LDRD addressing modes. */
6846 if (!TARGET_LDRD || mode != DImode)
6847 return (code == CONST_INT
6848 && INTVAL (index) < 1024
6849 && INTVAL (index) > -1024
6850 && (INTVAL (index) & 3) == 0);
6851 }
6852
6853 /* For quad modes, we restrict the constant offset to be slightly less
6854 than what the instruction format permits. We do this because for
6855 quad mode moves, we will actually decompose them into two separate
6856 double-mode reads or writes. INDEX must therefore be a valid
6857 (double-mode) offset and so should INDEX+8. */
6858 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6859 return (code == CONST_INT
6860 && INTVAL (index) < 1016
6861 && INTVAL (index) > -1024
6862 && (INTVAL (index) & 3) == 0);
6863
6864 /* We have no such constraint on double mode offsets, so we permit the
6865 full range of the instruction format. */
6866 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6867 return (code == CONST_INT
6868 && INTVAL (index) < 1024
6869 && INTVAL (index) > -1024
6870 && (INTVAL (index) & 3) == 0);
6871
6872 if (arm_address_register_rtx_p (index, strict_p)
6873 && (GET_MODE_SIZE (mode) <= 4))
6874 return 1;
6875
6876 if (mode == DImode || mode == DFmode)
6877 {
6878 if (code == CONST_INT)
6879 {
6880 HOST_WIDE_INT val = INTVAL (index);
6881 /* ??? Can we assume ldrd for thumb2? */
6882 /* Thumb-2 ldrd only has reg+const addressing modes. */
6883 /* ldrd supports offsets of +-1020.
6884 However the ldr fallback does not. */
6885 return val > -256 && val < 256 && (val & 3) == 0;
6886 }
6887 else
6888 return 0;
6889 }
6890
6891 if (code == MULT)
6892 {
6893 rtx xiop0 = XEXP (index, 0);
6894 rtx xiop1 = XEXP (index, 1);
6895
6896 return ((arm_address_register_rtx_p (xiop0, strict_p)
6897 && thumb2_index_mul_operand (xiop1))
6898 || (arm_address_register_rtx_p (xiop1, strict_p)
6899 && thumb2_index_mul_operand (xiop0)));
6900 }
6901 else if (code == ASHIFT)
6902 {
6903 rtx op = XEXP (index, 1);
6904
6905 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6906 && CONST_INT_P (op)
6907 && INTVAL (op) > 0
6908 && INTVAL (op) <= 3);
6909 }
6910
6911 return (code == CONST_INT
6912 && INTVAL (index) < 4096
6913 && INTVAL (index) > -256);
6914 }
6915
6916 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6917 static int
6918 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6919 {
6920 int regno;
6921
6922 if (!REG_P (x))
6923 return 0;
6924
6925 regno = REGNO (x);
6926
6927 if (strict_p)
6928 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6929
6930 return (regno <= LAST_LO_REGNUM
6931 || regno > LAST_VIRTUAL_REGISTER
6932 || regno == FRAME_POINTER_REGNUM
6933 || (GET_MODE_SIZE (mode) >= 4
6934 && (regno == STACK_POINTER_REGNUM
6935 || regno >= FIRST_PSEUDO_REGISTER
6936 || x == hard_frame_pointer_rtx
6937 || x == arg_pointer_rtx)));
6938 }
6939
6940 /* Return nonzero if x is a legitimate index register. This is the case
6941 for any base register that can access a QImode object. */
6942 inline static int
6943 thumb1_index_register_rtx_p (rtx x, int strict_p)
6944 {
6945 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6946 }
6947
6948 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6949
6950 The AP may be eliminated to either the SP or the FP, so we use the
6951 least common denominator, e.g. SImode, and offsets from 0 to 64.
6952
6953 ??? Verify whether the above is the right approach.
6954
6955 ??? Also, the FP may be eliminated to the SP, so perhaps that
6956 needs special handling also.
6957
6958 ??? Look at how the mips16 port solves this problem. It probably uses
6959 better ways to solve some of these problems.
6960
6961 Although it is not incorrect, we don't accept QImode and HImode
6962 addresses based on the frame pointer or arg pointer until the
6963 reload pass starts. This is so that eliminating such addresses
6964 into stack based ones won't produce impossible code. */
6965 int
6966 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6967 {
6968 /* ??? Not clear if this is right. Experiment. */
6969 if (GET_MODE_SIZE (mode) < 4
6970 && !(reload_in_progress || reload_completed)
6971 && (reg_mentioned_p (frame_pointer_rtx, x)
6972 || reg_mentioned_p (arg_pointer_rtx, x)
6973 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6974 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6975 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6976 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6977 return 0;
6978
6979 /* Accept any base register. SP only in SImode or larger. */
6980 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6981 return 1;
6982
6983 /* This is PC relative data before arm_reorg runs. */
6984 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6985 && GET_CODE (x) == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6987 return 1;
6988
6989 /* This is PC relative data after arm_reorg runs. */
6990 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6991 && reload_completed
6992 && (GET_CODE (x) == LABEL_REF
6993 || (GET_CODE (x) == CONST
6994 && GET_CODE (XEXP (x, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6996 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6997 return 1;
6998
6999 /* Post-inc indexing only supported for SImode and larger. */
7000 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7001 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7002 return 1;
7003
7004 else if (GET_CODE (x) == PLUS)
7005 {
7006 /* REG+REG address can be any two index registers. */
7007 /* We disallow FRAME+REG addressing since we know that FRAME
7008 will be replaced with STACK, and SP relative addressing only
7009 permits SP+OFFSET. */
7010 if (GET_MODE_SIZE (mode) <= 4
7011 && XEXP (x, 0) != frame_pointer_rtx
7012 && XEXP (x, 1) != frame_pointer_rtx
7013 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7014 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7015 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7016 return 1;
7017
7018 /* REG+const has 5-7 bit offset for non-SP registers. */
7019 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7020 || XEXP (x, 0) == arg_pointer_rtx)
7021 && CONST_INT_P (XEXP (x, 1))
7022 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7023 return 1;
7024
7025 /* REG+const has a 10-bit offset for SP, but only SImode and
7026 larger are supported. */
7027 /* ??? Should probably check for DI/DFmode overflow here
7028 just like GO_IF_LEGITIMATE_OFFSET does. */
7029 else if (REG_P (XEXP (x, 0))
7030 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7031 && GET_MODE_SIZE (mode) >= 4
7032 && CONST_INT_P (XEXP (x, 1))
7033 && INTVAL (XEXP (x, 1)) >= 0
7034 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7035 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7036 return 1;
7037
7038 else if (REG_P (XEXP (x, 0))
7039 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7040 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7041 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7042 && REGNO (XEXP (x, 0))
7043 <= LAST_VIRTUAL_POINTER_REGISTER))
7044 && GET_MODE_SIZE (mode) >= 4
7045 && CONST_INT_P (XEXP (x, 1))
7046 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7047 return 1;
7048 }
7049
7050 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7051 && GET_MODE_SIZE (mode) == 4
7052 && GET_CODE (x) == SYMBOL_REF
7053 && CONSTANT_POOL_ADDRESS_P (x)
7054 && ! (flag_pic
7055 && symbol_mentioned_p (get_pool_constant (x))
7056 && ! pcrel_constant_p (get_pool_constant (x))))
7057 return 1;
7058
7059 return 0;
7060 }
7061
7062 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7063 instruction of mode MODE. */
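/* Concretely, given the cases below: QImode accepts offsets 0..31, HImode
   accepts even offsets 0..62, and SImode or larger accepts multiples of 4
   up to 128 - GET_MODE_SIZE (MODE).  */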
7064 int
7065 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7066 {
7067 switch (GET_MODE_SIZE (mode))
7068 {
7069 case 1:
7070 return val >= 0 && val < 32;
7071
7072 case 2:
7073 return val >= 0 && val < 64 && (val & 1) == 0;
7074
7075 default:
7076 return (val >= 0
7077 && (val + GET_MODE_SIZE (mode)) <= 128
7078 && (val & 3) == 0);
7079 }
7080 }
7081
7082 bool
7083 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7084 {
7085 if (TARGET_ARM)
7086 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7087 else if (TARGET_THUMB2)
7088 return thumb2_legitimate_address_p (mode, x, strict_p);
7089 else /* if (TARGET_THUMB1) */
7090 return thumb1_legitimate_address_p (mode, x, strict_p);
7091 }
7092
7093 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7094
7095 Given an rtx X being reloaded into a reg required to be
7096 in class CLASS, return the class of reg to actually use.
7097 In general this is just CLASS, but for the Thumb core registers and
7098 immediate constants we prefer a LO_REGS class or a subset. */
7099
7100 static reg_class_t
7101 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7102 {
7103 if (TARGET_32BIT)
7104 return rclass;
7105 else
7106 {
7107 if (rclass == GENERAL_REGS)
7108 return LO_REGS;
7109 else
7110 return rclass;
7111 }
7112 }
7113
7114 /* Build the SYMBOL_REF for __tls_get_addr. */
7115
7116 static GTY(()) rtx tls_get_addr_libfunc;
7117
7118 static rtx
7119 get_tls_get_addr (void)
7120 {
7121 if (!tls_get_addr_libfunc)
7122 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7123 return tls_get_addr_libfunc;
7124 }
7125
7126 rtx
7127 arm_load_tp (rtx target)
7128 {
7129 if (!target)
7130 target = gen_reg_rtx (SImode);
7131
7132 if (TARGET_HARD_TP)
7133 {
7134 /* Can return in any reg. */
7135 emit_insn (gen_load_tp_hard (target));
7136 }
7137 else
7138 {
7139 /* Always returned in r0. Immediately copy the result into a pseudo,
7140 otherwise other uses of r0 (e.g. setting up function arguments) may
7141 clobber the value. */
7142
7143 rtx tmp;
7144
7145 emit_insn (gen_load_tp_soft ());
7146
7147 tmp = gen_rtx_REG (SImode, 0);
7148 emit_move_insn (target, tmp);
7149 }
7150 return target;
7151 }
7152
7153 static rtx
7154 load_tls_operand (rtx x, rtx reg)
7155 {
7156 rtx tmp;
7157
7158 if (reg == NULL_RTX)
7159 reg = gen_reg_rtx (SImode);
7160
7161 tmp = gen_rtx_CONST (SImode, x);
7162
7163 emit_move_insn (reg, tmp);
7164
7165 return reg;
7166 }
7167
7168 static rtx
7169 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7170 {
7171 rtx insns, label, labelno, sum;
7172
7173 gcc_assert (reloc != TLS_DESCSEQ);
7174 start_sequence ();
7175
7176 labelno = GEN_INT (pic_labelno++);
7177 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7178 label = gen_rtx_CONST (VOIDmode, label);
7179
7180 sum = gen_rtx_UNSPEC (Pmode,
7181 gen_rtvec (4, x, GEN_INT (reloc), label,
7182 GEN_INT (TARGET_ARM ? 8 : 4)),
7183 UNSPEC_TLS);
7184 reg = load_tls_operand (sum, reg);
7185
7186 if (TARGET_ARM)
7187 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7188 else
7189 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7190
7191 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7192 LCT_PURE, /* LCT_CONST? */
7193 Pmode, 1, reg, Pmode);
7194
7195 insns = get_insns ();
7196 end_sequence ();
7197
7198 return insns;
7199 }
7200
7201 static rtx
7202 arm_tls_descseq_addr (rtx x, rtx reg)
7203 {
7204 rtx labelno = GEN_INT (pic_labelno++);
7205 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7206 rtx sum = gen_rtx_UNSPEC (Pmode,
7207 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7208 gen_rtx_CONST (VOIDmode, label),
7209 GEN_INT (!TARGET_ARM)),
7210 UNSPEC_TLS);
7211 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7212
7213 emit_insn (gen_tlscall (x, labelno));
7214 if (!reg)
7215 reg = gen_reg_rtx (SImode);
7216 else
7217 gcc_assert (REGNO (reg) != 0);
7218
7219 emit_move_insn (reg, reg0);
7220
7221 return reg;
7222 }
7223
7224 rtx
7225 legitimize_tls_address (rtx x, rtx reg)
7226 {
7227 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7228 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7229
7230 switch (model)
7231 {
7232 case TLS_MODEL_GLOBAL_DYNAMIC:
7233 if (TARGET_GNU2_TLS)
7234 {
7235 reg = arm_tls_descseq_addr (x, reg);
7236
7237 tp = arm_load_tp (NULL_RTX);
7238
7239 dest = gen_rtx_PLUS (Pmode, tp, reg);
7240 }
7241 else
7242 {
7243 /* Original scheme */
7244 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7245 dest = gen_reg_rtx (Pmode);
7246 emit_libcall_block (insns, dest, ret, x);
7247 }
7248 return dest;
7249
7250 case TLS_MODEL_LOCAL_DYNAMIC:
7251 if (TARGET_GNU2_TLS)
7252 {
7253 reg = arm_tls_descseq_addr (x, reg);
7254
7255 tp = arm_load_tp (NULL_RTX);
7256
7257 dest = gen_rtx_PLUS (Pmode, tp, reg);
7258 }
7259 else
7260 {
7261 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7262
7263 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7264 share the LDM result with other LD model accesses. */
7265 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7266 UNSPEC_TLS);
7267 dest = gen_reg_rtx (Pmode);
7268 emit_libcall_block (insns, dest, ret, eqv);
7269
7270 /* Load the addend. */
7271 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7272 GEN_INT (TLS_LDO32)),
7273 UNSPEC_TLS);
7274 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7275 dest = gen_rtx_PLUS (Pmode, dest, addend);
7276 }
7277 return dest;
7278
7279 case TLS_MODEL_INITIAL_EXEC:
7280 labelno = GEN_INT (pic_labelno++);
7281 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7282 label = gen_rtx_CONST (VOIDmode, label);
7283 sum = gen_rtx_UNSPEC (Pmode,
7284 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7285 GEN_INT (TARGET_ARM ? 8 : 4)),
7286 UNSPEC_TLS);
7287 reg = load_tls_operand (sum, reg);
7288
7289 if (TARGET_ARM)
7290 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7291 else if (TARGET_THUMB2)
7292 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7293 else
7294 {
7295 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7296 emit_move_insn (reg, gen_const_mem (SImode, reg));
7297 }
7298
7299 tp = arm_load_tp (NULL_RTX);
7300
7301 return gen_rtx_PLUS (Pmode, tp, reg);
7302
7303 case TLS_MODEL_LOCAL_EXEC:
7304 tp = arm_load_tp (NULL_RTX);
7305
7306 reg = gen_rtx_UNSPEC (Pmode,
7307 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7308 UNSPEC_TLS);
7309 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7310
7311 return gen_rtx_PLUS (Pmode, tp, reg);
7312
7313 default:
7314 abort ();
7315 }
7316 }
7317
7318 /* Try machine-dependent ways of modifying an illegitimate address
7319 to be legitimate. If we find one, return the new, valid address. */
7320 rtx
7321 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7322 {
7323 if (!TARGET_ARM)
7324 {
7325 /* TODO: legitimize_address for Thumb2. */
7326 if (TARGET_THUMB2)
7327 return x;
7328 return thumb_legitimize_address (x, orig_x, mode);
7329 }
7330
7331 if (arm_tls_symbol_p (x))
7332 return legitimize_tls_address (x, NULL_RTX);
7333
7334 if (GET_CODE (x) == PLUS)
7335 {
7336 rtx xop0 = XEXP (x, 0);
7337 rtx xop1 = XEXP (x, 1);
7338
7339 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7340 xop0 = force_reg (SImode, xop0);
7341
7342 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7343 && !symbol_mentioned_p (xop1))
7344 xop1 = force_reg (SImode, xop1);
7345
7346 if (ARM_BASE_REGISTER_RTX_P (xop0)
7347 && CONST_INT_P (xop1))
7348 {
7349 HOST_WIDE_INT n, low_n;
7350 rtx base_reg, val;
7351 n = INTVAL (xop1);
7352
7353 /* VFP addressing modes actually allow greater offsets, but for
7354 now we just stick with the lowest common denominator. */
7355 if (mode == DImode
7356 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7357 {
7358 low_n = n & 0x0f;
7359 n &= ~0x0f;
7360 if (low_n > 4)
7361 {
7362 n += 16;
7363 low_n -= 16;
7364 }
7365 }
7366 else
7367 {
7368 low_n = ((mode) == TImode ? 0
7369 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7370 n -= low_n;
7371 }
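/* As an illustration of the DImode/DFmode case above: an offset of
   0x10f becomes low_n = -1 with n = 0x110, so the address is rebuilt
   below as (base + 0x110) - 1.  */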
7372
7373 base_reg = gen_reg_rtx (SImode);
7374 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7375 emit_move_insn (base_reg, val);
7376 x = plus_constant (Pmode, base_reg, low_n);
7377 }
7378 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7379 x = gen_rtx_PLUS (SImode, xop0, xop1);
7380 }
7381
7382 /* XXX We don't allow MINUS any more -- see comment in
7383 arm_legitimate_address_outer_p (). */
7384 else if (GET_CODE (x) == MINUS)
7385 {
7386 rtx xop0 = XEXP (x, 0);
7387 rtx xop1 = XEXP (x, 1);
7388
7389 if (CONSTANT_P (xop0))
7390 xop0 = force_reg (SImode, xop0);
7391
7392 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7393 xop1 = force_reg (SImode, xop1);
7394
7395 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7396 x = gen_rtx_MINUS (SImode, xop0, xop1);
7397 }
7398
7399 /* Make sure to take full advantage of the pre-indexed addressing mode
7400 with absolute addresses, which often allows the base register to be
7401 factored out across multiple adjacent memory references, and might
7402 even allow the minipool to be avoided entirely. */
7403 else if (CONST_INT_P (x) && optimize > 0)
7404 {
7405 unsigned int bits;
7406 HOST_WIDE_INT mask, base, index;
7407 rtx base_reg;
7408
7409 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7410 use an 8-bit index. So let's use a 12-bit index for SImode only and
7411 hope that arm_gen_constant will enable ldrb to use more bits. */
7412 bits = (mode == SImode) ? 12 : 8;
7413 mask = (1 << bits) - 1;
7414 base = INTVAL (x) & ~mask;
7415 index = INTVAL (x) & mask;
7416 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7417 {
7418 /* It'll most probably be more efficient to generate the base
7419 with more bits set and use a negative index instead. */
7420 base |= mask;
7421 index -= mask;
7422 }
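/* For example, with x = 0xfffff004 in SImode the initial split gives
   base = 0xfffff000 and index = 4; since that base has 20 bits set
   (more than (32 - 12)/2), the adjustment above switches to
   base = 0xffffffff (-1, a single MVN) and index = -4091, which still
   fits the 12-bit ldr offset range.  */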
7423 base_reg = force_reg (SImode, GEN_INT (base));
7424 x = plus_constant (Pmode, base_reg, index);
7425 }
7426
7427 if (flag_pic)
7428 {
7429 /* We need to find and carefully transform any SYMBOL and LABEL
7430 references; so go back to the original address expression. */
7431 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7432
7433 if (new_x != orig_x)
7434 x = new_x;
7435 }
7436
7437 return x;
7438 }
7439
7440
7441 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7442 to be legitimate. If we find one, return the new, valid address. */
7443 rtx
7444 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7445 {
7446 if (arm_tls_symbol_p (x))
7447 return legitimize_tls_address (x, NULL_RTX);
7448
7449 if (GET_CODE (x) == PLUS
7450 && CONST_INT_P (XEXP (x, 1))
7451 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7452 || INTVAL (XEXP (x, 1)) < 0))
7453 {
7454 rtx xop0 = XEXP (x, 0);
7455 rtx xop1 = XEXP (x, 1);
7456 HOST_WIDE_INT offset = INTVAL (xop1);
7457
7458 /* Try and fold the offset into a biasing of the base register and
7459 then offsetting that. Don't do this when optimizing for space
7460 since it can cause too many CSEs. */
7461 if (optimize_size && offset >= 0
7462 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7463 {
7464 HOST_WIDE_INT delta;
7465
7466 if (offset >= 256)
7467 delta = offset - (256 - GET_MODE_SIZE (mode));
7468 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7469 delta = 31 * GET_MODE_SIZE (mode);
7470 else
7471 delta = offset & (~31 * GET_MODE_SIZE (mode));
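/* E.g. for an SImode access with offset = 260, the first case above gives
   delta = 8: the base is biased by 252 and the access then uses the
   in-range offset 8.  */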
7472
7473 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7474 NULL_RTX);
7475 x = plus_constant (Pmode, xop0, delta);
7476 }
7477 else if (offset < 0 && offset > -256)
7478 /* Small negative offsets are best done with a subtract before the
7479 dereference; forcing these into a register normally takes two
7480 instructions. */
7481 x = force_operand (x, NULL_RTX);
7482 else
7483 {
7484 /* For the remaining cases, force the constant into a register. */
7485 xop1 = force_reg (SImode, xop1);
7486 x = gen_rtx_PLUS (SImode, xop0, xop1);
7487 }
7488 }
7489 else if (GET_CODE (x) == PLUS
7490 && s_register_operand (XEXP (x, 1), SImode)
7491 && !s_register_operand (XEXP (x, 0), SImode))
7492 {
7493 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7494
7495 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7496 }
7497
7498 if (flag_pic)
7499 {
7500 /* We need to find and carefully transform any SYMBOL and LABEL
7501 references; so go back to the original address expression. */
7502 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7503
7504 if (new_x != orig_x)
7505 x = new_x;
7506 }
7507
7508 return x;
7509 }
7510
7511 bool
7512 arm_legitimize_reload_address (rtx *p,
7513 enum machine_mode mode,
7514 int opnum, int type,
7515 int ind_levels ATTRIBUTE_UNUSED)
7516 {
7517 /* We must recognize output that we have already generated ourselves. */
7518 if (GET_CODE (*p) == PLUS
7519 && GET_CODE (XEXP (*p, 0)) == PLUS
7520 && REG_P (XEXP (XEXP (*p, 0), 0))
7521 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7522 && CONST_INT_P (XEXP (*p, 1)))
7523 {
7524 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7525 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7526 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7527 return true;
7528 }
7529
7530 if (GET_CODE (*p) == PLUS
7531 && REG_P (XEXP (*p, 0))
7532 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7533 /* If the base register is equivalent to a constant, let the generic
7534 code handle it. Otherwise we will run into problems if a future
7535 reload pass decides to rematerialize the constant. */
7536 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7537 && CONST_INT_P (XEXP (*p, 1)))
7538 {
7539 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7540 HOST_WIDE_INT low, high;
7541
7542 /* Detect coprocessor load/stores. */
7543 bool coproc_p = ((TARGET_HARD_FLOAT
7544 && TARGET_VFP
7545 && (mode == SFmode || mode == DFmode))
7546 || (TARGET_REALLY_IWMMXT
7547 && VALID_IWMMXT_REG_MODE (mode))
7548 || (TARGET_NEON
7549 && (VALID_NEON_DREG_MODE (mode)
7550 || VALID_NEON_QREG_MODE (mode))));
7551
7552 /* For some conditions, bail out when lower two bits are unaligned. */
7553 if ((val & 0x3) != 0
7554 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7555 && (coproc_p
7556 /* For DI, and DF under soft-float: */
7557 || ((mode == DImode || mode == DFmode)
7558 /* Without ldrd, we use stm/ldm, which does not
7559 fare well with unaligned bits. */
7560 && (! TARGET_LDRD
7561 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7562 || TARGET_THUMB2))))
7563 return false;
7564
7565 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7566 where the (reg+high) part gets turned into a reload add insn,
7567 we try to decompose the index into high/low values that can often
7568 also lead to better reload CSE.
7569 For example:
7570 ldr r0, [r2, #4100] // Offset too large
7571 ldr r1, [r2, #4104] // Offset too large
7572
7573 is best reloaded as:
7574 add t1, r2, #4096
7575 ldr r0, [t1, #4]
7576 add t2, r2, #4096
7577 ldr r1, [t2, #8]
7578
7579 which post-reload CSE can simplify in most cases to eliminate the
7580 second add instruction:
7581 add t1, r2, #4096
7582 ldr r0, [t1, #4]
7583 ldr r1, [t1, #8]
7584
7585 The idea here is that we want to split out the bits of the constant
7586 as a mask, rather than by subtracting the maximum offset that the
7587 respective type of load/store used can handle.
7588
7589 When encountering negative offsets, we can still use them even if
7590 the overall offset is positive; sometimes this may lead to an immediate
7591 that can be constructed with fewer instructions.
7592 For example:
7593 ldr r0, [r2, #0x3FFFFC]
7594
7595 This is best reloaded as:
7596 add t1, r2, #0x400000
7597 ldr r0, [t1, #-4]
7598
7599 The trick for spotting this for a load insn with N bits of offset
7600 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7601 negative offset that is going to make bit N and all the bits below
7602 it become zero in the remainder part.
7603
7604 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7605 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7606 used in most cases of ARM load/store instructions. */
7607
7608 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7609 (((VAL) & ((1 << (N)) - 1)) \
7610 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7611 : 0)
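/* As a worked example of the macro above: for VAL = 0x3ffffc and N = 12
   the low 12 bits are nonzero, so it evaluates to
   ((0x3ffffc & 0x1fff) ^ 0x1000) - 0x1000 = 0xffc - 0x1000 = -4,
   leaving HIGH = 0x3ffffc - (-4) = 0x400000 as in the example above.  */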
7612
7613 if (coproc_p)
7614 {
7615 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7616
7617 /* NEON quad-word load/stores are made of two double-word accesses,
7618 so the valid index range is reduced by 8. Treat as 9-bit range if
7619 we go over it. */
7620 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7621 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7622 }
7623 else if (GET_MODE_SIZE (mode) == 8)
7624 {
7625 if (TARGET_LDRD)
7626 low = (TARGET_THUMB2
7627 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7628 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7629 else
7630 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7631 to access doublewords. The supported load/store offsets are
7632 -8, -4, and 4, which we try to produce here. */
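/* E.g. val = 0x10c gives low = (0xc ^ 0x8) - 0x8 = -4 and hence
   high = 0x110.  */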
7633 low = ((val & 0xf) ^ 0x8) - 0x8;
7634 }
7635 else if (GET_MODE_SIZE (mode) < 8)
7636 {
7637 /* NEON element load/stores do not have an offset. */
7638 if (TARGET_NEON_FP16 && mode == HFmode)
7639 return false;
7640
7641 if (TARGET_THUMB2)
7642 {
7643 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7644 Try the wider 12-bit range first, and re-try if the result
7645 is out of range. */
7646 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7647 if (low < -255)
7648 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7649 }
7650 else
7651 {
7652 if (mode == HImode || mode == HFmode)
7653 {
7654 if (arm_arch4)
7655 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7656 else
7657 {
7658 /* The storehi/movhi_bytes fallbacks can use only
7659 [-4094,+4094] of the full ldrb/strb index range. */
7660 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7661 if (low == 4095 || low == -4095)
7662 return false;
7663 }
7664 }
7665 else
7666 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7667 }
7668 }
7669 else
7670 return false;
7671
7672 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7673 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7674 - (unsigned HOST_WIDE_INT) 0x80000000);
7675 /* Check for overflow or zero */
7676 if (low == 0 || high == 0 || (high + low != val))
7677 return false;
7678
7679 /* Reload the high part into a base reg; leave the low part
7680 in the mem.
7681 Note that replacing this gen_rtx_PLUS with plus_constant is
7682 wrong in this case because we rely on the
7683 (plus (plus reg c1) c2) structure being preserved so that
7684 XEXP (*p, 0) in push_reload below uses the correct term. */
7685 *p = gen_rtx_PLUS (GET_MODE (*p),
7686 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7687 GEN_INT (high)),
7688 GEN_INT (low));
7689 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7690 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7691 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7692 return true;
7693 }
7694
7695 return false;
7696 }
7697
7698 rtx
7699 thumb_legitimize_reload_address (rtx *x_p,
7700 enum machine_mode mode,
7701 int opnum, int type,
7702 int ind_levels ATTRIBUTE_UNUSED)
7703 {
7704 rtx x = *x_p;
7705
7706 if (GET_CODE (x) == PLUS
7707 && GET_MODE_SIZE (mode) < 4
7708 && REG_P (XEXP (x, 0))
7709 && XEXP (x, 0) == stack_pointer_rtx
7710 && CONST_INT_P (XEXP (x, 1))
7711 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7712 {
7713 rtx orig_x = x;
7714
7715 x = copy_rtx (x);
7716 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7717 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7718 return x;
7719 }
7720
7721 /* If both registers are hi-regs, then it's better to reload the
7722 entire expression rather than each register individually. That
7723 only requires one reload register rather than two. */
7724 if (GET_CODE (x) == PLUS
7725 && REG_P (XEXP (x, 0))
7726 && REG_P (XEXP (x, 1))
7727 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7728 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7729 {
7730 rtx orig_x = x;
7731
7732 x = copy_rtx (x);
7733 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7734 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7735 return x;
7736 }
7737
7738 return NULL;
7739 }
7740
7741 /* Test for various thread-local symbols. */
7742
7743 /* Return TRUE if X is a thread-local symbol. */
7744
7745 static bool
7746 arm_tls_symbol_p (rtx x)
7747 {
7748 if (! TARGET_HAVE_TLS)
7749 return false;
7750
7751 if (GET_CODE (x) != SYMBOL_REF)
7752 return false;
7753
7754 return SYMBOL_REF_TLS_MODEL (x) != 0;
7755 }
7756
7757 /* Helper for arm_tls_referenced_p. */
7758
7759 static int
7760 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7761 {
7762 if (GET_CODE (*x) == SYMBOL_REF)
7763 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7764
7765 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7766 TLS offsets, not real symbol references. */
7767 if (GET_CODE (*x) == UNSPEC
7768 && XINT (*x, 1) == UNSPEC_TLS)
7769 return -1;
7770
7771 return 0;
7772 }
7773
7774 /* Return TRUE if X contains any TLS symbol references. */
7775
7776 bool
7777 arm_tls_referenced_p (rtx x)
7778 {
7779 if (! TARGET_HAVE_TLS)
7780 return false;
7781
7782 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7783 }
7784
7785 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7786
7787 On the ARM, allow any integer (invalid ones are removed later by insn
7788 patterns), nice doubles and symbol_refs which refer to the function's
7789 constant pool XXX.
7790
7791 When generating pic allow anything. */
7792
7793 static bool
7794 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7795 {
7796 /* At present, we have no support for Neon structure constants, so forbid
7797 them here. It might be possible to handle simple cases like 0 and -1
7798 in future. */
7799 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7800 return false;
7801
7802 return flag_pic || !label_mentioned_p (x);
7803 }
7804
7805 static bool
7806 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7807 {
7808 return (CONST_INT_P (x)
7809 || CONST_DOUBLE_P (x)
7810 || CONSTANT_ADDRESS_P (x)
7811 || flag_pic);
7812 }
7813
7814 static bool
7815 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7816 {
7817 return (!arm_cannot_force_const_mem (mode, x)
7818 && (TARGET_32BIT
7819 ? arm_legitimate_constant_p_1 (mode, x)
7820 : thumb_legitimate_constant_p (mode, x)));
7821 }
7822
7823 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7824
7825 static bool
7826 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7827 {
7828 rtx base, offset;
7829
7830 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7831 {
7832 split_const (x, &base, &offset);
7833 if (GET_CODE (base) == SYMBOL_REF
7834 && !offset_within_block_p (base, INTVAL (offset)))
7835 return true;
7836 }
7837 return arm_tls_referenced_p (x);
7838 }
7839 \f
7840 #define REG_OR_SUBREG_REG(X) \
7841 (REG_P (X) \
7842 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7843
7844 #define REG_OR_SUBREG_RTX(X) \
7845 (REG_P (X) ? (X) : SUBREG_REG (X))
7846
7847 static inline int
7848 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7849 {
7850 enum machine_mode mode = GET_MODE (x);
7851 int total, words;
7852
7853 switch (code)
7854 {
7855 case ASHIFT:
7856 case ASHIFTRT:
7857 case LSHIFTRT:
7858 case ROTATERT:
7859 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7860
7861 case PLUS:
7862 case MINUS:
7863 case COMPARE:
7864 case NEG:
7865 case NOT:
7866 return COSTS_N_INSNS (1);
7867
7868 case MULT:
7869 if (CONST_INT_P (XEXP (x, 1)))
7870 {
7871 int cycles = 0;
7872 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7873
7874 while (i)
7875 {
7876 i >>= 2;
7877 cycles++;
7878 }
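/* E.g. a multiplier of 21 (0b10101) takes three iterations, giving
   COSTS_N_INSNS (2) + 3.  */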
7879 return COSTS_N_INSNS (2) + cycles;
7880 }
7881 return COSTS_N_INSNS (1) + 16;
7882
7883 case SET:
7884 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7885 the mode. */
7886 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7887 return (COSTS_N_INSNS (words)
7888 + 4 * ((MEM_P (SET_SRC (x)))
7889 + MEM_P (SET_DEST (x))));
7890
7891 case CONST_INT:
7892 if (outer == SET)
7893 {
7894 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7895 return 0;
7896 if (thumb_shiftable_const (INTVAL (x)))
7897 return COSTS_N_INSNS (2);
7898 return COSTS_N_INSNS (3);
7899 }
7900 else if ((outer == PLUS || outer == COMPARE)
7901 && INTVAL (x) < 256 && INTVAL (x) > -256)
7902 return 0;
7903 else if ((outer == IOR || outer == XOR || outer == AND)
7904 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7905 return COSTS_N_INSNS (1);
7906 else if (outer == AND)
7907 {
7908 int i;
7909 /* This duplicates the tests in the andsi3 expander. */
7910 for (i = 9; i <= 31; i++)
7911 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7912 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7913 return COSTS_N_INSNS (2);
7914 }
7915 else if (outer == ASHIFT || outer == ASHIFTRT
7916 || outer == LSHIFTRT)
7917 return 0;
7918 return COSTS_N_INSNS (2);
7919
7920 case CONST:
7921 case CONST_DOUBLE:
7922 case LABEL_REF:
7923 case SYMBOL_REF:
7924 return COSTS_N_INSNS (3);
7925
7926 case UDIV:
7927 case UMOD:
7928 case DIV:
7929 case MOD:
7930 return 100;
7931
7932 case TRUNCATE:
7933 return 99;
7934
7935 case AND:
7936 case XOR:
7937 case IOR:
7938 /* XXX guess. */
7939 return 8;
7940
7941 case MEM:
7942 /* XXX another guess. */
7943 /* Memory costs quite a lot for the first word, but subsequent words
7944 load at the equivalent of a single insn each. */
7945 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7946 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7947 ? 4 : 0));
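/* E.g. with 4-byte words an SImode load costs 10 and a DImode load 14,
   plus 4 more when the address is a constant-pool SYMBOL_REF.  */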
7948
7949 case IF_THEN_ELSE:
7950 /* XXX a guess. */
7951 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7952 return 14;
7953 return 2;
7954
7955 case SIGN_EXTEND:
7956 case ZERO_EXTEND:
7957 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7958 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7959
7960 if (mode == SImode)
7961 return total;
7962
7963 if (arm_arch6)
7964 return total + COSTS_N_INSNS (1);
7965
7966 /* Assume a two-shift sequence. Increase the cost slightly so
7967 we prefer actual shifts over an extend operation. */
7968 return total + 1 + COSTS_N_INSNS (2);
7969
7970 default:
7971 return 99;
7972 }
7973 }
7974
7975 static inline bool
7976 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7977 {
7978 enum machine_mode mode = GET_MODE (x);
7979 enum rtx_code subcode;
7980 rtx operand;
7981 enum rtx_code code = GET_CODE (x);
7982 *total = 0;
7983
7984 switch (code)
7985 {
7986 case MEM:
7987 /* Memory costs quite a lot for the first word, but subsequent words
7988 load at the equivalent of a single insn each. */
7989 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7990 return true;
7991
7992 case DIV:
7993 case MOD:
7994 case UDIV:
7995 case UMOD:
7996 if (TARGET_HARD_FLOAT && mode == SFmode)
7997 *total = COSTS_N_INSNS (2);
7998 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7999 *total = COSTS_N_INSNS (4);
8000 else
8001 *total = COSTS_N_INSNS (20);
8002 return false;
8003
8004 case ROTATE:
8005 if (REG_P (XEXP (x, 1)))
8006 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8007 else if (!CONST_INT_P (XEXP (x, 1)))
8008 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8009
8010 /* Fall through */
8011 case ROTATERT:
8012 if (mode != SImode)
8013 {
8014 *total += COSTS_N_INSNS (4);
8015 return true;
8016 }
8017
8018 /* Fall through */
8019 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8020 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8021 if (mode == DImode)
8022 {
8023 *total += COSTS_N_INSNS (3);
8024 return true;
8025 }
8026
8027 *total += COSTS_N_INSNS (1);
8028 /* Increase the cost of complex shifts because they aren't any faster,
8029 and they reduce dual-issue opportunities. */
8030 if (arm_tune_cortex_a9
8031 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8032 ++*total;
8033
8034 return true;
8035
8036 case MINUS:
8037 if (mode == DImode)
8038 {
8039 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8040 if (CONST_INT_P (XEXP (x, 0))
8041 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8042 {
8043 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8044 return true;
8045 }
8046
8047 if (CONST_INT_P (XEXP (x, 1))
8048 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8049 {
8050 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8051 return true;
8052 }
8053
8054 return false;
8055 }
8056
8057 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8058 {
8059 if (TARGET_HARD_FLOAT
8060 && (mode == SFmode
8061 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8062 {
8063 *total = COSTS_N_INSNS (1);
8064 if (CONST_DOUBLE_P (XEXP (x, 0))
8065 && arm_const_double_rtx (XEXP (x, 0)))
8066 {
8067 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8068 return true;
8069 }
8070
8071 if (CONST_DOUBLE_P (XEXP (x, 1))
8072 && arm_const_double_rtx (XEXP (x, 1)))
8073 {
8074 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8075 return true;
8076 }
8077
8078 return false;
8079 }
8080 *total = COSTS_N_INSNS (20);
8081 return false;
8082 }
8083
8084 *total = COSTS_N_INSNS (1);
8085 if (CONST_INT_P (XEXP (x, 0))
8086 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8087 {
8088 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8089 return true;
8090 }
8091
8092 subcode = GET_CODE (XEXP (x, 1));
8093 if (subcode == ASHIFT || subcode == ASHIFTRT
8094 || subcode == LSHIFTRT
8095 || subcode == ROTATE || subcode == ROTATERT)
8096 {
8097 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8098 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8099 return true;
8100 }
8101
8102 /* A shift as a part of RSB costs no more than RSB itself. */
8103 if (GET_CODE (XEXP (x, 0)) == MULT
8104 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8105 {
8106 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8107 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8108 return true;
8109 }
8110
8111 if (subcode == MULT
8112 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8113 {
8114 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8115 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8116 return true;
8117 }
8118
8119 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8120 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8121 {
8122 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8123 if (REG_P (XEXP (XEXP (x, 1), 0))
8124 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8125 *total += COSTS_N_INSNS (1);
8126
8127 return true;
8128 }
8129
8130 /* Fall through */
8131
8132 case PLUS:
8133 if (code == PLUS && arm_arch6 && mode == SImode
8134 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8135 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8136 {
8137 *total = COSTS_N_INSNS (1);
8138 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8139 0, speed);
8140 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8141 return true;
8142 }
8143
8144 /* MLA: All arguments must be registers. We filter out
8145 multiplication by a power of two, so that we fall through to
8146 the code below. */
8147 if (GET_CODE (XEXP (x, 0)) == MULT
8148 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8149 {
8150 /* The cost comes from the cost of the multiply. */
8151 return false;
8152 }
8153
8154 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8155 {
8156 if (TARGET_HARD_FLOAT
8157 && (mode == SFmode
8158 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8159 {
8160 *total = COSTS_N_INSNS (1);
8161 if (CONST_DOUBLE_P (XEXP (x, 1))
8162 && arm_const_double_rtx (XEXP (x, 1)))
8163 {
8164 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8165 return true;
8166 }
8167
8168 return false;
8169 }
8170
8171 *total = COSTS_N_INSNS (20);
8172 return false;
8173 }
8174
8175 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8176 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8177 {
8178 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8179 if (REG_P (XEXP (XEXP (x, 0), 0))
8180 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8181 *total += COSTS_N_INSNS (1);
8182 return true;
8183 }
8184
8185 /* Fall through */
8186
8187 case AND: case XOR: case IOR:
8188
8189 /* Normally the frame registers will be split into reg+const during
8190 reload, so it is a bad idea to combine them with other instructions,
8191 since then they might not be moved outside of loops. As a compromise
8192 we allow integration with ops that have a constant as their second
8193 operand. */
8194 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8195 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8196 && !CONST_INT_P (XEXP (x, 1)))
8197 *total = COSTS_N_INSNS (1);
8198
8199 if (mode == DImode)
8200 {
8201 *total += COSTS_N_INSNS (2);
8202 if (CONST_INT_P (XEXP (x, 1))
8203 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8204 {
8205 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8206 return true;
8207 }
8208
8209 return false;
8210 }
8211
8212 *total += COSTS_N_INSNS (1);
8213 if (CONST_INT_P (XEXP (x, 1))
8214 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8215 {
8216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8217 return true;
8218 }
8219 subcode = GET_CODE (XEXP (x, 0));
8220 if (subcode == ASHIFT || subcode == ASHIFTRT
8221 || subcode == LSHIFTRT
8222 || subcode == ROTATE || subcode == ROTATERT)
8223 {
8224 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8225 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8226 return true;
8227 }
8228
8229 if (subcode == MULT
8230 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8231 {
8232 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8233 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8234 return true;
8235 }
8236
8237 if (subcode == UMIN || subcode == UMAX
8238 || subcode == SMIN || subcode == SMAX)
8239 {
8240 *total = COSTS_N_INSNS (3);
8241 return true;
8242 }
8243
8244 return false;
8245
8246 case MULT:
8247 /* This should have been handled by the CPU specific routines. */
8248 gcc_unreachable ();
8249
8250 case TRUNCATE:
8251 if (arm_arch3m && mode == SImode
8252 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8253 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8254 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8255 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8256 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8257 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8258 {
8259 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8260 return true;
8261 }
8262 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8263 return false;
8264
8265 case NEG:
8266 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8267 {
8268 if (TARGET_HARD_FLOAT
8269 && (mode == SFmode
8270 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8271 {
8272 *total = COSTS_N_INSNS (1);
8273 return false;
8274 }
8275 *total = COSTS_N_INSNS (2);
8276 return false;
8277 }
8278
8279 /* Fall through */
8280 case NOT:
8281 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8282 if (mode == SImode && code == NOT)
8283 {
8284 subcode = GET_CODE (XEXP (x, 0));
8285 if (subcode == ASHIFT || subcode == ASHIFTRT
8286 || subcode == LSHIFTRT
8287 || subcode == ROTATE || subcode == ROTATERT
8288 || (subcode == MULT
8289 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8290 {
8291 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8292 /* Register shifts cost an extra cycle. */
8293 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8294 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8295 subcode, 1, speed);
8296 return true;
8297 }
8298 }
8299
8300 return false;
8301
8302 case IF_THEN_ELSE:
8303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8304 {
8305 *total = COSTS_N_INSNS (4);
8306 return true;
8307 }
8308
8309 operand = XEXP (x, 0);
8310
8311 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8312 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8313 && REG_P (XEXP (operand, 0))
8314 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8315 *total += COSTS_N_INSNS (1);
8316 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8317 + rtx_cost (XEXP (x, 2), code, 2, speed));
8318 return true;
8319
8320 case NE:
8321 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8322 {
8323 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8324 return true;
8325 }
8326 goto scc_insn;
8327
8328 case GE:
8329 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8330 && mode == SImode && XEXP (x, 1) == const0_rtx)
8331 {
8332 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8333 return true;
8334 }
8335 goto scc_insn;
8336
8337 case LT:
8338 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8339 && mode == SImode && XEXP (x, 1) == const0_rtx)
8340 {
8341 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8342 return true;
8343 }
8344 goto scc_insn;
8345
8346 case EQ:
8347 case GT:
8348 case LE:
8349 case GEU:
8350 case LTU:
8351 case GTU:
8352 case LEU:
8353 case UNORDERED:
8354 case ORDERED:
8355 case UNEQ:
8356 case UNGE:
8357 case UNLT:
8358 case UNGT:
8359 case UNLE:
8360 scc_insn:
8361 /* SCC insns. When the comparison has already been
8362 performed, they cost 2 instructions. Otherwise they need
8363 an additional comparison before them. */
8364 *total = COSTS_N_INSNS (2);
8365 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8366 {
8367 return true;
8368 }
8369
8370 /* Fall through */
8371 case COMPARE:
8372 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8373 {
8374 *total = 0;
8375 return true;
8376 }
8377
8378 *total += COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x, 1))
8380 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8381 {
8382 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8383 return true;
8384 }
8385
8386 subcode = GET_CODE (XEXP (x, 0));
8387 if (subcode == ASHIFT || subcode == ASHIFTRT
8388 || subcode == LSHIFTRT
8389 || subcode == ROTATE || subcode == ROTATERT)
8390 {
8391 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8392 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8393 return true;
8394 }
8395
8396 if (subcode == MULT
8397 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8398 {
8399 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8400 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8401 return true;
8402 }
8403
8404 return false;
8405
8406 case UMIN:
8407 case UMAX:
8408 case SMIN:
8409 case SMAX:
8410 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8411 if (!CONST_INT_P (XEXP (x, 1))
8412 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8413 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8414 return true;
8415
8416 case ABS:
8417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8418 {
8419 if (TARGET_HARD_FLOAT
8420 && (mode == SFmode
8421 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8422 {
8423 *total = COSTS_N_INSNS (1);
8424 return false;
8425 }
8426 *total = COSTS_N_INSNS (20);
8427 return false;
8428 }
8429 *total = COSTS_N_INSNS (1);
8430 if (mode == DImode)
8431 *total += COSTS_N_INSNS (3);
8432 return false;
8433
8434 case SIGN_EXTEND:
8435 case ZERO_EXTEND:
8436 *total = 0;
8437 if (GET_MODE_CLASS (mode) == MODE_INT)
8438 {
8439 rtx op = XEXP (x, 0);
8440 enum machine_mode opmode = GET_MODE (op);
8441
8442 if (mode == DImode)
8443 *total += COSTS_N_INSNS (1);
8444
8445 if (opmode != SImode)
8446 {
8447 if (MEM_P (op))
8448 {
8449 /* If !arm_arch4, we use one of the extendhisi2_mem
8450 or movhi_bytes patterns for HImode. For a QImode
8451 sign extension, we first zero-extend from memory
8452 and then perform a shift sequence. */
8453 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8454 *total += COSTS_N_INSNS (2);
8455 }
8456 else if (arm_arch6)
8457 *total += COSTS_N_INSNS (1);
8458
8459 /* We don't have the necessary insn, so we need to perform some
8460 other operation. */
8461 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8462 /* An and with constant 255. */
8463 *total += COSTS_N_INSNS (1);
8464 else
8465 /* A shift sequence. Increase costs slightly to avoid
8466 combining two shifts into an extend operation. */
8467 *total += COSTS_N_INSNS (2) + 1;
8468 }
8469
8470 return false;
8471 }
8472
8473 switch (GET_MODE (XEXP (x, 0)))
8474 {
8475 case V8QImode:
8476 case V4HImode:
8477 case V2SImode:
8478 case V4QImode:
8479 case V2HImode:
8480 *total = COSTS_N_INSNS (1);
8481 return false;
8482
8483 default:
8484 gcc_unreachable ();
8485 }
8486 gcc_unreachable ();
8487
8488 case ZERO_EXTRACT:
8489 case SIGN_EXTRACT:
8490 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8491 return true;
8492
8493 case CONST_INT:
8494 if (const_ok_for_arm (INTVAL (x))
8495 || const_ok_for_arm (~INTVAL (x)))
8496 *total = COSTS_N_INSNS (1);
8497 else
8498 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8499 INTVAL (x), NULL_RTX,
8500 NULL_RTX, 0, 0));
8501 return true;
8502
8503 case CONST:
8504 case LABEL_REF:
8505 case SYMBOL_REF:
8506 *total = COSTS_N_INSNS (3);
8507 return true;
8508
8509 case HIGH:
8510 *total = COSTS_N_INSNS (1);
8511 return true;
8512
8513 case LO_SUM:
8514 *total = COSTS_N_INSNS (1);
8515 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8516 return true;
8517
8518 case CONST_DOUBLE:
8519 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8520 && (mode == SFmode || !TARGET_VFP_SINGLE))
8521 *total = COSTS_N_INSNS (1);
8522 else
8523 *total = COSTS_N_INSNS (4);
8524 return true;
8525
8526 case SET:
8527 /* The vec_extract patterns accept memory operands that require an
8528 address reload. Account for the cost of that reload to give the
8529 auto-inc-dec pass an incentive to try to replace them. */
8530 if (TARGET_NEON && MEM_P (SET_DEST (x))
8531 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8532 {
8533 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8534 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8535 *total += COSTS_N_INSNS (1);
8536 return true;
8537 }
8538 /* Likewise for the vec_set patterns. */
8539 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8540 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8541 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8542 {
8543 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8544 *total = rtx_cost (mem, code, 0, speed);
8545 if (!neon_vector_mem_operand (mem, 2, true))
8546 *total += COSTS_N_INSNS (1);
8547 return true;
8548 }
8549 return false;
8550
8551 case UNSPEC:
8552 /* We cost this as high as our memory costs to allow this to
8553 be hoisted from loops. */
8554 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8555 {
8556 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8557 }
8558 return true;
8559
8560 case CONST_VECTOR:
8561 if (TARGET_NEON
8562 && TARGET_HARD_FLOAT
8563 && outer == SET
8564 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8565 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8566 *total = COSTS_N_INSNS (1);
8567 else
8568 *total = COSTS_N_INSNS (4);
8569 return true;
8570
8571 default:
8572 *total = COSTS_N_INSNS (4);
8573 return false;
8574 }
8575 }
8576
8577 /* Estimate the size cost of Thumb-1 instructions.
8578 For now most of the code is copied from thumb1_rtx_costs. We need more
8579 fine-grained tuning when we have more related test cases. */
8580 static inline int
8581 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8582 {
8583 enum machine_mode mode = GET_MODE (x);
8584 int words;
8585
8586 switch (code)
8587 {
8588 case ASHIFT:
8589 case ASHIFTRT:
8590 case LSHIFTRT:
8591 case ROTATERT:
8592 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8593
8594 case PLUS:
8595 case MINUS:
8596 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8597 patterns defined by RTL expansion, especially for the expansion of
8598 multiplication. */
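/* E.g. (plus (mult reg (const_int 4)) reg) needs a separate shift
   followed by an add here, hence the COSTS_N_INSNS (2) below.  */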
8599 if ((GET_CODE (XEXP (x, 0)) == MULT
8600 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8601 || (GET_CODE (XEXP (x, 1)) == MULT
8602 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8603 return COSTS_N_INSNS (2);
8604 /* Deliberately fall through for normal RTX. */
8605 case COMPARE:
8606 case NEG:
8607 case NOT:
8608 return COSTS_N_INSNS (1);
8609
8610 case MULT:
8611 if (CONST_INT_P (XEXP (x, 1)))
8612 {
8613 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8614 into a register first. */
8615 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8616 return COSTS_N_INSNS (1) + const_size;
8617 }
8618 return COSTS_N_INSNS (1);
8619
8620 case SET:
8621 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8622 the mode. */
8623 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8624 return (COSTS_N_INSNS (words)
8625 + 4 * ((MEM_P (SET_SRC (x)))
8626 + MEM_P (SET_DEST (x))));
8627
8628 case CONST_INT:
8629 if (outer == SET)
8630 {
8631 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8632 return COSTS_N_INSNS (1);
8633 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8634 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8635 return COSTS_N_INSNS (2);
8636 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8637 if (thumb_shiftable_const (INTVAL (x)))
8638 return COSTS_N_INSNS (2);
8639 return COSTS_N_INSNS (3);
8640 }
8641 else if ((outer == PLUS || outer == COMPARE)
8642 && INTVAL (x) < 256 && INTVAL (x) > -256)
8643 return 0;
8644 else if ((outer == IOR || outer == XOR || outer == AND)
8645 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8646 return COSTS_N_INSNS (1);
8647 else if (outer == AND)
8648 {
8649 int i;
8650 /* This duplicates the tests in the andsi3 expander. */
8651 for (i = 9; i <= 31; i++)
8652 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8653 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8654 return COSTS_N_INSNS (2);
8655 }
8656 else if (outer == ASHIFT || outer == ASHIFTRT
8657 || outer == LSHIFTRT)
8658 return 0;
8659 return COSTS_N_INSNS (2);
8660
8661 case CONST:
8662 case CONST_DOUBLE:
8663 case LABEL_REF:
8664 case SYMBOL_REF:
8665 return COSTS_N_INSNS (3);
8666
8667 case UDIV:
8668 case UMOD:
8669 case DIV:
8670 case MOD:
8671 return 100;
8672
8673 case TRUNCATE:
8674 return 99;
8675
8676 case AND:
8677 case XOR:
8678 case IOR:
8679 /* XXX guess. */
8680 return 8;
8681
8682 case MEM:
8683 /* XXX another guess. */
8684 /* Memory costs quite a lot for the first word, but subsequent words
8685 load at the equivalent of a single insn each. */
8686 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8687 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8688 ? 4 : 0));
8689
8690 case IF_THEN_ELSE:
8691 /* XXX a guess. */
8692 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8693 return 14;
8694 return 2;
8695
8696 case ZERO_EXTEND:
8697 /* XXX still guessing. */
8698 switch (GET_MODE (XEXP (x, 0)))
8699 {
8700 case QImode:
8701 return (1 + (mode == DImode ? 4 : 0)
8702 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8703
8704 case HImode:
8705 return (4 + (mode == DImode ? 4 : 0)
8706 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8707
8708 case SImode:
8709 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8710
8711 default:
8712 return 99;
8713 }
8714
8715 default:
8716 return 99;
8717 }
8718 }
8719
8720 /* RTX costs when optimizing for size. */
8721 static bool
8722 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8723 int *total)
8724 {
8725 enum machine_mode mode = GET_MODE (x);
8726 if (TARGET_THUMB1)
8727 {
8728 *total = thumb1_size_rtx_costs (x, code, outer_code);
8729 return true;
8730 }
8731
8732 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8733 switch (code)
8734 {
8735 case MEM:
8736 /* A memory access costs 1 insn if the mode is small or the address is
8737 a single register; otherwise it costs one insn per word. */
8738 if (REG_P (XEXP (x, 0)))
8739 *total = COSTS_N_INSNS (1);
8740 else if (flag_pic
8741 && GET_CODE (XEXP (x, 0)) == PLUS
8742 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8743 /* This will be split into two instructions.
8744 See arm.md:calculate_pic_address. */
8745 *total = COSTS_N_INSNS (2);
8746 else
8747 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8748 return true;
8749
8750 case DIV:
8751 case MOD:
8752 case UDIV:
8753 case UMOD:
8754 /* Needs a libcall, so it costs about this. */
8755 *total = COSTS_N_INSNS (2);
8756 return false;
8757
8758 case ROTATE:
8759 if (mode == SImode && REG_P (XEXP (x, 1)))
8760 {
8761 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8762 return true;
8763 }
8764 /* Fall through */
8765 case ROTATERT:
8766 case ASHIFT:
8767 case LSHIFTRT:
8768 case ASHIFTRT:
8769 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8770 {
8771 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8772 return true;
8773 }
8774 else if (mode == SImode)
8775 {
8776 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8777 /* Slightly disparage register shifts, but not by much. */
8778 if (!CONST_INT_P (XEXP (x, 1)))
8779 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8780 return true;
8781 }
8782
8783 /* Needs a libcall. */
8784 *total = COSTS_N_INSNS (2);
8785 return false;
8786
8787 case MINUS:
8788 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8789 && (mode == SFmode || !TARGET_VFP_SINGLE))
8790 {
8791 *total = COSTS_N_INSNS (1);
8792 return false;
8793 }
8794
8795 if (mode == SImode)
8796 {
8797 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8798 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8799
8800 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8801 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8802 || subcode1 == ROTATE || subcode1 == ROTATERT
8803 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8804 || subcode1 == ASHIFTRT)
8805 {
8806 /* It's just the cost of the two operands. */
8807 *total = 0;
8808 return false;
8809 }
8810
8811 *total = COSTS_N_INSNS (1);
8812 return false;
8813 }
8814
8815 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8816 return false;
8817
8818 case PLUS:
8819 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8820 && (mode == SFmode || !TARGET_VFP_SINGLE))
8821 {
8822 *total = COSTS_N_INSNS (1);
8823 return false;
8824 }
8825
8826 /* A shift as a part of ADD costs nothing. */
8827 if (GET_CODE (XEXP (x, 0)) == MULT
8828 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8829 {
8830 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8831 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8832 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8833 return true;
8834 }
8835
8836 /* Fall through */
8837 case AND: case XOR: case IOR:
8838 if (mode == SImode)
8839 {
8840 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8841
8842 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8843 || subcode == LSHIFTRT || subcode == ASHIFTRT
8844 || (code == AND && subcode == NOT))
8845 {
8846 /* It's just the cost of the two operands. */
8847 *total = 0;
8848 return false;
8849 }
8850 }
8851
8852 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8853 return false;
8854
8855 case MULT:
8856 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8857 return false;
8858
8859 case NEG:
8860 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8861 && (mode == SFmode || !TARGET_VFP_SINGLE))
8862 {
8863 *total = COSTS_N_INSNS (1);
8864 return false;
8865 }
8866
8867 /* Fall through */
8868 case NOT:
8869 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8870
8871 return false;
8872
8873 case IF_THEN_ELSE:
8874 *total = 0;
8875 return false;
8876
8877 case COMPARE:
8878 if (cc_register (XEXP (x, 0), VOIDmode))
8879 *total = 0;
8880 else
8881 *total = COSTS_N_INSNS (1);
8882 return false;
8883
8884 case ABS:
8885 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8886 && (mode == SFmode || !TARGET_VFP_SINGLE))
8887 *total = COSTS_N_INSNS (1);
8888 else
8889 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8890 return false;
8891
8892 case SIGN_EXTEND:
8893 case ZERO_EXTEND:
8894 return arm_rtx_costs_1 (x, outer_code, total, 0);
8895
8896 case CONST_INT:
8897 if (const_ok_for_arm (INTVAL (x)))
8898 /* A multiplication by a constant requires another instruction
8899 to load the constant to a register. */
8900 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8901 ? 1 : 0);
8902 else if (const_ok_for_arm (~INTVAL (x)))
8903 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8904 else if (const_ok_for_arm (-INTVAL (x)))
8905 {
8906 if (outer_code == COMPARE || outer_code == PLUS
8907 || outer_code == MINUS)
8908 *total = 0;
8909 else
8910 *total = COSTS_N_INSNS (1);
8911 }
8912 else
8913 *total = COSTS_N_INSNS (2);
8914 return true;
8915
8916 case CONST:
8917 case LABEL_REF:
8918 case SYMBOL_REF:
8919 *total = COSTS_N_INSNS (2);
8920 return true;
8921
8922 case CONST_DOUBLE:
8923 *total = COSTS_N_INSNS (4);
8924 return true;
8925
8926 case CONST_VECTOR:
8927 if (TARGET_NEON
8928 && TARGET_HARD_FLOAT
8929 && outer_code == SET
8930 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8931 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8932 *total = COSTS_N_INSNS (1);
8933 else
8934 *total = COSTS_N_INSNS (4);
8935 return true;
8936
8937 case HIGH:
8938 case LO_SUM:
8939 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8940 cost of these slightly. */
8941 *total = COSTS_N_INSNS (1) + 1;
8942 return true;
8943
8944 case SET:
8945 return false;
8946
8947 default:
8948 if (mode != VOIDmode)
8949 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8950 else
8951 *total = COSTS_N_INSNS (4); /* Who knows? */
8952 return false;
8953 }
8954 }
8955
8956 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8957 operand, then return the operand that is being shifted. If the shift
8958 is not by a constant, then set SHIFT_REG to point to the operand.
8959 Return NULL if OP is not a shifter operand. */
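/* For example, (mult reg (const_int 8)) returns REG, while
   (ashift reg1 reg2) returns REG1 and sets *SHIFT_REG to REG2.  */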
8960 static rtx
8961 shifter_op_p (rtx op, rtx *shift_reg)
8962 {
8963 enum rtx_code code = GET_CODE (op);
8964
8965 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8966 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8967 return XEXP (op, 0);
8968 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8969 return XEXP (op, 0);
8970 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8971 || code == ASHIFTRT)
8972 {
8973 if (!CONST_INT_P (XEXP (op, 1)))
8974 *shift_reg = XEXP (op, 1);
8975 return XEXP (op, 0);
8976 }
8977
8978 return NULL;
8979 }
8980
8981 static bool
8982 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8983 {
8984 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8985 gcc_assert (GET_CODE (x) == UNSPEC);
8986
8987 switch (XINT (x, 1))
8988 {
8989 case UNSPEC_UNALIGNED_LOAD:
8990 /* We can only do unaligned loads into the integer unit, and we can't
8991 use LDM or LDRD. */
8992 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8993 if (speed_p)
8994 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8995 + extra_cost->ldst.load_unaligned);
8996
8997 #ifdef NOT_YET
8998 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8999 ADDR_SPACE_GENERIC, speed_p);
9000 #endif
9001 return true;
9002
9003 case UNSPEC_UNALIGNED_STORE:
9004 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9005 if (speed_p)
9006 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9007 + extra_cost->ldst.store_unaligned);
9008
9009 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9010 #ifdef NOT_YET
9011 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9012 ADDR_SPACE_GENERIC, speed_p);
9013 #endif
9014 return true;
9015
9016 case UNSPEC_VRINTZ:
9017 case UNSPEC_VRINTP:
9018 case UNSPEC_VRINTM:
9019 case UNSPEC_VRINTR:
9020 case UNSPEC_VRINTX:
9021 case UNSPEC_VRINTA:
9022 *cost = COSTS_N_INSNS (1);
9023 if (speed_p)
9024 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9025
9026 return true;
9027 default:
9028 *cost = COSTS_N_INSNS (2);
9029 break;
9030 }
9031 return false;
9032 }
9033
9034 /* Cost of a libcall. We assume one insn per argument, an amount for the
9035 call (one insn for -Os) and then one for processing the result. */
9036 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
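/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (4) when
   optimizing for size (two arguments plus two) and to COSTS_N_INSNS (20)
   when optimizing for speed (two arguments plus eighteen).  */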
9037
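/* If operand IDX of X is a valid left shift (including the equivalent MULT
   by a power of two), add the cost of an arithmetic instruction with a
   shifted operand: the extra arith_shift or arith_shift_reg amount when
   optimizing for speed, plus the costs of the shifted operand, of the shift
   amount when it is held in a register, and of the other operand; then
   return true from the enclosing cost function.  Used by the narrow-mode
   PLUS and MINUS cases below.  */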
9038 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9039 do \
9040 { \
9041 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9042 if (shift_op != NULL \
9043 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9044 { \
9045 if (shift_reg) \
9046 { \
9047 if (speed_p) \
9048 *cost += extra_cost->alu.arith_shift_reg; \
9049 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9050 } \
9051 else if (speed_p) \
9052 *cost += extra_cost->alu.arith_shift; \
9053 \
9054 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9055 + rtx_cost (XEXP (x, 1 - IDX), \
9056 OP, 1, speed_p)); \
9057 return true; \
9058 } \
9059 } \
9060 while (0);
9061
9062 /* RTX costs. Make an estimate of the cost of executing the operation
9063 X, which is contained within an operation with code OUTER_CODE.
9064 SPEED_P indicates whether the cost desired is the performance cost,
9065 or the size cost. The estimate is stored in COST and the return
9066 value is TRUE if the cost calculation is final, or FALSE if the
9067 caller should recurse through the operands of X to add additional
9068 costs.
9069
9070 We currently make no attempt to model the size savings of Thumb-2
9071 16-bit instructions. At the normal points in compilation where
9072 this code is called we have no measure of whether the condition
9073 flags are live or not, and thus no realistic way to determine what
9074 the size will eventually be. */
9075 static bool
9076 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9077 const struct cpu_cost_table *extra_cost,
9078 int *cost, bool speed_p)
9079 {
9080 enum machine_mode mode = GET_MODE (x);
9081
9082 if (TARGET_THUMB1)
9083 {
9084 if (speed_p)
9085 *cost = thumb1_rtx_costs (x, code, outer_code);
9086 else
9087 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9088 return true;
9089 }
9090
9091 switch (code)
9092 {
9093 case SET:
9094 *cost = 0;
9095 if (REG_P (SET_SRC (x))
9096 && REG_P (SET_DEST (x)))
9097 {
9098 /* Assume that most copies can be done with a single insn,
9099 unless we don't have HW FP, in which case everything
9100 larger than word mode will require two insns. */
9101 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9102 && GET_MODE_SIZE (mode) > 4)
9103 || mode == DImode)
9104 ? 2 : 1);
9105 /* Conditional register moves can be encoded
9106 in 16 bits in Thumb mode. */
9107 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9108 *cost >>= 1;
9109 }
9110
9111 if (CONST_INT_P (SET_SRC (x)))
9112 {
9113 /* Handle CONST_INT here, since the value doesn't have a mode
9114 and we would otherwise be unable to work out the true cost. */
9115 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9116 mode = GET_MODE (SET_DEST (x));
9117 outer_code = SET;
9118 /* Slightly lower the cost of setting a core reg to a constant.
9119 This helps break up chains and allows for better scheduling. */
9120 if (REG_P (SET_DEST (x))
9121 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9122 *cost -= 1;
9123 x = SET_SRC (x);
9124 /* Immediate moves with an immediate in the range [0, 255] can be
9125 encoded in 16 bits in Thumb mode. */
9126 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9127 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9128 *cost >>= 1;
9129 goto const_int_cost;
9130 }
9131
9132 return false;
9133
9134 case MEM:
9135 /* A memory access costs one insn if the mode is small or the address is
9136 a single register; otherwise it costs one insn per word. */
9137 if (REG_P (XEXP (x, 0)))
9138 *cost = COSTS_N_INSNS (1);
9139 else if (flag_pic
9140 && GET_CODE (XEXP (x, 0)) == PLUS
9141 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9142 /* This will be split into two instructions.
9143 See arm.md:calculate_pic_address. */
9144 *cost = COSTS_N_INSNS (2);
9145 else
9146 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9147
9148 /* For speed optimizations, add the costs of the address and
9149 accessing memory. */
9150 if (speed_p)
9151 #ifdef NOT_YET
9152 *cost += (extra_cost->ldst.load
9153 + arm_address_cost (XEXP (x, 0), mode,
9154 ADDR_SPACE_GENERIC, speed_p));
9155 #else
9156 *cost += extra_cost->ldst.load;
9157 #endif
9158 return true;
9159
9160 case PARALLEL:
9161 {
9162 /* Calculations of LDM costs are complex. We assume an initial cost
9163 (ldm_1st) which will load the number of registers mentioned in
9164 ldm_regs_per_insn_1st registers; then each additional
9165 ldm_regs_per_insn_subsequent registers cost one more insn. The
9166 formula for N regs is thus:
9167
9168 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9169 + ldm_regs_per_insn_subsequent - 1)
9170 / ldm_regs_per_insn_subsequent).
9171
9172 Additional costs may also be added for addressing. A similar
9173 formula is used for STM. */
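/* For example, with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, a five-register LDM adds
   COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2) == COSTS_N_INSNS (2)
   on top of the first-instruction amount.  */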
9174
9175 bool is_ldm = load_multiple_operation (x, SImode);
9176 bool is_stm = store_multiple_operation (x, SImode);
9177
9178 *cost = COSTS_N_INSNS (1);
9179
9180 if (is_ldm || is_stm)
9181 {
9182 if (speed_p)
9183 {
9184 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9185 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9186 ? extra_cost->ldst.ldm_regs_per_insn_1st
9187 : extra_cost->ldst.stm_regs_per_insn_1st;
9188 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9189 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9190 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9191
9192 *cost += regs_per_insn_1st
9193 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9194 + regs_per_insn_sub - 1)
9195 / regs_per_insn_sub);
9196 return true;
9197 }
9198
9199 }
9200 return false;
9201 }
9202 case DIV:
9203 case UDIV:
9204 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9205 && (mode == SFmode || !TARGET_VFP_SINGLE))
9206 *cost = COSTS_N_INSNS (speed_p
9207 ? extra_cost->fp[mode != SFmode].div : 1);
9208 else if (mode == SImode && TARGET_IDIV)
9209 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9210 else
9211 *cost = LIBCALL_COST (2);
9212 return false; /* All arguments must be in registers. */
9213
9214 case MOD:
9215 case UMOD:
9216 *cost = LIBCALL_COST (2);
9217 return false; /* All arguments must be in registers. */
9218
9219 case ROTATE:
9220 if (mode == SImode && REG_P (XEXP (x, 1)))
9221 {
9222 *cost = (COSTS_N_INSNS (2)
9223 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9224 if (speed_p)
9225 *cost += extra_cost->alu.shift_reg;
9226 return true;
9227 }
9228 /* Fall through */
9229 case ROTATERT:
9230 case ASHIFT:
9231 case LSHIFTRT:
9232 case ASHIFTRT:
9233 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9234 {
9235 *cost = (COSTS_N_INSNS (3)
9236 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9237 if (speed_p)
9238 *cost += 2 * extra_cost->alu.shift;
9239 return true;
9240 }
9241 else if (mode == SImode)
9242 {
9243 *cost = (COSTS_N_INSNS (1)
9244 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9245 /* Slightly disparage register shifts at -Os, but not by much. */
9246 if (!CONST_INT_P (XEXP (x, 1)))
9247 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9248 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9249 return true;
9250 }
9251 else if (GET_MODE_CLASS (mode) == MODE_INT
9252 && GET_MODE_SIZE (mode) < 4)
9253 {
9254 if (code == ASHIFT)
9255 {
9256 *cost = (COSTS_N_INSNS (1)
9257 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9258 /* Slightly disparage register shifts at -Os, but not by
9259 much. */
9260 if (!CONST_INT_P (XEXP (x, 1)))
9261 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9262 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9263 }
9264 else if (code == LSHIFTRT || code == ASHIFTRT)
9265 {
9266 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9267 {
9268 /* Can use SBFX/UBFX. */
9269 *cost = COSTS_N_INSNS (1);
9270 if (speed_p)
9271 *cost += extra_cost->alu.bfx;
9272 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9273 }
9274 else
9275 {
9276 *cost = COSTS_N_INSNS (2);
9277 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9278 if (speed_p)
9279 {
9280 if (CONST_INT_P (XEXP (x, 1)))
9281 *cost += 2 * extra_cost->alu.shift;
9282 else
9283 *cost += (extra_cost->alu.shift
9284 + extra_cost->alu.shift_reg);
9285 }
9286 else
9287 /* Slightly disparage register shifts. */
9288 *cost += !CONST_INT_P (XEXP (x, 1));
9289 }
9290 }
9291 else /* Rotates. */
9292 {
9293 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9294 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9295 if (speed_p)
9296 {
9297 if (CONST_INT_P (XEXP (x, 1)))
9298 *cost += (2 * extra_cost->alu.shift
9299 + extra_cost->alu.log_shift);
9300 else
9301 *cost += (extra_cost->alu.shift
9302 + extra_cost->alu.shift_reg
9303 + extra_cost->alu.log_shift_reg);
9304 }
9305 }
9306 return true;
9307 }
9308
9309 *cost = LIBCALL_COST (2);
9310 return false;
9311
9312 case MINUS:
9313 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9314 && (mode == SFmode || !TARGET_VFP_SINGLE))
9315 {
9316 *cost = COSTS_N_INSNS (1);
9317 if (GET_CODE (XEXP (x, 0)) == MULT
9318 || GET_CODE (XEXP (x, 1)) == MULT)
9319 {
9320 rtx mul_op0, mul_op1, sub_op;
9321
9322 if (speed_p)
9323 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9324
9325 if (GET_CODE (XEXP (x, 0)) == MULT)
9326 {
9327 mul_op0 = XEXP (XEXP (x, 0), 0);
9328 mul_op1 = XEXP (XEXP (x, 0), 1);
9329 sub_op = XEXP (x, 1);
9330 }
9331 else
9332 {
9333 mul_op0 = XEXP (XEXP (x, 1), 0);
9334 mul_op1 = XEXP (XEXP (x, 1), 1);
9335 sub_op = XEXP (x, 0);
9336 }
9337
9338 /* The first operand of the multiply may be optionally
9339 negated. */
9340 if (GET_CODE (mul_op0) == NEG)
9341 mul_op0 = XEXP (mul_op0, 0);
9342
9343 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9344 + rtx_cost (mul_op1, code, 0, speed_p)
9345 + rtx_cost (sub_op, code, 0, speed_p));
9346
9347 return true;
9348 }
9349
9350 if (speed_p)
9351 *cost += extra_cost->fp[mode != SFmode].addsub;
9352 return false;
9353 }
9354
9355 if (mode == SImode)
9356 {
9357 rtx shift_by_reg = NULL;
9358 rtx shift_op;
9359 rtx non_shift_op;
9360
9361 *cost = COSTS_N_INSNS (1);
9362
9363 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9364 if (shift_op == NULL)
9365 {
9366 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9367 non_shift_op = XEXP (x, 0);
9368 }
9369 else
9370 non_shift_op = XEXP (x, 1);
9371
9372 if (shift_op != NULL)
9373 {
9374 if (shift_by_reg != NULL)
9375 {
9376 if (speed_p)
9377 *cost += extra_cost->alu.arith_shift_reg;
9378 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9379 }
9380 else if (speed_p)
9381 *cost += extra_cost->alu.arith_shift;
9382
9383 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9384 + rtx_cost (non_shift_op, code, 0, speed_p));
9385 return true;
9386 }
9387
9388 if (arm_arch_thumb2
9389 && GET_CODE (XEXP (x, 1)) == MULT)
9390 {
9391 /* MLS. */
9392 if (speed_p)
9393 *cost += extra_cost->mult[0].add;
9394 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9395 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9396 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9397 return true;
9398 }
9399
9400 if (CONST_INT_P (XEXP (x, 0)))
9401 {
9402 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9403 INTVAL (XEXP (x, 0)), NULL_RTX,
9404 NULL_RTX, 1, 0);
9405 *cost = COSTS_N_INSNS (insns);
9406 if (speed_p)
9407 *cost += insns * extra_cost->alu.arith;
9408 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9409 return true;
9410 }
9411
9412 return false;
9413 }
9414
9415 if (GET_MODE_CLASS (mode) == MODE_INT
9416 && GET_MODE_SIZE (mode) < 4)
9417 {
9418 rtx shift_op, shift_reg;
9419 shift_reg = NULL;
9420
9421 /* We check both sides of the MINUS for shifter operands since,
9422 unlike PLUS, it's not commutative. */
9423
9424 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9425 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9426
9427 /* Slightly disparage, as we might need to widen the result. */
9428 *cost = 1 + COSTS_N_INSNS (1);
9429 if (speed_p)
9430 *cost += extra_cost->alu.arith;
9431
9432 if (CONST_INT_P (XEXP (x, 0)))
9433 {
9434 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9435 return true;
9436 }
9437
9438 return false;
9439 }
9440
9441 if (mode == DImode)
9442 {
9443 *cost = COSTS_N_INSNS (2);
9444
9445 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9446 {
9447 rtx op1 = XEXP (x, 1);
9448
9449 if (speed_p)
9450 *cost += 2 * extra_cost->alu.arith;
9451
9452 if (GET_CODE (op1) == ZERO_EXTEND)
9453 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9454 else
9455 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9456 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9457 0, speed_p);
9458 return true;
9459 }
9460 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9461 {
9462 if (speed_p)
9463 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9464 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9465 0, speed_p)
9466 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9467 return true;
9468 }
9469 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9470 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9471 {
9472 if (speed_p)
9473 *cost += (extra_cost->alu.arith
9474 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9475 ? extra_cost->alu.arith
9476 : extra_cost->alu.arith_shift));
9477 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9478 + rtx_cost (XEXP (XEXP (x, 1), 0),
9479 GET_CODE (XEXP (x, 1)), 0, speed_p));
9480 return true;
9481 }
9482
9483 if (speed_p)
9484 *cost += 2 * extra_cost->alu.arith;
9485 return false;
9486 }
9487
9488 /* Vector mode? */
9489
9490 *cost = LIBCALL_COST (2);
9491 return false;
9492
9493 case PLUS:
9494 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9495 && (mode == SFmode || !TARGET_VFP_SINGLE))
9496 {
9497 *cost = COSTS_N_INSNS (1);
9498 if (GET_CODE (XEXP (x, 0)) == MULT)
9499 {
9500 rtx mul_op0, mul_op1, add_op;
9501
9502 if (speed_p)
9503 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9504
9505 mul_op0 = XEXP (XEXP (x, 0), 0);
9506 mul_op1 = XEXP (XEXP (x, 0), 1);
9507 add_op = XEXP (x, 1);
9508
9509 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9510 + rtx_cost (mul_op1, code, 0, speed_p)
9511 + rtx_cost (add_op, code, 0, speed_p));
9512
9513 return true;
9514 }
9515
9516 if (speed_p)
9517 *cost += extra_cost->fp[mode != SFmode].addsub;
9518 return false;
9519 }
9520 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9521 {
9522 *cost = LIBCALL_COST (2);
9523 return false;
9524 }
9525
9526 /* Narrow modes can be synthesized in SImode, but the range
9527 of useful sub-operations is limited. Check for shift operations
9528 on one of the operands. Only left shifts can be used in the
9529 narrow modes. */
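/* (Roughly, this is because a narrow value occupies the low bits of a core
   register while the upper bits are in an undefined state: a left shift
   still produces the correct low-order result bits, whereas a right shift
   would pull the undefined upper bits down into the result.)  */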
9530 if (GET_MODE_CLASS (mode) == MODE_INT
9531 && GET_MODE_SIZE (mode) < 4)
9532 {
9533 rtx shift_op, shift_reg;
9534 shift_reg = NULL;
9535
9536 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9537
9538 if (CONST_INT_P (XEXP (x, 1)))
9539 {
9540 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9541 INTVAL (XEXP (x, 1)), NULL_RTX,
9542 NULL_RTX, 1, 0);
9543 *cost = COSTS_N_INSNS (insns);
9544 if (speed_p)
9545 *cost += insns * extra_cost->alu.arith;
9546 /* Slightly penalize a narrow operation as the result may
9547 need widening. */
9548 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9549 return true;
9550 }
9551
9552 /* Slightly penalize a narrow operation as the result may
9553 need widening. */
9554 *cost = 1 + COSTS_N_INSNS (1);
9555 if (speed_p)
9556 *cost += extra_cost->alu.arith;
9557
9558 return false;
9559 }
9560
9561 if (mode == SImode)
9562 {
9563 rtx shift_op, shift_reg;
9564
9565 *cost = COSTS_N_INSNS (1);
9566 if (TARGET_INT_SIMD
9567 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9568 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9569 {
9570 /* UXTA[BH] or SXTA[BH]. */
9571 if (speed_p)
9572 *cost += extra_cost->alu.extnd_arith;
9573 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9574 speed_p)
9575 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9576 return true;
9577 }
9578
9579 shift_reg = NULL;
9580 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9581 if (shift_op != NULL)
9582 {
9583 if (shift_reg)
9584 {
9585 if (speed_p)
9586 *cost += extra_cost->alu.arith_shift_reg;
9587 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9588 }
9589 else if (speed_p)
9590 *cost += extra_cost->alu.arith_shift;
9591
9592 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9593 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9594 return true;
9595 }
9596 if (GET_CODE (XEXP (x, 0)) == MULT)
9597 {
9598 rtx mul_op = XEXP (x, 0);
9599
9600 *cost = COSTS_N_INSNS (1);
9601
9602 if (TARGET_DSP_MULTIPLY
9603 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9604 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9605 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9606 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9607 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9608 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9609 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9610 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9611 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9612 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9613 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9614 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9615 == 16))))))
9616 {
9617 /* SMLA[BT][BT]. */
9618 if (speed_p)
9619 *cost += extra_cost->mult[0].extend_add;
9620 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9621 SIGN_EXTEND, 0, speed_p)
9622 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9623 SIGN_EXTEND, 0, speed_p)
9624 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9625 return true;
9626 }
9627
9628 if (speed_p)
9629 *cost += extra_cost->mult[0].add;
9630 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9631 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9632 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9633 return true;
9634 }
9635 if (CONST_INT_P (XEXP (x, 1)))
9636 {
9637 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9638 INTVAL (XEXP (x, 1)), NULL_RTX,
9639 NULL_RTX, 1, 0);
9640 *cost = COSTS_N_INSNS (insns);
9641 if (speed_p)
9642 *cost += insns * extra_cost->alu.arith;
9643 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9644 return true;
9645 }
9646 return false;
9647 }
9648
9649 if (mode == DImode)
9650 {
9651 if (arm_arch3m
9652 && GET_CODE (XEXP (x, 0)) == MULT
9653 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9654 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9655 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9656 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9657 {
9658 *cost = COSTS_N_INSNS (1);
9659 if (speed_p)
9660 *cost += extra_cost->mult[1].extend_add;
9661 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9662 ZERO_EXTEND, 0, speed_p)
9663 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9664 ZERO_EXTEND, 0, speed_p)
9665 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9666 return true;
9667 }
9668
9669 *cost = COSTS_N_INSNS (2);
9670
9671 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9672 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9673 {
9674 if (speed_p)
9675 *cost += (extra_cost->alu.arith
9676 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9677 ? extra_cost->alu.arith
9678 : extra_cost->alu.arith_shift));
9679
9680 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9681 speed_p)
9682 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9683 return true;
9684 }
9685
9686 if (speed_p)
9687 *cost += 2 * extra_cost->alu.arith;
9688 return false;
9689 }
9690
9691 /* Vector mode? */
9692 *cost = LIBCALL_COST (2);
9693 return false;
9694
9695 case AND: case XOR: case IOR:
9696 if (mode == SImode)
9697 {
9698 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9699 rtx op0 = XEXP (x, 0);
9700 rtx shift_op, shift_reg;
9701
9702 *cost = COSTS_N_INSNS (1);
9703
9704 if (subcode == NOT
9705 && (code == AND
9706 || (code == IOR && TARGET_THUMB2)))
9707 op0 = XEXP (op0, 0);
9708
9709 shift_reg = NULL;
9710 shift_op = shifter_op_p (op0, &shift_reg);
9711 if (shift_op != NULL)
9712 {
9713 if (shift_reg)
9714 {
9715 if (speed_p)
9716 *cost += extra_cost->alu.log_shift_reg;
9717 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9718 }
9719 else if (speed_p)
9720 *cost += extra_cost->alu.log_shift;
9721
9722 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9723 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9724 return true;
9725 }
9726
9727 if (CONST_INT_P (XEXP (x, 1)))
9728 {
9729 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9730 INTVAL (XEXP (x, 1)), NULL_RTX,
9731 NULL_RTX, 1, 0);
9732
9733 *cost = COSTS_N_INSNS (insns);
9734 if (speed_p)
9735 *cost += insns * extra_cost->alu.logical;
9736 *cost += rtx_cost (op0, code, 0, speed_p);
9737 return true;
9738 }
9739
9740 if (speed_p)
9741 *cost += extra_cost->alu.logical;
9742 *cost += (rtx_cost (op0, code, 0, speed_p)
9743 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9744 return true;
9745 }
9746
9747 if (mode == DImode)
9748 {
9749 rtx op0 = XEXP (x, 0);
9750 enum rtx_code subcode = GET_CODE (op0);
9751
9752 *cost = COSTS_N_INSNS (2);
9753
9754 if (subcode == NOT
9755 && (code == AND
9756 || (code == IOR && TARGET_THUMB2)))
9757 op0 = XEXP (op0, 0);
9758
9759 if (GET_CODE (op0) == ZERO_EXTEND)
9760 {
9761 if (speed_p)
9762 *cost += 2 * extra_cost->alu.logical;
9763
9764 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9765 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9766 return true;
9767 }
9768 else if (GET_CODE (op0) == SIGN_EXTEND)
9769 {
9770 if (speed_p)
9771 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9772
9773 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9774 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9775 return true;
9776 }
9777
9778 if (speed_p)
9779 *cost += 2 * extra_cost->alu.logical;
9780
9781 return true;
9782 }
9783 /* Vector mode? */
9784
9785 *cost = LIBCALL_COST (2);
9786 return false;
9787
9788 case MULT:
9789 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9790 && (mode == SFmode || !TARGET_VFP_SINGLE))
9791 {
9792 rtx op0 = XEXP (x, 0);
9793
9794 *cost = COSTS_N_INSNS (1);
9795
9796 if (GET_CODE (op0) == NEG)
9797 op0 = XEXP (op0, 0);
9798
9799 if (speed_p)
9800 *cost += extra_cost->fp[mode != SFmode].mult;
9801
9802 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9803 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9804 return true;
9805 }
9806 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9807 {
9808 *cost = LIBCALL_COST (2);
9809 return false;
9810 }
9811
9812 if (mode == SImode)
9813 {
9814 *cost = COSTS_N_INSNS (1);
9815 if (TARGET_DSP_MULTIPLY
9816 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9817 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9818 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9819 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9820 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9821 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9822 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9823 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9824 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9825 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9826 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9827 && (INTVAL (XEXP (XEXP (x, 1), 1))
9828 == 16))))))
9829 {
9830 /* SMUL[TB][TB]. */
9831 if (speed_p)
9832 *cost += extra_cost->mult[0].extend;
9833 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9834 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9835 return true;
9836 }
9837 if (speed_p)
9838 *cost += extra_cost->mult[0].simple;
9839 return false;
9840 }
9841
9842 if (mode == DImode)
9843 {
9844 if (arm_arch3m
9845 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9846 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9847 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9848 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9849 {
9850 *cost = COSTS_N_INSNS (1);
9851 if (speed_p)
9852 *cost += extra_cost->mult[1].extend;
9853 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9854 ZERO_EXTEND, 0, speed_p)
9855 + rtx_cost (XEXP (XEXP (x, 1), 0),
9856 ZERO_EXTEND, 0, speed_p));
9857 return true;
9858 }
9859
9860 *cost = LIBCALL_COST (2);
9861 return false;
9862 }
9863
9864 /* Vector mode? */
9865 *cost = LIBCALL_COST (2);
9866 return false;
9867
9868 case NEG:
9869 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9870 && (mode == SFmode || !TARGET_VFP_SINGLE))
9871 {
9872 *cost = COSTS_N_INSNS (1);
9873 if (speed_p)
9874 *cost += extra_cost->fp[mode != SFmode].neg;
9875
9876 return false;
9877 }
9878 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9879 {
9880 *cost = LIBCALL_COST (1);
9881 return false;
9882 }
9883
9884 if (mode == SImode)
9885 {
9886 if (GET_CODE (XEXP (x, 0)) == ABS)
9887 {
9888 *cost = COSTS_N_INSNS (2);
9889 /* Assume the non-flag-changing variant. */
9890 if (speed_p)
9891 *cost += (extra_cost->alu.log_shift
9892 + extra_cost->alu.arith_shift);
9893 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9894 return true;
9895 }
9896
9897 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9898 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9899 {
9900 *cost = COSTS_N_INSNS (2);
9901 /* No extra cost for MOV imm and MVN imm. */
9902 /* If the comparison op is using the flags, there's no further
9903 cost, otherwise we need to add the cost of the comparison. */
9904 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9905 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9906 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9907 {
9908 *cost += (COSTS_N_INSNS (1)
9909 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9910 speed_p)
9911 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9912 speed_p));
9913 if (speed_p)
9914 *cost += extra_cost->alu.arith;
9915 }
9916 return true;
9917 }
9918 *cost = COSTS_N_INSNS (1);
9919 if (speed_p)
9920 *cost += extra_cost->alu.arith;
9921 return false;
9922 }
9923
9924 if (GET_MODE_CLASS (mode) == MODE_INT
9925 && GET_MODE_SIZE (mode) < 4)
9926 {
9927 /* Slightly disparage, as we might need an extend operation. */
9928 *cost = 1 + COSTS_N_INSNS (1);
9929 if (speed_p)
9930 *cost += extra_cost->alu.arith;
9931 return false;
9932 }
9933
9934 if (mode == DImode)
9935 {
9936 *cost = COSTS_N_INSNS (2);
9937 if (speed_p)
9938 *cost += 2 * extra_cost->alu.arith;
9939 return false;
9940 }
9941
9942 /* Vector mode? */
9943 *cost = LIBCALL_COST (1);
9944 return false;
9945
9946 case NOT:
9947 if (mode == SImode)
9948 {
9949 rtx shift_op;
9950 rtx shift_reg = NULL;
9951
9952 *cost = COSTS_N_INSNS (1);
9953 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9954
9955 if (shift_op)
9956 {
9957 if (shift_reg != NULL)
9958 {
9959 if (speed_p)
9960 *cost += extra_cost->alu.log_shift_reg;
9961 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9962 }
9963 else if (speed_p)
9964 *cost += extra_cost->alu.log_shift;
9965 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9966 return true;
9967 }
9968
9969 if (speed_p)
9970 *cost += extra_cost->alu.logical;
9971 return false;
9972 }
9973 if (mode == DImode)
9974 {
9975 *cost = COSTS_N_INSNS (2);
9976 return false;
9977 }
9978
9979 /* Vector mode? */
9980
9981 *cost += LIBCALL_COST (1);
9982 return false;
9983
9984 case IF_THEN_ELSE:
9985 {
9986 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9987 {
9988 *cost = COSTS_N_INSNS (4);
9989 return true;
9990 }
9991 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
9992 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
9993
9994 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
9995 /* Assume that if one arm of the if_then_else is a register,
9996 that it will be tied with the result and eliminate the
9997 conditional insn. */
9998 if (REG_P (XEXP (x, 1)))
9999 *cost += op2cost;
10000 else if (REG_P (XEXP (x, 2)))
10001 *cost += op1cost;
10002 else
10003 {
10004 if (speed_p)
10005 {
10006 if (extra_cost->alu.non_exec_costs_exec)
10007 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10008 else
10009 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10010 }
10011 else
10012 *cost += op1cost + op2cost;
10013 }
10014 }
10015 return true;
10016
10017 case COMPARE:
10018 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10019 *cost = 0;
10020 else
10021 {
10022 enum machine_mode op0mode;
10023 /* We'll mostly assume that the cost of a compare is the cost of the
10024 LHS. However, there are some notable exceptions. */
10025
10026 /* Floating point compares are never done as side-effects. */
10027 op0mode = GET_MODE (XEXP (x, 0));
10028 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10029 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10030 {
10031 *cost = COSTS_N_INSNS (1);
10032 if (speed_p)
10033 *cost += extra_cost->fp[op0mode != SFmode].compare;
10034
10035 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10036 {
10037 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10038 return true;
10039 }
10040
10041 return false;
10042 }
10043 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10044 {
10045 *cost = LIBCALL_COST (2);
10046 return false;
10047 }
10048
10049 /* DImode compares normally take two insns. */
10050 if (op0mode == DImode)
10051 {
10052 *cost = COSTS_N_INSNS (2);
10053 if (speed_p)
10054 *cost += 2 * extra_cost->alu.arith;
10055 return false;
10056 }
10057
10058 if (op0mode == SImode)
10059 {
10060 rtx shift_op;
10061 rtx shift_reg;
10062
10063 if (XEXP (x, 1) == const0_rtx
10064 && !(REG_P (XEXP (x, 0))
10065 || (GET_CODE (XEXP (x, 0)) == SUBREG
10066 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10067 {
10068 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10069
10070 /* Multiply operations that set the flags are often
10071 significantly more expensive. */
10072 if (speed_p
10073 && GET_CODE (XEXP (x, 0)) == MULT
10074 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10075 *cost += extra_cost->mult[0].flag_setting;
10076
10077 if (speed_p
10078 && GET_CODE (XEXP (x, 0)) == PLUS
10079 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10080 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10081 0), 1), mode))
10082 *cost += extra_cost->mult[0].flag_setting;
10083 return true;
10084 }
10085
10086 shift_reg = NULL;
10087 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10088 if (shift_op != NULL)
10089 {
10090 *cost = COSTS_N_INSNS (1);
10091 if (shift_reg != NULL)
10092 {
10093 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10094 if (speed_p)
10095 *cost += extra_cost->alu.arith_shift_reg;
10096 }
10097 else if (speed_p)
10098 *cost += extra_cost->alu.arith_shift;
10099 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10100 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10101 return true;
10102 }
10103
10104 *cost = COSTS_N_INSNS (1);
10105 if (speed_p)
10106 *cost += extra_cost->alu.arith;
10107 if (CONST_INT_P (XEXP (x, 1))
10108 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10109 {
10110 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10111 return true;
10112 }
10113 return false;
10114 }
10115
10116 /* Vector mode? */
10117
10118 *cost = LIBCALL_COST (2);
10119 return false;
10120 }
10121 return true;
10122
10123 case EQ:
10124 case NE:
10125 case LT:
10126 case LE:
10127 case GT:
10128 case GE:
10129 case LTU:
10130 case LEU:
10131 case GEU:
10132 case GTU:
10133 case ORDERED:
10134 case UNORDERED:
10135 case UNEQ:
10136 case UNLE:
10137 case UNLT:
10138 case UNGE:
10139 case UNGT:
10140 case LTGT:
10141 if (outer_code == SET)
10142 {
10143 /* Is it a store-flag operation? */
10144 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10145 && XEXP (x, 1) == const0_rtx)
10146 {
10147 /* Thumb also needs an IT insn. */
10148 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10149 return true;
10150 }
10151 if (XEXP (x, 1) == const0_rtx)
10152 {
10153 switch (code)
10154 {
10155 case LT:
10156 /* LSR Rd, Rn, #31. */
10157 *cost = COSTS_N_INSNS (1);
10158 if (speed_p)
10159 *cost += extra_cost->alu.shift;
10160 break;
10161
10162 case EQ:
10163 /* RSBS T1, Rn, #0
10164 ADC Rd, Rn, T1. */
10165
10166 case NE:
10167 /* SUBS T1, Rn, #1
10168 SBC Rd, Rn, T1. */
10169 *cost = COSTS_N_INSNS (2);
10170 break;
10171
10172 case LE:
10173 /* RSBS T1, Rn, Rn, LSR #31
10174 ADC Rd, Rn, T1. */
10175 *cost = COSTS_N_INSNS (2);
10176 if (speed_p)
10177 *cost += extra_cost->alu.arith_shift;
10178 break;
10179
10180 case GT:
10181 /* RSB Rd, Rn, Rn, ASR #1
10182 LSR Rd, Rd, #31. */
10183 *cost = COSTS_N_INSNS (2);
10184 if (speed_p)
10185 *cost += (extra_cost->alu.arith_shift
10186 + extra_cost->alu.shift);
10187 break;
10188
10189 case GE:
10190 /* ASR Rd, Rn, #31
10191 ADD Rd, Rn, #1. */
10192 *cost = COSTS_N_INSNS (2);
10193 if (speed_p)
10194 *cost += extra_cost->alu.shift;
10195 break;
10196
10197 default:
10198 /* Remaining cases are either meaningless or would take
10199 three insns anyway. */
10200 *cost = COSTS_N_INSNS (3);
10201 break;
10202 }
10203 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10204 return true;
10205 }
10206 else
10207 {
10208 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10209 if (CONST_INT_P (XEXP (x, 1))
10210 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10211 {
10212 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10213 return true;
10214 }
10215
10216 return false;
10217 }
10218 }
10219 /* Not directly inside a set. If it involves the condition code
10220 register it must be the condition for a branch, cond_exec or
10221 I_T_E operation. Since the comparison is performed elsewhere
10222 this is just the control part which has no additional
10223 cost. */
10224 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10225 && XEXP (x, 1) == const0_rtx)
10226 {
10227 *cost = 0;
10228 return true;
10229 }
10230 return false;
10231
10232 case ABS:
10233 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10234 && (mode == SFmode || !TARGET_VFP_SINGLE))
10235 {
10236 *cost = COSTS_N_INSNS (1);
10237 if (speed_p)
10238 *cost += extra_cost->fp[mode != SFmode].neg;
10239
10240 return false;
10241 }
10242 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10243 {
10244 *cost = LIBCALL_COST (1);
10245 return false;
10246 }
10247
10248 if (mode == SImode)
10249 {
10250 *cost = COSTS_N_INSNS (1);
10251 if (speed_p)
10252 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10253 return false;
10254 }
10255 /* Vector mode? */
10256 *cost = LIBCALL_COST (1);
10257 return false;
10258
10259 case SIGN_EXTEND:
10260 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10261 && MEM_P (XEXP (x, 0)))
10262 {
10263 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10264
10265 if (mode == DImode)
10266 *cost += COSTS_N_INSNS (1);
10267
10268 if (!speed_p)
10269 return true;
10270
10271 if (GET_MODE (XEXP (x, 0)) == SImode)
10272 *cost += extra_cost->ldst.load;
10273 else
10274 *cost += extra_cost->ldst.load_sign_extend;
10275
10276 if (mode == DImode)
10277 *cost += extra_cost->alu.shift;
10278
10279 return true;
10280 }
10281
10282 /* Widening from less than 32 bits requires an extend operation. */
10283 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10284 {
10285 /* We have SXTB/SXTH. */
10286 *cost = COSTS_N_INSNS (1);
10287 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10288 if (speed_p)
10289 *cost += extra_cost->alu.extnd;
10290 }
10291 else if (GET_MODE (XEXP (x, 0)) != SImode)
10292 {
10293 /* Needs two shifts. */
10294 *cost = COSTS_N_INSNS (2);
10295 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10296 if (speed_p)
10297 *cost += 2 * extra_cost->alu.shift;
10298 }
10299
10300 /* Widening beyond 32 bits requires one more insn. */
10301 if (mode == DImode)
10302 {
10303 *cost += COSTS_N_INSNS (1);
10304 if (speed_p)
10305 *cost += extra_cost->alu.shift;
10306 }
10307
10308 return true;
10309
10310 case ZERO_EXTEND:
10311 if ((arm_arch4
10312 || GET_MODE (XEXP (x, 0)) == SImode
10313 || GET_MODE (XEXP (x, 0)) == QImode)
10314 && MEM_P (XEXP (x, 0)))
10315 {
10316 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10317
10318 if (mode == DImode)
10319 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10320
10321 return true;
10322 }
10323
10324 /* Widening from less than 32 bits requires an extend operation. */
10325 if (GET_MODE (XEXP (x, 0)) == QImode)
10326 {
10327 /* UXTB can be a shorter instruction in Thumb2, but it might
10328 be slower than the AND Rd, Rn, #255 alternative. When
10329 optimizing for speed it should never be slower to use
10330 AND, and we don't really model 16-bit vs 32-bit insns
10331 here. */
10332 *cost = COSTS_N_INSNS (1);
10333 if (speed_p)
10334 *cost += extra_cost->alu.logical;
10335 }
10336 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10337 {
10338 /* We have UXTB/UXTH. */
10339 *cost = COSTS_N_INSNS (1);
10340 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10341 if (speed_p)
10342 *cost += extra_cost->alu.extnd;
10343 }
10344 else if (GET_MODE (XEXP (x, 0)) != SImode)
10345 {
10346 /* Needs two shifts. It's marginally preferable to use
10347 shifts rather than two BIC instructions as the second
10348 shift may merge with a subsequent insn as a shifter
10349 op. */
10350 *cost = COSTS_N_INSNS (2);
10351 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10352 if (speed_p)
10353 *cost += 2 * extra_cost->alu.shift;
10354 }
10355 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10356 *cost = COSTS_N_INSNS (1);
10357
10358 /* Widening beyond 32 bits requires one more insn. */
10359 if (mode == DImode)
10360 {
10361 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10362 }
10363
10364 return true;
10365
10366 case CONST_INT:
10367 *cost = 0;
10368 /* CONST_INT has no mode, so we cannot tell for sure how many
10369 insns are really going to be needed. The best we can do is
10370 look at the value passed. If it fits in SImode, then assume
10371 that's the mode it will be used for. Otherwise assume it
10372 will be used in DImode. */
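/* For example, 0x1234 survives trunc_int_for_mode (..., SImode) unchanged
   and is costed as an SImode constant, whereas a value such as
   (HOST_WIDE_INT) 1 << 32 does not and is costed as a DImode constant
   built from its two SImode halves.  */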
10373 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10374 mode = SImode;
10375 else
10376 mode = DImode;
10377
10378 /* Avoid blowing up in arm_gen_constant (). */
10379 if (!(outer_code == PLUS
10380 || outer_code == AND
10381 || outer_code == IOR
10382 || outer_code == XOR
10383 || outer_code == MINUS))
10384 outer_code = SET;
10385
10386 const_int_cost:
10387 if (mode == SImode)
10388 {
10389 *cost += 0;
10390 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10391 INTVAL (x), NULL, NULL,
10392 0, 0));
10393 /* Extra costs? */
10394 }
10395 else
10396 {
10397 *cost += COSTS_N_INSNS (arm_gen_constant
10398 (outer_code, SImode, NULL,
10399 trunc_int_for_mode (INTVAL (x), SImode),
10400 NULL, NULL, 0, 0)
10401 + arm_gen_constant (outer_code, SImode, NULL,
10402 INTVAL (x) >> 32, NULL,
10403 NULL, 0, 0));
10404 /* Extra costs? */
10405 }
10406
10407 return true;
10408
10409 case CONST:
10410 case LABEL_REF:
10411 case SYMBOL_REF:
10412 if (speed_p)
10413 {
10414 if (arm_arch_thumb2 && !flag_pic)
10415 *cost = COSTS_N_INSNS (2);
10416 else
10417 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10418 }
10419 else
10420 *cost = COSTS_N_INSNS (2);
10421
10422 if (flag_pic)
10423 {
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += extra_cost->alu.arith;
10427 }
10428
10429 return true;
10430
10431 case CONST_FIXED:
10432 *cost = COSTS_N_INSNS (4);
10433 /* Fixme. */
10434 return true;
10435
10436 case CONST_DOUBLE:
10437 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10438 && (mode == SFmode || !TARGET_VFP_SINGLE))
10439 {
10440 if (vfp3_const_double_rtx (x))
10441 {
10442 *cost = COSTS_N_INSNS (1);
10443 if (speed_p)
10444 *cost += extra_cost->fp[mode == DFmode].fpconst;
10445 return true;
10446 }
10447
10448 if (speed_p)
10449 {
10450 *cost = COSTS_N_INSNS (1);
10451 if (mode == DFmode)
10452 *cost += extra_cost->ldst.loadd;
10453 else
10454 *cost += extra_cost->ldst.loadf;
10455 }
10456 else
10457 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10458
10459 return true;
10460 }
10461 *cost = COSTS_N_INSNS (4);
10462 return true;
10463
10464 case CONST_VECTOR:
10465 /* Fixme. */
10466 if (TARGET_NEON
10467 && TARGET_HARD_FLOAT
10468 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10469 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10470 *cost = COSTS_N_INSNS (1);
10471 else
10472 *cost = COSTS_N_INSNS (4);
10473 return true;
10474
10475 case HIGH:
10476 case LO_SUM:
10477 *cost = COSTS_N_INSNS (1);
10478 /* When optimizing for size, we prefer constant pool entries to
10479 MOVW/MOVT pairs, so bump the cost of these slightly. */
10480 if (!speed_p)
10481 *cost += 1;
10482 return true;
10483
10484 case CLZ:
10485 *cost = COSTS_N_INSNS (1);
10486 if (speed_p)
10487 *cost += extra_cost->alu.clz;
10488 return false;
10489
10490 case SMIN:
10491 if (XEXP (x, 1) == const0_rtx)
10492 {
10493 *cost = COSTS_N_INSNS (1);
10494 if (speed_p)
10495 *cost += extra_cost->alu.log_shift;
10496 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10497 return true;
10498 }
10499 /* Fall through. */
10500 case SMAX:
10501 case UMIN:
10502 case UMAX:
10503 *cost = COSTS_N_INSNS (2);
10504 return false;
10505
10506 case TRUNCATE:
10507 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10508 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10509 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10510 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10511 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10512 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10513 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10514 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10515 == ZERO_EXTEND))))
10516 {
10517 *cost = COSTS_N_INSNS (1);
10518 if (speed_p)
10519 *cost += extra_cost->mult[1].extend;
10520 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10521 speed_p)
10522 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10523 0, speed_p));
10524 return true;
10525 }
10526 *cost = LIBCALL_COST (1);
10527 return false;
10528
10529 case UNSPEC:
10530 return arm_unspec_cost (x, outer_code, speed_p, cost);
10531
10532 case PC:
10533 /* Reading the PC is like reading any other register. Writing it
10534 is more expensive, but we take that into account elsewhere. */
10535 *cost = 0;
10536 return true;
10537
10538 case ZERO_EXTRACT:
10539 /* TODO: Simple zero_extract of bottom bits using AND. */
10540 /* Fall through. */
10541 case SIGN_EXTRACT:
10542 if (arm_arch6
10543 && mode == SImode
10544 && CONST_INT_P (XEXP (x, 1))
10545 && CONST_INT_P (XEXP (x, 2)))
10546 {
10547 *cost = COSTS_N_INSNS (1);
10548 if (speed_p)
10549 *cost += extra_cost->alu.bfx;
10550 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10551 return true;
10552 }
10553 /* Without UBFX/SBFX, need to resort to shift operations. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += 2 * extra_cost->alu.shift;
10557 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10558 return true;
10559
10560 case FLOAT_EXTEND:
10561 if (TARGET_HARD_FLOAT)
10562 {
10563 *cost = COSTS_N_INSNS (1);
10564 if (speed_p)
10565 *cost += extra_cost->fp[mode == DFmode].widen;
10566 if (!TARGET_FPU_ARMV8
10567 && GET_MODE (XEXP (x, 0)) == HFmode)
10568 {
10569 /* Pre v8, widening HF->DF is a two-step process, first
10570 widening to SFmode. */
10571 *cost += COSTS_N_INSNS (1);
10572 if (speed_p)
10573 *cost += extra_cost->fp[0].widen;
10574 }
10575 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10576 return true;
10577 }
10578
10579 *cost = LIBCALL_COST (1);
10580 return false;
10581
10582 case FLOAT_TRUNCATE:
10583 if (TARGET_HARD_FLOAT)
10584 {
10585 *cost = COSTS_N_INSNS (1);
10586 if (speed_p)
10587 *cost += extra_cost->fp[mode == DFmode].narrow;
10588 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10589 return true;
10590 /* Vector modes? */
10591 }
10592 *cost = LIBCALL_COST (1);
10593 return false;
10594
10595 case FIX:
10596 case UNSIGNED_FIX:
10597 if (TARGET_HARD_FLOAT)
10598 {
10599 if (GET_MODE_CLASS (mode) == MODE_INT)
10600 {
10601 *cost = COSTS_N_INSNS (1);
10602 if (speed_p)
10603 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10604 /* Strip off the 'cost' of rounding towards zero. */
10605 if (GET_CODE (XEXP (x, 0)) == FIX)
10606 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10607 else
10608 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10609 /* ??? Increase the cost to deal with transferring from
10610 FP -> CORE registers? */
10611 return true;
10612 }
10613 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10614 && TARGET_FPU_ARMV8)
10615 {
10616 *cost = COSTS_N_INSNS (1);
10617 if (speed_p)
10618 *cost += extra_cost->fp[mode == DFmode].roundint;
10619 return false;
10620 }
10621 /* Vector costs? */
10622 }
10623 *cost = LIBCALL_COST (1);
10624 return false;
10625
10626 case FLOAT:
10627 case UNSIGNED_FLOAT:
10628 if (TARGET_HARD_FLOAT)
10629 {
10630 /* ??? Increase the cost to deal with transferring from CORE
10631 -> FP registers? */
10632 *cost = COSTS_N_INSNS (1);
10633 if (speed_p)
10634 *cost += extra_cost->fp[mode == DFmode].fromint;
10635 return false;
10636 }
10637 *cost = LIBCALL_COST (1);
10638 return false;
10639
10640 case CALL:
10641 *cost = COSTS_N_INSNS (1);
10642 return true;
10643
10644 case ASM_OPERANDS:
10645 /* Just a guess. Cost one insn per input. */
10646 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10647 return true;
10648
10649 default:
10650 if (mode != VOIDmode)
10651 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10652 else
10653 *cost = COSTS_N_INSNS (4); /* Who knows? */
10654 return false;
10655 }
10656 }
10657
10658 #undef HANDLE_NARROW_SHIFT_ARITH
10659
10660 /* RTX costs. Dispatch to the old per-core cost functions, the size costs, or the new table-driven costs, as appropriate. */
10661 static bool
10662 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10663 int *total, bool speed)
10664 {
10665 bool result;
10666
10667 if (TARGET_OLD_RTX_COSTS
10668 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10669 {
10670 /* Old way. (Deprecated.) */
10671 if (!speed)
10672 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10673 (enum rtx_code) outer_code, total);
10674 else
10675 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10676 (enum rtx_code) outer_code, total,
10677 speed);
10678 }
10679 else
10680 {
10681 /* New way. */
10682 if (current_tune->insn_extra_cost)
10683 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10684 (enum rtx_code) outer_code,
10685 current_tune->insn_extra_cost,
10686 total, speed);
10687 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10688 && current_tune->insn_extra_cost == NULL. */
10689 else
10690 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10691 (enum rtx_code) outer_code,
10692 &generic_extra_costs, total, speed);
10693 }
10694
10695 if (dump_file && (dump_flags & TDF_DETAILS))
10696 {
10697 print_rtl_single (dump_file, x);
10698 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10699 *total, result ? "final" : "partial");
10700 }
10701 return result;
10702 }
10703
10704 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10705 supported on any "slowmul" cores, so it can be ignored. */
10706
10707 static bool
10708 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10709 int *total, bool speed)
10710 {
10711 enum machine_mode mode = GET_MODE (x);
10712
10713 if (TARGET_THUMB)
10714 {
10715 *total = thumb1_rtx_costs (x, code, outer_code);
10716 return true;
10717 }
10718
10719 switch (code)
10720 {
10721 case MULT:
10722 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10723 || mode == DImode)
10724 {
10725 *total = COSTS_N_INSNS (20);
10726 return false;
10727 }
10728
10729 if (CONST_INT_P (XEXP (x, 1)))
10730 {
10731 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10732 & (unsigned HOST_WIDE_INT) 0xffffffff);
10733 int cost, const_ok = const_ok_for_arm (i);
10734 int j, booth_unit_size;
10735
10736 /* Tune as appropriate. */
10737 cost = const_ok ? 4 : 8;
10738 booth_unit_size = 2;
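/* The loop below charges one extra unit for every booth_unit_size bits in
   the significant part of the constant.  For example, 0xff is
   const_ok_for_arm, so the cost starts at 4 and the loop runs four times
   before the value shifts down to zero, giving COSTS_N_INSNS (8).  */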
10739 for (j = 0; i && j < 32; j += booth_unit_size)
10740 {
10741 i >>= booth_unit_size;
10742 cost++;
10743 }
10744
10745 *total = COSTS_N_INSNS (cost);
10746 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10747 return true;
10748 }
10749
10750 *total = COSTS_N_INSNS (20);
10751 return false;
10752
10753 default:
10754 return arm_rtx_costs_1 (x, outer_code, total, speed);
10755 }
10756 }
10757
10758
10759 /* RTX cost for cores with a fast multiply unit (M variants). */
10760
10761 static bool
10762 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10763 int *total, bool speed)
10764 {
10765 enum machine_mode mode = GET_MODE (x);
10766
10767 if (TARGET_THUMB1)
10768 {
10769 *total = thumb1_rtx_costs (x, code, outer_code);
10770 return true;
10771 }
10772
10773 /* ??? Should Thumb-2 use different costs? */
10774 switch (code)
10775 {
10776 case MULT:
10777 /* There is no point basing this on the tuning, since it is always the
10778 fast variant if it exists at all. */
10779 if (mode == DImode
10780 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10781 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10782 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10783 {
10784 *total = COSTS_N_INSNS (2);
10785 return false;
10786 }
10787
10788
10789 if (mode == DImode)
10790 {
10791 *total = COSTS_N_INSNS (5);
10792 return false;
10793 }
10794
10795 if (CONST_INT_P (XEXP (x, 1)))
10796 {
10797 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10798 & (unsigned HOST_WIDE_INT) 0xffffffff);
10799 int cost, const_ok = const_ok_for_arm (i);
10800 int j, booth_unit_size;
10801
10802 /* Tune as appropriate. */
10803 cost = const_ok ? 4 : 8;
10804 booth_unit_size = 8;
10805 for (j = 0; i && j < 32; j += booth_unit_size)
10806 {
10807 i >>= booth_unit_size;
10808 cost++;
10809 }
10810
10811 *total = COSTS_N_INSNS (cost);
10812 return false;
10813 }
10814
10815 if (mode == SImode)
10816 {
10817 *total = COSTS_N_INSNS (4);
10818 return false;
10819 }
10820
10821 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10822 {
10823 if (TARGET_HARD_FLOAT
10824 && (mode == SFmode
10825 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10826 {
10827 *total = COSTS_N_INSNS (1);
10828 return false;
10829 }
10830 }
10831
10832 /* Requires a lib call. */
10833 *total = COSTS_N_INSNS (20);
10834 return false;
10835
10836 default:
10837 return arm_rtx_costs_1 (x, outer_code, total, speed);
10838 }
10839 }
10840
10841
10842 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10843 so it can be ignored. */
10844
10845 static bool
10846 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10847 int *total, bool speed)
10848 {
10849 enum machine_mode mode = GET_MODE (x);
10850
10851 if (TARGET_THUMB)
10852 {
10853 *total = thumb1_rtx_costs (x, code, outer_code);
10854 return true;
10855 }
10856
10857 switch (code)
10858 {
10859 case COMPARE:
10860 if (GET_CODE (XEXP (x, 0)) != MULT)
10861 return arm_rtx_costs_1 (x, outer_code, total, speed);
10862
10863 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10864 will stall until the multiplication is complete. */
10865 *total = COSTS_N_INSNS (3);
10866 return false;
10867
10868 case MULT:
10869 /* There is no point basing this on the tuning, since it is always the
10870 fast variant if it exists at all. */
10871 if (mode == DImode
10872 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10873 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10874 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10875 {
10876 *total = COSTS_N_INSNS (2);
10877 return false;
10878 }
10879
10880
10881 if (mode == DImode)
10882 {
10883 *total = COSTS_N_INSNS (5);
10884 return false;
10885 }
10886
10887 if (CONST_INT_P (XEXP (x, 1)))
10888 {
10889 /* If operand 1 is a constant we can more accurately
10890 calculate the cost of the multiply. The multiplier can
10891 retire 15 bits on the first cycle and a further 12 on the
10892 second. We do, of course, have to load the constant into
10893 a register first. */
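/* For example, for the constant 0x12345 the top bit is clear so no
   inversion is needed; i & 0xffff8000 is non-zero (one extra cycle) but
   i & 0xf8000000 is zero, so the total is COSTS_N_INSNS (2).  */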
10894 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10895 /* There's a general overhead of one cycle. */
10896 int cost = 1;
10897 unsigned HOST_WIDE_INT masked_const;
10898
10899 if (i & 0x80000000)
10900 i = ~i;
10901
10902 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
10903
10904 masked_const = i & 0xffff8000;
10905 if (masked_const != 0)
10906 {
10907 cost++;
10908 masked_const = i & 0xf8000000;
10909 if (masked_const != 0)
10910 cost++;
10911 }
10912 *total = COSTS_N_INSNS (cost);
10913 return false;
10914 }
10915
10916 if (mode == SImode)
10917 {
10918 *total = COSTS_N_INSNS (3);
10919 return false;
10920 }
10921
10922 /* Requires a lib call. */
10923 *total = COSTS_N_INSNS (20);
10924 return false;
10925
10926 default:
10927 return arm_rtx_costs_1 (x, outer_code, total, speed);
10928 }
10929 }
10930
10931
10932 /* RTX costs for 9e (and later) cores. */
10933
10934 static bool
10935 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10936 int *total, bool speed)
10937 {
10938 enum machine_mode mode = GET_MODE (x);
10939
10940 if (TARGET_THUMB1)
10941 {
10942 switch (code)
10943 {
10944 case MULT:
10945 *total = COSTS_N_INSNS (3);
10946 return true;
10947
10948 default:
10949 *total = thumb1_rtx_costs (x, code, outer_code);
10950 return true;
10951 }
10952 }
10953
10954 switch (code)
10955 {
10956 case MULT:
10957 /* There is no point basing this on the tuning, since it is always the
10958 fast variant if it exists at all. */
10959 if (mode == DImode
10960 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10961 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10962 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10963 {
10964 *total = COSTS_N_INSNS (2);
10965 return false;
10966 }
10967
10968
10969 if (mode == DImode)
10970 {
10971 *total = COSTS_N_INSNS (5);
10972 return false;
10973 }
10974
10975 if (mode == SImode)
10976 {
10977 *total = COSTS_N_INSNS (2);
10978 return false;
10979 }
10980
10981 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10982 {
10983 if (TARGET_HARD_FLOAT
10984 && (mode == SFmode
10985 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10986 {
10987 *total = COSTS_N_INSNS (1);
10988 return false;
10989 }
10990 }
10991
10992 *total = COSTS_N_INSNS (20);
10993 return false;
10994
10995 default:
10996 return arm_rtx_costs_1 (x, outer_code, total, speed);
10997 }
10998 }
10999 /* All address computations that can be done are free, but rtx cost returns
11000 the same for practically all of them. So we weight the different types
11001 of address here in the order (most pref first):
11002 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
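/* For example, (plus (reg) (const_int 4)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a bare register
   scores 6 and a MEM, LABEL_REF or SYMBOL_REF scores 10; auto-increment
   and auto-decrement addresses are free.  */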
11003 static inline int
11004 arm_arm_address_cost (rtx x)
11005 {
11006 enum rtx_code c = GET_CODE (x);
11007
11008 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11009 return 0;
11010 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11011 return 10;
11012
11013 if (c == PLUS)
11014 {
11015 if (CONST_INT_P (XEXP (x, 1)))
11016 return 2;
11017
11018 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11019 return 3;
11020
11021 return 4;
11022 }
11023
11024 return 6;
11025 }
11026
11027 static inline int
11028 arm_thumb_address_cost (rtx x)
11029 {
11030 enum rtx_code c = GET_CODE (x);
11031
11032 if (c == REG)
11033 return 1;
11034 if (c == PLUS
11035 && REG_P (XEXP (x, 0))
11036 && CONST_INT_P (XEXP (x, 1)))
11037 return 1;
11038
11039 return 2;
11040 }
11041
11042 static int
11043 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11044 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11045 {
11046 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11047 }
11048
11049 /* Adjust cost hook for XScale. */
11050 static bool
11051 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11052 {
11053 /* Some true dependencies can have a higher cost depending
11054 on precisely how certain input operands are used. */
11055 if (REG_NOTE_KIND (link) == 0
11056 && recog_memoized (insn) >= 0
11057 && recog_memoized (dep) >= 0)
11058 {
11059 int shift_opnum = get_attr_shift (insn);
11060 enum attr_type attr_type = get_attr_type (dep);
11061
11062 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11063 operand for INSN. If we have a shifted input operand and the
11064 instruction we depend on is another ALU instruction, then we may
11065 have to account for an additional stall. */
11066 if (shift_opnum != 0
11067 && (attr_type == TYPE_ALU_SHIFT_IMM
11068 || attr_type == TYPE_ALUS_SHIFT_IMM
11069 || attr_type == TYPE_LOGIC_SHIFT_IMM
11070 || attr_type == TYPE_LOGICS_SHIFT_IMM
11071 || attr_type == TYPE_ALU_SHIFT_REG
11072 || attr_type == TYPE_ALUS_SHIFT_REG
11073 || attr_type == TYPE_LOGIC_SHIFT_REG
11074 || attr_type == TYPE_LOGICS_SHIFT_REG
11075 || attr_type == TYPE_MOV_SHIFT
11076 || attr_type == TYPE_MVN_SHIFT
11077 || attr_type == TYPE_MOV_SHIFT_REG
11078 || attr_type == TYPE_MVN_SHIFT_REG))
11079 {
11080 rtx shifted_operand;
11081 int opno;
11082
11083 /* Get the shifted operand. */
11084 extract_insn (insn);
11085 shifted_operand = recog_data.operand[shift_opnum];
11086
11087 /* Iterate over all the operands in DEP. If we write an operand
11088 that overlaps with SHIFTED_OPERAND, then we have to increase the
11089 cost of this dependency. */
11090 extract_insn (dep);
11091 preprocess_constraints ();
11092 for (opno = 0; opno < recog_data.n_operands; opno++)
11093 {
11094 /* We can ignore strict inputs. */
11095 if (recog_data.operand_type[opno] == OP_IN)
11096 continue;
11097
11098 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11099 shifted_operand))
11100 {
11101 *cost = 2;
11102 return false;
11103 }
11104 }
11105 }
11106 }
11107 return true;
11108 }
11109
11110 /* Adjust cost hook for Cortex A9. */
11111 static bool
11112 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11113 {
11114 switch (REG_NOTE_KIND (link))
11115 {
11116 case REG_DEP_ANTI:
11117 *cost = 0;
11118 return false;
11119
11120 case REG_DEP_TRUE:
11121 case REG_DEP_OUTPUT:
11122 if (recog_memoized (insn) >= 0
11123 && recog_memoized (dep) >= 0)
11124 {
11125 if (GET_CODE (PATTERN (insn)) == SET)
11126 {
11127 if (GET_MODE_CLASS
11128 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11129 || GET_MODE_CLASS
11130 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11131 {
11132 enum attr_type attr_type_insn = get_attr_type (insn);
11133 enum attr_type attr_type_dep = get_attr_type (dep);
11134
11135 /* By default all dependencies of the form
11136 s0 = s0 <op> s1
11137 s0 = s0 <op> s2
11138 have an extra latency of 1 cycle because
11139 of the input and output dependency in this
11140 case. However this gets modeled as a true
11141 dependency and hence all these checks. */
11142 if (REG_P (SET_DEST (PATTERN (insn)))
11143 && REG_P (SET_DEST (PATTERN (dep)))
11144 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11145 SET_DEST (PATTERN (dep))))
11146 {
11147 /* FMACS is a special case where the dependent
11148 instruction can be issued 3 cycles before
11149 the normal latency in case of an output
11150 dependency. */
11151 if ((attr_type_insn == TYPE_FMACS
11152 || attr_type_insn == TYPE_FMACD)
11153 && (attr_type_dep == TYPE_FMACS
11154 || attr_type_dep == TYPE_FMACD))
11155 {
11156 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11157 *cost = insn_default_latency (dep) - 3;
11158 else
11159 *cost = insn_default_latency (dep);
11160 return false;
11161 }
11162 else
11163 {
11164 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11165 *cost = insn_default_latency (dep) + 1;
11166 else
11167 *cost = insn_default_latency (dep);
11168 }
11169 return false;
11170 }
11171 }
11172 }
11173 }
11174 break;
11175
11176 default:
11177 gcc_unreachable ();
11178 }
11179
11180 return true;
11181 }
11182
11183 /* Adjust cost hook for FA726TE. */
11184 static bool
11185 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11186 {
11187 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11188 followed by a predicated one) has a penalty of 3. */
11189 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11190 && recog_memoized (insn) >= 0
11191 && recog_memoized (dep) >= 0
11192 && get_attr_conds (dep) == CONDS_SET)
11193 {
11194 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11195 if (get_attr_conds (insn) == CONDS_USE
11196 && get_attr_type (insn) != TYPE_BRANCH)
11197 {
11198 *cost = 3;
11199 return false;
11200 }
11201
11202 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11203 || get_attr_conds (insn) == CONDS_USE)
11204 {
11205 *cost = 0;
11206 return false;
11207 }
11208 }
11209
11210 return true;
11211 }
11212
11213 /* Implement TARGET_REGISTER_MOVE_COST.
11214
11215 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11216 it is typically more expensive than a single memory access. We set
11217 the cost to less than two memory accesses so that floating
11218 point to integer conversion does not go through memory. */
11219
11220 int
11221 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11222 reg_class_t from, reg_class_t to)
11223 {
11224 if (TARGET_32BIT)
11225 {
11226 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11227 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11228 return 15;
11229 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11230 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11231 return 4;
11232 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11233 return 20;
11234 else
11235 return 2;
11236 }
11237 else
11238 {
11239 if (from == HI_REGS || to == HI_REGS)
11240 return 4;
11241 else
11242 return 2;
11243 }
11244 }
11245
11246 /* Implement TARGET_MEMORY_MOVE_COST. */
11247
11248 int
11249 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11250 bool in ATTRIBUTE_UNUSED)
11251 {
11252 if (TARGET_32BIT)
11253 return 10;
11254 else
11255 {
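/* Illustrative note (an editorial addition, not in the original sources):
   with the formulas below, an SImode value costs 8 to move from/to LO_REGS
   and 16 otherwise, while a DImode value costs 16 or 32.  */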
11256 if (GET_MODE_SIZE (mode) < 4)
11257 return 8;
11258 else
11259 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11260 }
11261 }
11262
11263 /* Vectorizer cost model implementation. */
11264
11265 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11266 static int
11267 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11268 tree vectype,
11269 int misalign ATTRIBUTE_UNUSED)
11270 {
11271 unsigned elements;
11272
11273 switch (type_of_cost)
11274 {
11275 case scalar_stmt:
11276 return current_tune->vec_costs->scalar_stmt_cost;
11277
11278 case scalar_load:
11279 return current_tune->vec_costs->scalar_load_cost;
11280
11281 case scalar_store:
11282 return current_tune->vec_costs->scalar_store_cost;
11283
11284 case vector_stmt:
11285 return current_tune->vec_costs->vec_stmt_cost;
11286
11287 case vector_load:
11288 return current_tune->vec_costs->vec_align_load_cost;
11289
11290 case vector_store:
11291 return current_tune->vec_costs->vec_store_cost;
11292
11293 case vec_to_scalar:
11294 return current_tune->vec_costs->vec_to_scalar_cost;
11295
11296 case scalar_to_vec:
11297 return current_tune->vec_costs->scalar_to_vec_cost;
11298
11299 case unaligned_load:
11300 return current_tune->vec_costs->vec_unalign_load_cost;
11301
11302 case unaligned_store:
11303 return current_tune->vec_costs->vec_unalign_store_cost;
11304
11305 case cond_branch_taken:
11306 return current_tune->vec_costs->cond_taken_branch_cost;
11307
11308 case cond_branch_not_taken:
11309 return current_tune->vec_costs->cond_not_taken_branch_cost;
11310
11311 case vec_perm:
11312 case vec_promote_demote:
11313 return current_tune->vec_costs->vec_stmt_cost;
11314
11315 case vec_construct:
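/* Illustrative note (an editorial addition): a 4-element constructor is
   costed as 4 / 2 + 1 = 3, and a 2-element one as 2.  */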
11316 elements = TYPE_VECTOR_SUBPARTS (vectype);
11317 return elements / 2 + 1;
11318
11319 default:
11320 gcc_unreachable ();
11321 }
11322 }
11323
11324 /* Implement targetm.vectorize.add_stmt_cost. */
11325
11326 static unsigned
11327 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11328 struct _stmt_vec_info *stmt_info, int misalign,
11329 enum vect_cost_model_location where)
11330 {
11331 unsigned *cost = (unsigned *) data;
11332 unsigned retval = 0;
11333
11334 if (flag_vect_cost_model)
11335 {
11336 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11337 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11338
11339 /* Statements in an inner loop relative to the loop being
11340 vectorized are weighted more heavily. The value here is
11341 arbitrary and could potentially be improved with analysis. */
11342 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11343 count *= 50; /* FIXME. */
11344
11345 retval = (unsigned) (count * stmt_cost);
11346 cost[where] += retval;
11347 }
11348
11349 return retval;
11350 }
11351
11352 /* Return true if and only if this insn can dual-issue only as older. */
11353 static bool
11354 cortexa7_older_only (rtx insn)
11355 {
11356 if (recog_memoized (insn) < 0)
11357 return false;
11358
11359 switch (get_attr_type (insn))
11360 {
11361 case TYPE_ALU_REG:
11362 case TYPE_ALUS_REG:
11363 case TYPE_LOGIC_REG:
11364 case TYPE_LOGICS_REG:
11365 case TYPE_ADC_REG:
11366 case TYPE_ADCS_REG:
11367 case TYPE_ADR:
11368 case TYPE_BFM:
11369 case TYPE_REV:
11370 case TYPE_MVN_REG:
11371 case TYPE_SHIFT_IMM:
11372 case TYPE_SHIFT_REG:
11373 case TYPE_LOAD_BYTE:
11374 case TYPE_LOAD1:
11375 case TYPE_STORE1:
11376 case TYPE_FFARITHS:
11377 case TYPE_FADDS:
11378 case TYPE_FFARITHD:
11379 case TYPE_FADDD:
11380 case TYPE_FMOV:
11381 case TYPE_F_CVT:
11382 case TYPE_FCMPS:
11383 case TYPE_FCMPD:
11384 case TYPE_FCONSTS:
11385 case TYPE_FCONSTD:
11386 case TYPE_FMULS:
11387 case TYPE_FMACS:
11388 case TYPE_FMULD:
11389 case TYPE_FMACD:
11390 case TYPE_FDIVS:
11391 case TYPE_FDIVD:
11392 case TYPE_F_MRC:
11393 case TYPE_F_MRRC:
11394 case TYPE_F_FLAG:
11395 case TYPE_F_LOADS:
11396 case TYPE_F_STORES:
11397 return true;
11398 default:
11399 return false;
11400 }
11401 }
11402
11403 /* Return true if and only if this insn can dual-issue as younger. */
11404 static bool
11405 cortexa7_younger (FILE *file, int verbose, rtx insn)
11406 {
11407 if (recog_memoized (insn) < 0)
11408 {
11409 if (verbose > 5)
11410 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11411 return false;
11412 }
11413
11414 switch (get_attr_type (insn))
11415 {
11416 case TYPE_ALU_IMM:
11417 case TYPE_ALUS_IMM:
11418 case TYPE_LOGIC_IMM:
11419 case TYPE_LOGICS_IMM:
11420 case TYPE_EXTEND:
11421 case TYPE_MVN_IMM:
11422 case TYPE_MOV_IMM:
11423 case TYPE_MOV_REG:
11424 case TYPE_MOV_SHIFT:
11425 case TYPE_MOV_SHIFT_REG:
11426 case TYPE_BRANCH:
11427 case TYPE_CALL:
11428 return true;
11429 default:
11430 return false;
11431 }
11432 }
11433
11434
11435 /* Look for an instruction that can dual issue only as an older
11436 instruction, and move it in front of any instructions that can
11437 dual-issue as younger, while preserving the relative order of all
11438 other instructions in the ready list. This is a heuristic to help
11439 dual-issue in later cycles, by postponing issue of more flexible
11440 instructions. This heuristic may affect dual issue opportunities
11441 in the current cycle. */
11442 static void
11443 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11444 int clock)
11445 {
11446 int i;
11447 int first_older_only = -1, first_younger = -1;
11448
11449 if (verbose > 5)
11450 fprintf (file,
11451 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11452 clock,
11453 *n_readyp);
11454
11455 /* Traverse the ready list from the head (the instruction to issue
11456 first), looking for the first instruction that can issue as
11457 younger and the first instruction that can dual-issue only as
11458 older. */
11459 for (i = *n_readyp - 1; i >= 0; i--)
11460 {
11461 rtx insn = ready[i];
11462 if (cortexa7_older_only (insn))
11463 {
11464 first_older_only = i;
11465 if (verbose > 5)
11466 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11467 break;
11468 }
11469 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11470 first_younger = i;
11471 }
11472
11473 /* Nothing to reorder because either no younger insn was found, or the insn
11474 that can dual-issue only as older appears before any insn that
11475 can dual-issue as younger. */
11476 if (first_younger == -1)
11477 {
11478 if (verbose > 5)
11479 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11480 return;
11481 }
11482
11483 /* Nothing to reorder because no older-only insn in the ready list. */
11484 if (first_older_only == -1)
11485 {
11486 if (verbose > 5)
11487 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11488 return;
11489 }
11490
11491 /* Move first_older_only insn before first_younger. */
11492 if (verbose > 5)
11493 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11494 INSN_UID (ready[first_older_only]),
11495 INSN_UID (ready[first_younger]));
11496 rtx first_older_only_insn = ready[first_older_only];
11497 for (i = first_older_only; i < first_younger; i++)
11498 {
11499 ready[i] = ready[i+1];
11500 }
11501
11502 ready[i] = first_older_only_insn;
11503 return;
11504 }
11505
11506 /* Implement TARGET_SCHED_REORDER. */
11507 static int
11508 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11509 int clock)
11510 {
11511 switch (arm_tune)
11512 {
11513 case cortexa7:
11514 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11515 break;
11516 default:
11517 /* Do nothing for other cores. */
11518 break;
11519 }
11520
11521 return arm_issue_rate ();
11522 }
11523
11524 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11525 It corrects the value of COST based on the relationship between
11526 INSN and DEP through the dependence LINK. It returns the new
11527 value. There is a per-core adjust_cost hook to adjust scheduler costs
11528 and the per-core hook can choose to completely override the generic
11529 adjust_cost function. Only put bits of code into arm_adjust_cost that
11530 are common across all cores. */
11531 static int
11532 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11533 {
11534 rtx i_pat, d_pat;
11535
11536 /* When generating Thumb-1 code, we want to place flag-setting operations
11537 close to a conditional branch which depends on them, so that we can
11538 omit the comparison. */
11539 if (TARGET_THUMB1
11540 && REG_NOTE_KIND (link) == 0
11541 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11542 && recog_memoized (dep) >= 0
11543 && get_attr_conds (dep) == CONDS_SET)
11544 return 0;
11545
11546 if (current_tune->sched_adjust_cost != NULL)
11547 {
11548 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11549 return cost;
11550 }
11551
11552 /* XXX Is this strictly true? */
11553 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11554 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11555 return 0;
11556
11557 /* Call insns don't incur a stall, even if they follow a load. */
11558 if (REG_NOTE_KIND (link) == 0
11559 && CALL_P (insn))
11560 return 1;
11561
11562 if ((i_pat = single_set (insn)) != NULL
11563 && MEM_P (SET_SRC (i_pat))
11564 && (d_pat = single_set (dep)) != NULL
11565 && MEM_P (SET_DEST (d_pat)))
11566 {
11567 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11568 /* This is a load after a store; there is no conflict if the load reads
11569 from a cached area. Assume that loads from the stack, and from the
11570 constant pool are cached, and that others will miss. This is a
11571 hack. */
11572
11573 if ((GET_CODE (src_mem) == SYMBOL_REF
11574 && CONSTANT_POOL_ADDRESS_P (src_mem))
11575 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11576 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11577 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11578 return 1;
11579 }
11580
11581 return cost;
11582 }
11583
11584 int
11585 arm_max_conditional_execute (void)
11586 {
11587 return max_insns_skipped;
11588 }
11589
11590 static int
11591 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11592 {
11593 if (TARGET_32BIT)
11594 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11595 else
11596 return (optimize > 0) ? 2 : 0;
11597 }
11598
11599 static int
11600 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11601 {
11602 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11603 }
11604
11605 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11606 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11607 sequences of non-executed instructions in IT blocks probably take the same
11608 amount of time as executed instructions (and the IT instruction itself takes
11609 space in icache). This function was experimentally determined to give good
11610 results on a popular embedded benchmark. */
11611
11612 static int
11613 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11614 {
11615 return (TARGET_32BIT && speed_p) ? 1
11616 : arm_default_branch_cost (speed_p, predictable_p);
11617 }
11618
11619 static bool fp_consts_inited = false;
11620
11621 static REAL_VALUE_TYPE value_fp0;
11622
11623 static void
11624 init_fp_table (void)
11625 {
11626 REAL_VALUE_TYPE r;
11627
11628 r = REAL_VALUE_ATOF ("0", DFmode);
11629 value_fp0 = r;
11630 fp_consts_inited = true;
11631 }
11632
11633 /* Return TRUE if rtx X is a valid immediate FP constant. */
11634 int
11635 arm_const_double_rtx (rtx x)
11636 {
11637 REAL_VALUE_TYPE r;
11638
11639 if (!fp_consts_inited)
11640 init_fp_table ();
11641
11642 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11643 if (REAL_VALUE_MINUS_ZERO (r))
11644 return 0;
11645
11646 if (REAL_VALUES_EQUAL (r, value_fp0))
11647 return 1;
11648
11649 return 0;
11650 }
11651
11652 /* VFPv3 has a fairly wide range of representable immediates, formed from
11653 "quarter-precision" floating-point values. These can be evaluated using this
11654 formula (with ^ for exponentiation):
11655
11656 -1^s * n * 2^-r
11657
11658 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11659 16 <= n <= 31 and 0 <= r <= 7.
11660
11661 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11662
11663 - A (most-significant) is the sign bit.
11664 - BCD are the exponent (encoded as r XOR 3).
11665 - EFGH are the mantissa (encoded as n - 16).
11666 */
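
/* Worked example of the mapping above (an illustrative editorial addition,
   not taken from the original sources): the value 1.0 can be written as
   16 * 2^-4, so s = 0, n = 16 and r = 4, giving the 8-bit index
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */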
11667
11668 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11669 fconst[sd] instruction, or -1 if X isn't suitable. */
11670 static int
11671 vfp3_const_double_index (rtx x)
11672 {
11673 REAL_VALUE_TYPE r, m;
11674 int sign, exponent;
11675 unsigned HOST_WIDE_INT mantissa, mant_hi;
11676 unsigned HOST_WIDE_INT mask;
11677 HOST_WIDE_INT m1, m2;
11678 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11679
11680 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11681 return -1;
11682
11683 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11684
11685 /* We can't represent these things, so detect them first. */
11686 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11687 return -1;
11688
11689 /* Extract sign, exponent and mantissa. */
11690 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11691 r = real_value_abs (&r);
11692 exponent = REAL_EXP (&r);
11693 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11694 highest (sign) bit, with a fixed binary point at bit point_pos.
11695 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11696 bits for the mantissa, this may fail (low bits would be lost). */
11697 real_ldexp (&m, &r, point_pos - exponent);
11698 REAL_VALUE_TO_INT (&m1, &m2, m);
11699 mantissa = m1;
11700 mant_hi = m2;
11701
11702 /* If there are bits set in the low part of the mantissa, we can't
11703 represent this value. */
11704 if (mantissa != 0)
11705 return -1;
11706
11707 /* Now make it so that mantissa contains the most-significant bits, and move
11708 the point_pos to indicate that the least-significant bits have been
11709 discarded. */
11710 point_pos -= HOST_BITS_PER_WIDE_INT;
11711 mantissa = mant_hi;
11712
11713 /* We can permit four significant bits of mantissa only, plus a high bit
11714 which is always 1. */
11715 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11716 if ((mantissa & mask) != 0)
11717 return -1;
11718
11719 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11720 mantissa >>= point_pos - 5;
11721
11722 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11723 floating-point immediate zero with Neon using an integer-zero load, but
11724 that case is handled elsewhere.) */
11725 if (mantissa == 0)
11726 return -1;
11727
11728 gcc_assert (mantissa >= 16 && mantissa <= 31);
11729
11730 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11731 normalized significands are in the range [1, 2). (Our mantissa is shifted
11732 left 4 places at this point relative to normalized IEEE754 values). GCC
11733 internally uses [0.5, 1) (see real.c), so the exponent returned from
11734 REAL_EXP must be altered. */
11735 exponent = 5 - exponent;
11736
11737 if (exponent < 0 || exponent > 7)
11738 return -1;
11739
11740 /* Sign, mantissa and exponent are now in the correct form to plug into the
11741 formula described in the comment above. */
11742 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11743 }
11744
11745 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11746 int
11747 vfp3_const_double_rtx (rtx x)
11748 {
11749 if (!TARGET_VFP3)
11750 return 0;
11751
11752 return vfp3_const_double_index (x) != -1;
11753 }
11754
11755 /* Recognize immediates which can be used in various Neon instructions. Legal
11756 immediates are described by the following table (for VMVN variants, the
11757 bitwise inverse of the constant shown is recognized. In either case, VMOV
11758 is output and the correct instruction to use for a given constant is chosen
11759 by the assembler). The constant shown is replicated across all elements of
11760 the destination vector.
11761
11762 insn elems variant constant (binary)
11763 ---- ----- ------- -----------------
11764 vmov i32 0 00000000 00000000 00000000 abcdefgh
11765 vmov i32 1 00000000 00000000 abcdefgh 00000000
11766 vmov i32 2 00000000 abcdefgh 00000000 00000000
11767 vmov i32 3 abcdefgh 00000000 00000000 00000000
11768 vmov i16 4 00000000 abcdefgh
11769 vmov i16 5 abcdefgh 00000000
11770 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11771 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11772 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11773 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11774 vmvn i16 10 00000000 abcdefgh
11775 vmvn i16 11 abcdefgh 00000000
11776 vmov i32 12 00000000 00000000 abcdefgh 11111111
11777 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11778 vmov i32 14 00000000 abcdefgh 11111111 11111111
11779 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11780 vmov i8 16 abcdefgh
11781 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11782 eeeeeeee ffffffff gggggggg hhhhhhhh
11783 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11784 vmov f32 19 00000000 00000000 00000000 00000000
11785
11786 For case 18, B = !b. Representable values are exactly those accepted by
11787 vfp3_const_double_index, but are output as floating-point numbers rather
11788 than indices.
11789
11790 For case 19, we will change it to vmov.i32 when assembling.
11791
11792 Variants 0-5 (inclusive) may also be used as immediates for the second
11793 operand of VORR/VBIC instructions.
11794
11795 The INVERSE argument causes the bitwise inverse of the given operand to be
11796 recognized instead (used for recognizing legal immediates for the VAND/VORN
11797 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11798 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11799 output, rather than the real insns vbic/vorr).
11800
11801 INVERSE makes no difference to the recognition of float vectors.
11802
11803 The return value is the variant of immediate as shown in the above table, or
11804 -1 if the given value doesn't match any of the listed patterns.
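
   As an illustrative example (an editorial addition, not part of the original
   comment): a V4SImode constant whose elements all equal 0x0000ab00 matches
   variant 1 above with an element width of 32, since each 32-bit element has
   the form 00000000 00000000 abcdefgh 00000000 with abcdefgh = 0xab.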
11805 */
11806 static int
11807 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11808 rtx *modconst, int *elementwidth)
11809 {
11810 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11811 matches = 1; \
11812 for (i = 0; i < idx; i += (STRIDE)) \
11813 if (!(TEST)) \
11814 matches = 0; \
11815 if (matches) \
11816 { \
11817 immtype = (CLASS); \
11818 elsize = (ELSIZE); \
11819 break; \
11820 }
11821
11822 unsigned int i, elsize = 0, idx = 0, n_elts;
11823 unsigned int innersize;
11824 unsigned char bytes[16];
11825 int immtype = -1, matches;
11826 unsigned int invmask = inverse ? 0xff : 0;
11827 bool vector = GET_CODE (op) == CONST_VECTOR;
11828
11829 if (vector)
11830 {
11831 n_elts = CONST_VECTOR_NUNITS (op);
11832 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11833 }
11834 else
11835 {
11836 n_elts = 1;
11837 if (mode == VOIDmode)
11838 mode = DImode;
11839 innersize = GET_MODE_SIZE (mode);
11840 }
11841
11842 /* Vectors of float constants. */
11843 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11844 {
11845 rtx el0 = CONST_VECTOR_ELT (op, 0);
11846 REAL_VALUE_TYPE r0;
11847
11848 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11849 return -1;
11850
11851 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11852
11853 for (i = 1; i < n_elts; i++)
11854 {
11855 rtx elt = CONST_VECTOR_ELT (op, i);
11856 REAL_VALUE_TYPE re;
11857
11858 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11859
11860 if (!REAL_VALUES_EQUAL (r0, re))
11861 return -1;
11862 }
11863
11864 if (modconst)
11865 *modconst = CONST_VECTOR_ELT (op, 0);
11866
11867 if (elementwidth)
11868 *elementwidth = 0;
11869
11870 if (el0 == CONST0_RTX (GET_MODE (el0)))
11871 return 19;
11872 else
11873 return 18;
11874 }
11875
11876 /* Splat vector constant out into a byte vector. */
11877 for (i = 0; i < n_elts; i++)
11878 {
11879 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11880 unsigned HOST_WIDE_INT elpart;
11881 unsigned int part, parts;
11882
11883 if (CONST_INT_P (el))
11884 {
11885 elpart = INTVAL (el);
11886 parts = 1;
11887 }
11888 else if (CONST_DOUBLE_P (el))
11889 {
11890 elpart = CONST_DOUBLE_LOW (el);
11891 parts = 2;
11892 }
11893 else
11894 gcc_unreachable ();
11895
11896 for (part = 0; part < parts; part++)
11897 {
11898 unsigned int byte;
11899 for (byte = 0; byte < innersize; byte++)
11900 {
11901 bytes[idx++] = (elpart & 0xff) ^ invmask;
11902 elpart >>= BITS_PER_UNIT;
11903 }
11904 if (CONST_DOUBLE_P (el))
11905 elpart = CONST_DOUBLE_HIGH (el);
11906 }
11907 }
11908
11909 /* Sanity check. */
11910 gcc_assert (idx == GET_MODE_SIZE (mode));
11911
11912 do
11913 {
11914 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11915 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11916
11917 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11918 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11919
11920 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11921 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11922
11923 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11924 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11925
11926 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11927
11928 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11929
11930 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11931 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11932
11933 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11934 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11935
11936 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11937 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11938
11939 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11940 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11941
11942 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11943
11944 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11945
11946 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11947 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11948
11949 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11950 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11951
11952 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11953 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11954
11955 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11956 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11957
11958 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11959
11960 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11961 && bytes[i] == bytes[(i + 8) % idx]);
11962 }
11963 while (0);
11964
11965 if (immtype == -1)
11966 return -1;
11967
11968 if (elementwidth)
11969 *elementwidth = elsize;
11970
11971 if (modconst)
11972 {
11973 unsigned HOST_WIDE_INT imm = 0;
11974
11975 /* Un-invert bytes of recognized vector, if necessary. */
11976 if (invmask != 0)
11977 for (i = 0; i < idx; i++)
11978 bytes[i] ^= invmask;
11979
11980 if (immtype == 17)
11981 {
11982 /* FIXME: Broken on 32-bit H_W_I hosts. */
11983 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11984
11985 for (i = 0; i < 8; i++)
11986 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11987 << (i * BITS_PER_UNIT);
11988
11989 *modconst = GEN_INT (imm);
11990 }
11991 else
11992 {
11993 unsigned HOST_WIDE_INT imm = 0;
11994
11995 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11996 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11997
11998 *modconst = GEN_INT (imm);
11999 }
12000 }
12001
12002 return immtype;
12003 #undef CHECK
12004 }
12005
12006 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12007 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12008 float elements), and a modified constant (whatever should be output for a
12009 VMOV) in *MODCONST. */
12010
12011 int
12012 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12013 rtx *modconst, int *elementwidth)
12014 {
12015 rtx tmpconst;
12016 int tmpwidth;
12017 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12018
12019 if (retval == -1)
12020 return 0;
12021
12022 if (modconst)
12023 *modconst = tmpconst;
12024
12025 if (elementwidth)
12026 *elementwidth = tmpwidth;
12027
12028 return 1;
12029 }
12030
12031 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12032 the immediate is valid, write a constant suitable for using as an operand
12033 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12034 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12035
12036 int
12037 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12038 rtx *modconst, int *elementwidth)
12039 {
12040 rtx tmpconst;
12041 int tmpwidth;
12042 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12043
12044 if (retval < 0 || retval > 5)
12045 return 0;
12046
12047 if (modconst)
12048 *modconst = tmpconst;
12049
12050 if (elementwidth)
12051 *elementwidth = tmpwidth;
12052
12053 return 1;
12054 }
12055
12056 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12057 the immediate is valid, write a constant suitable for using as an operand
12058 to VSHR/VSHL to *MODCONST and the corresponding element width to
12059 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes a left shift from a right shift,
12060 because they have different limitations. */
12061
12062 int
12063 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12064 rtx *modconst, int *elementwidth,
12065 bool isleftshift)
12066 {
12067 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12068 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12069 unsigned HOST_WIDE_INT last_elt = 0;
12070 unsigned HOST_WIDE_INT maxshift;
12071
12072 /* Split vector constant out into a byte vector. */
12073 for (i = 0; i < n_elts; i++)
12074 {
12075 rtx el = CONST_VECTOR_ELT (op, i);
12076 unsigned HOST_WIDE_INT elpart;
12077
12078 if (CONST_INT_P (el))
12079 elpart = INTVAL (el);
12080 else if (CONST_DOUBLE_P (el))
12081 return 0;
12082 else
12083 gcc_unreachable ();
12084
12085 if (i != 0 && elpart != last_elt)
12086 return 0;
12087
12088 last_elt = elpart;
12089 }
12090
12091 /* Shift less than element size. */
12092 maxshift = innersize * 8;
12093
12094 if (isleftshift)
12095 {
12096 /* Left shift immediate value can be from 0 to <size>-1. */
12097 if (last_elt >= maxshift)
12098 return 0;
12099 }
12100 else
12101 {
12102 /* Right shift immediate value can be from 1 to <size>. */
12103 if (last_elt == 0 || last_elt > maxshift)
12104 return 0;
12105 }
12106
12107 if (elementwidth)
12108 *elementwidth = innersize * 8;
12109
12110 if (modconst)
12111 *modconst = CONST_VECTOR_ELT (op, 0);
12112
12113 return 1;
12114 }
12115
12116 /* Return a string suitable for output of Neon immediate logic operation
12117 MNEM. */
12118
12119 char *
12120 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12121 int inverse, int quad)
12122 {
12123 int width, is_valid;
12124 static char templ[40];
12125
12126 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12127
12128 gcc_assert (is_valid != 0);
12129
12130 if (quad)
12131 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12132 else
12133 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12134
12135 return templ;
12136 }
12137
12138 /* Return a string suitable for output of Neon immediate shift operation
12139 (VSHR or VSHL) MNEM. */
12140
12141 char *
12142 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12143 enum machine_mode mode, int quad,
12144 bool isleftshift)
12145 {
12146 int width, is_valid;
12147 static char templ[40];
12148
12149 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12150 gcc_assert (is_valid != 0);
12151
12152 if (quad)
12153 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12154 else
12155 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12156
12157 return templ;
12158 }
12159
12160 /* Output a sequence of pairwise operations to implement a reduction.
12161 NOTE: We do "too much work" here, because pairwise operations work on two
12162 registers-worth of operands in one go. Unfortunately I don't think we can
12163 exploit those extra calculations to do the full operation in fewer steps.
12164 Although all vector elements of the result but the first are ignored, we
12165 actually calculate the same result in each of the elements. An alternative
12166 such as initially loading a vector with zero to use as each of the second
12167 operands would use up an additional register and take an extra instruction,
12168 for no particular gain. */
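
/* Illustrative sketch (an editorial addition, assuming a 4-element mode such
   as V4HImode): reducing {a, b, c, d} with a pairwise-add REDUC takes two
   steps, first {a+b, c+d, a+b, c+d} and then {a+b+c+d, ...}; every element
   of the final result holds the same value, but only element 0 is used.  */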
12169
12170 void
12171 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12172 rtx (*reduc) (rtx, rtx, rtx))
12173 {
12174 enum machine_mode inner = GET_MODE_INNER (mode);
12175 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12176 rtx tmpsum = op1;
12177
12178 for (i = parts / 2; i >= 1; i /= 2)
12179 {
12180 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12181 emit_insn (reduc (dest, tmpsum, tmpsum));
12182 tmpsum = dest;
12183 }
12184 }
12185
12186 /* If VALS is a vector constant that can be loaded into a register
12187 using VDUP, generate instructions to do so and return an RTX to
12188 assign to the register. Otherwise return NULL_RTX. */
12189
12190 static rtx
12191 neon_vdup_constant (rtx vals)
12192 {
12193 enum machine_mode mode = GET_MODE (vals);
12194 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12195 int n_elts = GET_MODE_NUNITS (mode);
12196 bool all_same = true;
12197 rtx x;
12198 int i;
12199
12200 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12201 return NULL_RTX;
12202
12203 for (i = 0; i < n_elts; ++i)
12204 {
12205 x = XVECEXP (vals, 0, i);
12206 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12207 all_same = false;
12208 }
12209
12210 if (!all_same)
12211 /* The elements are not all the same. We could handle repeating
12212 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12213 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12214 vdup.i16). */
12215 return NULL_RTX;
12216
12217 /* We can load this constant by using VDUP and a constant in a
12218 single ARM register. This will be cheaper than a vector
12219 load. */
12220
12221 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12222 return gen_rtx_VEC_DUPLICATE (mode, x);
12223 }
12224
12225 /* Generate code to load VALS, which is a PARALLEL containing only
12226 constants (for vec_init) or CONST_VECTOR, efficiently into a
12227 register. Returns an RTX to copy into the register, or NULL_RTX
12228 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12229
12230 rtx
12231 neon_make_constant (rtx vals)
12232 {
12233 enum machine_mode mode = GET_MODE (vals);
12234 rtx target;
12235 rtx const_vec = NULL_RTX;
12236 int n_elts = GET_MODE_NUNITS (mode);
12237 int n_const = 0;
12238 int i;
12239
12240 if (GET_CODE (vals) == CONST_VECTOR)
12241 const_vec = vals;
12242 else if (GET_CODE (vals) == PARALLEL)
12243 {
12244 /* A CONST_VECTOR must contain only CONST_INTs and
12245 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12246 Only store valid constants in a CONST_VECTOR. */
12247 for (i = 0; i < n_elts; ++i)
12248 {
12249 rtx x = XVECEXP (vals, 0, i);
12250 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12251 n_const++;
12252 }
12253 if (n_const == n_elts)
12254 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12255 }
12256 else
12257 gcc_unreachable ();
12258
12259 if (const_vec != NULL
12260 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12261 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12262 return const_vec;
12263 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12264 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12265 pipeline cycle; creating the constant takes one or two ARM
12266 pipeline cycles. */
12267 return target;
12268 else if (const_vec != NULL_RTX)
12269 /* Load from constant pool. On Cortex-A8 this takes two cycles
12270 (for either double or quad vectors). We can not take advantage
12271 of single-cycle VLD1 because we need a PC-relative addressing
12272 mode. */
12273 return const_vec;
12274 else
12275 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12276 We can not construct an initializer. */
12277 return NULL_RTX;
12278 }
12279
12280 /* Initialize vector TARGET to VALS. */
12281
12282 void
12283 neon_expand_vector_init (rtx target, rtx vals)
12284 {
12285 enum machine_mode mode = GET_MODE (target);
12286 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12287 int n_elts = GET_MODE_NUNITS (mode);
12288 int n_var = 0, one_var = -1;
12289 bool all_same = true;
12290 rtx x, mem;
12291 int i;
12292
12293 for (i = 0; i < n_elts; ++i)
12294 {
12295 x = XVECEXP (vals, 0, i);
12296 if (!CONSTANT_P (x))
12297 ++n_var, one_var = i;
12298
12299 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12300 all_same = false;
12301 }
12302
12303 if (n_var == 0)
12304 {
12305 rtx constant = neon_make_constant (vals);
12306 if (constant != NULL_RTX)
12307 {
12308 emit_move_insn (target, constant);
12309 return;
12310 }
12311 }
12312
12313 /* Splat a single non-constant element if we can. */
12314 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12315 {
12316 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12317 emit_insn (gen_rtx_SET (VOIDmode, target,
12318 gen_rtx_VEC_DUPLICATE (mode, x)));
12319 return;
12320 }
12321
12322 /* One field is non-constant. Load constant then overwrite varying
12323 field. This is more efficient than using the stack. */
12324 if (n_var == 1)
12325 {
12326 rtx copy = copy_rtx (vals);
12327 rtx index = GEN_INT (one_var);
12328
12329 /* Load constant part of vector, substitute neighboring value for
12330 varying element. */
12331 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12332 neon_expand_vector_init (target, copy);
12333
12334 /* Insert variable. */
12335 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12336 switch (mode)
12337 {
12338 case V8QImode:
12339 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12340 break;
12341 case V16QImode:
12342 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12343 break;
12344 case V4HImode:
12345 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12346 break;
12347 case V8HImode:
12348 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12349 break;
12350 case V2SImode:
12351 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12352 break;
12353 case V4SImode:
12354 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12355 break;
12356 case V2SFmode:
12357 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12358 break;
12359 case V4SFmode:
12360 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12361 break;
12362 case V2DImode:
12363 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12364 break;
12365 default:
12366 gcc_unreachable ();
12367 }
12368 return;
12369 }
12370
12371 /* Construct the vector in memory one field at a time
12372 and load the whole vector. */
12373 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12374 for (i = 0; i < n_elts; i++)
12375 emit_move_insn (adjust_address_nv (mem, inner_mode,
12376 i * GET_MODE_SIZE (inner_mode)),
12377 XVECEXP (vals, 0, i));
12378 emit_move_insn (target, mem);
12379 }
12380
12381 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12382 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12383 reported source locations are bogus. */
12384
12385 static void
12386 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12387 const char *err)
12388 {
12389 HOST_WIDE_INT lane;
12390
12391 gcc_assert (CONST_INT_P (operand));
12392
12393 lane = INTVAL (operand);
12394
12395 if (lane < low || lane >= high)
12396 error (err);
12397 }
12398
12399 /* Bounds-check lanes. */
12400
12401 void
12402 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12403 {
12404 bounds_check (operand, low, high, "lane out of range");
12405 }
12406
12407 /* Bounds-check constants. */
12408
12409 void
12410 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12411 {
12412 bounds_check (operand, low, high, "constant out of range");
12413 }
12414
12415 HOST_WIDE_INT
12416 neon_element_bits (enum machine_mode mode)
12417 {
12418 if (mode == DImode)
12419 return GET_MODE_BITSIZE (mode);
12420 else
12421 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12422 }
12423
12424 \f
12425 /* Predicates for `match_operand' and `match_operator'. */
12426
12427 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12428 WB is true if full writeback address modes are allowed and is false
12429 if limited writeback address modes (POST_INC and PRE_DEC) are
12430 allowed. */
12431
12432 int
12433 arm_coproc_mem_operand (rtx op, bool wb)
12434 {
12435 rtx ind;
12436
12437 /* Reject eliminable registers. */
12438 if (! (reload_in_progress || reload_completed)
12439 && ( reg_mentioned_p (frame_pointer_rtx, op)
12440 || reg_mentioned_p (arg_pointer_rtx, op)
12441 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12442 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12443 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12444 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12445 return FALSE;
12446
12447 /* Constants are converted into offsets from labels. */
12448 if (!MEM_P (op))
12449 return FALSE;
12450
12451 ind = XEXP (op, 0);
12452
12453 if (reload_completed
12454 && (GET_CODE (ind) == LABEL_REF
12455 || (GET_CODE (ind) == CONST
12456 && GET_CODE (XEXP (ind, 0)) == PLUS
12457 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12458 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12459 return TRUE;
12460
12461 /* Match: (mem (reg)). */
12462 if (REG_P (ind))
12463 return arm_address_register_rtx_p (ind, 0);
12464
12465 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12466 acceptable in any case (subject to verification by
12467 arm_address_register_rtx_p). We need WB to be true to accept
12468 PRE_INC and POST_DEC. */
12469 if (GET_CODE (ind) == POST_INC
12470 || GET_CODE (ind) == PRE_DEC
12471 || (wb
12472 && (GET_CODE (ind) == PRE_INC
12473 || GET_CODE (ind) == POST_DEC)))
12474 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12475
12476 if (wb
12477 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12478 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12479 && GET_CODE (XEXP (ind, 1)) == PLUS
12480 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12481 ind = XEXP (ind, 1);
12482
12483 /* Match:
12484 (plus (reg)
12485 (const)). */
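/* Illustrative restatement (an editorial addition): the test below accepts
   constant offsets from -1020 to 1020 that are multiples of 4.  */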
12486 if (GET_CODE (ind) == PLUS
12487 && REG_P (XEXP (ind, 0))
12488 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12489 && CONST_INT_P (XEXP (ind, 1))
12490 && INTVAL (XEXP (ind, 1)) > -1024
12491 && INTVAL (XEXP (ind, 1)) < 1024
12492 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12493 return TRUE;
12494
12495 return FALSE;
12496 }
12497
12498 /* Return TRUE if OP is a memory operand which we can load or store a vector
12499 to/from. TYPE is one of the following values:
12500 0 - Vector load/store (vldr)
12501 1 - Core registers (ldm)
12502 2 - Element/structure loads (vld1)
12503 */
12504 int
12505 neon_vector_mem_operand (rtx op, int type, bool strict)
12506 {
12507 rtx ind;
12508
12509 /* Reject eliminable registers. */
12510 if (! (reload_in_progress || reload_completed)
12511 && ( reg_mentioned_p (frame_pointer_rtx, op)
12512 || reg_mentioned_p (arg_pointer_rtx, op)
12513 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12514 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12515 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12516 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12517 return !strict;
12518
12519 /* Constants are converted into offsets from labels. */
12520 if (!MEM_P (op))
12521 return FALSE;
12522
12523 ind = XEXP (op, 0);
12524
12525 if (reload_completed
12526 && (GET_CODE (ind) == LABEL_REF
12527 || (GET_CODE (ind) == CONST
12528 && GET_CODE (XEXP (ind, 0)) == PLUS
12529 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12530 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12531 return TRUE;
12532
12533 /* Match: (mem (reg)). */
12534 if (REG_P (ind))
12535 return arm_address_register_rtx_p (ind, 0);
12536
12537 /* Allow post-increment with Neon registers. */
12538 if ((type != 1 && GET_CODE (ind) == POST_INC)
12539 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12540 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12541
12542 /* FIXME: vld1 allows register post-modify. */
12543
12544 /* Match:
12545 (plus (reg)
12546 (const)). */
12547 if (type == 0
12548 && GET_CODE (ind) == PLUS
12549 && REG_P (XEXP (ind, 0))
12550 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12551 && CONST_INT_P (XEXP (ind, 1))
12552 && INTVAL (XEXP (ind, 1)) > -1024
12553 /* For quad modes, we restrict the constant offset to be slightly less
12554 than what the instruction format permits. We have no such constraint
12555 on double mode offsets. (This must match arm_legitimate_index_p.) */
12556 && (INTVAL (XEXP (ind, 1))
12557 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12558 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12559 return TRUE;
12560
12561 return FALSE;
12562 }
12563
12564 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12565 type. */
12566 int
12567 neon_struct_mem_operand (rtx op)
12568 {
12569 rtx ind;
12570
12571 /* Reject eliminable registers. */
12572 if (! (reload_in_progress || reload_completed)
12573 && ( reg_mentioned_p (frame_pointer_rtx, op)
12574 || reg_mentioned_p (arg_pointer_rtx, op)
12575 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12576 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12577 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12578 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12579 return FALSE;
12580
12581 /* Constants are converted into offsets from labels. */
12582 if (!MEM_P (op))
12583 return FALSE;
12584
12585 ind = XEXP (op, 0);
12586
12587 if (reload_completed
12588 && (GET_CODE (ind) == LABEL_REF
12589 || (GET_CODE (ind) == CONST
12590 && GET_CODE (XEXP (ind, 0)) == PLUS
12591 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12592 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12593 return TRUE;
12594
12595 /* Match: (mem (reg)). */
12596 if (REG_P (ind))
12597 return arm_address_register_rtx_p (ind, 0);
12598
12599 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12600 if (GET_CODE (ind) == POST_INC
12601 || GET_CODE (ind) == PRE_DEC)
12602 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12603
12604 return FALSE;
12605 }
12606
12607 /* Return true if X is a register that will be eliminated later on. */
12608 int
12609 arm_eliminable_register (rtx x)
12610 {
12611 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12612 || REGNO (x) == ARG_POINTER_REGNUM
12613 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12614 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12615 }
12616
12617 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12618 coprocessor registers. Otherwise return NO_REGS. */
12619
12620 enum reg_class
12621 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12622 {
12623 if (mode == HFmode)
12624 {
12625 if (!TARGET_NEON_FP16)
12626 return GENERAL_REGS;
12627 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12628 return NO_REGS;
12629 return GENERAL_REGS;
12630 }
12631
12632 /* The neon move patterns handle all legitimate vector and struct
12633 addresses. */
12634 if (TARGET_NEON
12635 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12636 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12637 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12638 || VALID_NEON_STRUCT_MODE (mode)))
12639 return NO_REGS;
12640
12641 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12642 return NO_REGS;
12643
12644 return GENERAL_REGS;
12645 }
12646
12647 /* Values which must be returned in the most-significant end of the return
12648 register. */
12649
12650 static bool
12651 arm_return_in_msb (const_tree valtype)
12652 {
12653 return (TARGET_AAPCS_BASED
12654 && BYTES_BIG_ENDIAN
12655 && (AGGREGATE_TYPE_P (valtype)
12656 || TREE_CODE (valtype) == COMPLEX_TYPE
12657 || FIXED_POINT_TYPE_P (valtype)));
12658 }
12659
12660 /* Return TRUE if X references a SYMBOL_REF. */
12661 int
12662 symbol_mentioned_p (rtx x)
12663 {
12664 const char * fmt;
12665 int i;
12666
12667 if (GET_CODE (x) == SYMBOL_REF)
12668 return 1;
12669
12670 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12671 are constant offsets, not symbols. */
12672 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12673 return 0;
12674
12675 fmt = GET_RTX_FORMAT (GET_CODE (x));
12676
12677 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12678 {
12679 if (fmt[i] == 'E')
12680 {
12681 int j;
12682
12683 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12684 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12685 return 1;
12686 }
12687 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12688 return 1;
12689 }
12690
12691 return 0;
12692 }
12693
12694 /* Return TRUE if X references a LABEL_REF. */
12695 int
12696 label_mentioned_p (rtx x)
12697 {
12698 const char * fmt;
12699 int i;
12700
12701 if (GET_CODE (x) == LABEL_REF)
12702 return 1;
12703
12704 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12705 instruction, but they are constant offsets, not symbols. */
12706 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12707 return 0;
12708
12709 fmt = GET_RTX_FORMAT (GET_CODE (x));
12710 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12711 {
12712 if (fmt[i] == 'E')
12713 {
12714 int j;
12715
12716 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12717 if (label_mentioned_p (XVECEXP (x, i, j)))
12718 return 1;
12719 }
12720 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12721 return 1;
12722 }
12723
12724 return 0;
12725 }
12726
12727 int
12728 tls_mentioned_p (rtx x)
12729 {
12730 switch (GET_CODE (x))
12731 {
12732 case CONST:
12733 return tls_mentioned_p (XEXP (x, 0));
12734
12735 case UNSPEC:
12736 if (XINT (x, 1) == UNSPEC_TLS)
12737 return 1;
12738
12739 default:
12740 return 0;
12741 }
12742 }
12743
12744 /* Must not copy any rtx that uses a pc-relative address. */
12745
12746 static int
12747 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12748 {
12749 if (GET_CODE (*x) == UNSPEC
12750 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12751 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12752 return 1;
12753 return 0;
12754 }
12755
12756 static bool
12757 arm_cannot_copy_insn_p (rtx insn)
12758 {
12759 /* The tls call insn cannot be copied, as it is paired with a data
12760 word. */
12761 if (recog_memoized (insn) == CODE_FOR_tlscall)
12762 return true;
12763
12764 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12765 }
12766
12767 enum rtx_code
12768 minmax_code (rtx x)
12769 {
12770 enum rtx_code code = GET_CODE (x);
12771
12772 switch (code)
12773 {
12774 case SMAX:
12775 return GE;
12776 case SMIN:
12777 return LE;
12778 case UMIN:
12779 return LEU;
12780 case UMAX:
12781 return GEU;
12782 default:
12783 gcc_unreachable ();
12784 }
12785 }
12786
12787 /* Match pair of min/max operators that can be implemented via usat/ssat. */
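
/* Illustrative examples (an editorial addition): the bounds [0, 255] match
   an unsigned saturation with *MASK = 8 (a usat-style operation), while the
   bounds [-128, 127] match a signed saturation, also with *MASK = 8.  */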
12788
12789 bool
12790 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12791 int *mask, bool *signed_sat)
12792 {
12793 /* The high bound must be a power of two minus one. */
12794 int log = exact_log2 (INTVAL (hi_bound) + 1);
12795 if (log == -1)
12796 return false;
12797
12798 /* The low bound is either zero (for usat) or one less than the
12799 negation of the high bound (for ssat). */
12800 if (INTVAL (lo_bound) == 0)
12801 {
12802 if (mask)
12803 *mask = log;
12804 if (signed_sat)
12805 *signed_sat = false;
12806
12807 return true;
12808 }
12809
12810 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12811 {
12812 if (mask)
12813 *mask = log + 1;
12814 if (signed_sat)
12815 *signed_sat = true;
12816
12817 return true;
12818 }
12819
12820 return false;
12821 }
12822
12823 /* Return 1 if memory locations are adjacent. */
12824 int
12825 adjacent_mem_locations (rtx a, rtx b)
12826 {
12827 /* We don't guarantee to preserve the order of these memory refs. */
12828 if (volatile_refs_p (a) || volatile_refs_p (b))
12829 return 0;
12830
12831 if ((REG_P (XEXP (a, 0))
12832 || (GET_CODE (XEXP (a, 0)) == PLUS
12833 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12834 && (REG_P (XEXP (b, 0))
12835 || (GET_CODE (XEXP (b, 0)) == PLUS
12836 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12837 {
12838 HOST_WIDE_INT val0 = 0, val1 = 0;
12839 rtx reg0, reg1;
12840 int val_diff;
12841
12842 if (GET_CODE (XEXP (a, 0)) == PLUS)
12843 {
12844 reg0 = XEXP (XEXP (a, 0), 0);
12845 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12846 }
12847 else
12848 reg0 = XEXP (a, 0);
12849
12850 if (GET_CODE (XEXP (b, 0)) == PLUS)
12851 {
12852 reg1 = XEXP (XEXP (b, 0), 0);
12853 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12854 }
12855 else
12856 reg1 = XEXP (b, 0);
12857
12858 /* Don't accept any offset that will require multiple
12859 instructions to handle, since this would cause the
12860 arith_adjacentmem pattern to output an overlong sequence. */
12861 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12862 return 0;
12863
12864 /* Don't allow an eliminable register: register elimination can make
12865 the offset too large. */
12866 if (arm_eliminable_register (reg0))
12867 return 0;
12868
12869 val_diff = val1 - val0;
12870
12871 if (arm_ld_sched)
12872 {
12873 /* If the target has load delay slots, then there's no benefit
12874 to using an ldm instruction unless the offset is zero and
12875 we are optimizing for size. */
12876 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12877 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12878 && (val_diff == 4 || val_diff == -4));
12879 }
12880
12881 return ((REGNO (reg0) == REGNO (reg1))
12882 && (val_diff == 4 || val_diff == -4));
12883 }
12884
12885 return 0;
12886 }
12887
12888 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12889 for load operations, false for store operations. CONSECUTIVE is true
12890 if the register numbers in the operation must be consecutive in the register
12891 bank. RETURN_PC is true if the value is to be loaded into the PC.
12892 The pattern we are trying to match for load is:
12893 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12894 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12895 :
12896 :
12897 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12898 ]
12899 where
12900 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12901 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12902 3. If consecutive is TRUE, then for kth register being loaded,
12903 REGNO (R_dk) = REGNO (R_d0) + k.
12904 The pattern for store is similar. */
12905 bool
12906 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12907 bool consecutive, bool return_pc)
12908 {
12909 HOST_WIDE_INT count = XVECLEN (op, 0);
12910 rtx reg, mem, addr;
12911 unsigned regno;
12912 unsigned first_regno;
12913 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12914 rtx elt;
12915 bool addr_reg_in_reglist = false;
12916 bool update = false;
12917 int reg_increment;
12918 int offset_adj;
12919 int regs_per_val;
12920
12921 /* If not in SImode, then registers must be consecutive
12922 (e.g., VLDM instructions for DFmode). */
12923 gcc_assert ((mode == SImode) || consecutive);
12924 /* Setting return_pc for stores is illegal. */
12925 gcc_assert (!return_pc || load);
12926
12927 /* Set up the increments and the regs per val based on the mode. */
12928 reg_increment = GET_MODE_SIZE (mode);
12929 regs_per_val = reg_increment / 4;
12930 offset_adj = return_pc ? 1 : 0;
12931
12932 if (count <= 1
12933 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12934 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12935 return false;
12936
12937 /* Check if this is a write-back. */
12938 elt = XVECEXP (op, 0, offset_adj);
12939 if (GET_CODE (SET_SRC (elt)) == PLUS)
12940 {
12941 i++;
12942 base = 1;
12943 update = true;
12944
12945 /* The offset adjustment must be the number of registers being
12946 popped times the size of a single register. */
12947 if (!REG_P (SET_DEST (elt))
12948 || !REG_P (XEXP (SET_SRC (elt), 0))
12949 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12950 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12951 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12952 ((count - 1 - offset_adj) * reg_increment))
12953 return false;
12954 }
12955
12956 i = i + offset_adj;
12957 base = base + offset_adj;
12958 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12959 success depends on the type: VLDM can do just one reg,
12960 LDM must do at least two. */
12961 if ((count <= i) && (mode == SImode))
12962 return false;
12963
12964 elt = XVECEXP (op, 0, i - 1);
12965 if (GET_CODE (elt) != SET)
12966 return false;
12967
12968 if (load)
12969 {
12970 reg = SET_DEST (elt);
12971 mem = SET_SRC (elt);
12972 }
12973 else
12974 {
12975 reg = SET_SRC (elt);
12976 mem = SET_DEST (elt);
12977 }
12978
12979 if (!REG_P (reg) || !MEM_P (mem))
12980 return false;
12981
12982 regno = REGNO (reg);
12983 first_regno = regno;
12984 addr = XEXP (mem, 0);
12985 if (GET_CODE (addr) == PLUS)
12986 {
12987 if (!CONST_INT_P (XEXP (addr, 1)))
12988 return false;
12989
12990 offset = INTVAL (XEXP (addr, 1));
12991 addr = XEXP (addr, 0);
12992 }
12993
12994 if (!REG_P (addr))
12995 return false;
12996
12997 /* Don't allow SP to be loaded unless it is also the base register. It
12998 guarantees that SP is reset correctly when an LDM instruction
12999 is interrupted. Otherwise, we might end up with a corrupt stack. */
13000 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13001 return false;
13002
13003 for (; i < count; i++)
13004 {
13005 elt = XVECEXP (op, 0, i);
13006 if (GET_CODE (elt) != SET)
13007 return false;
13008
13009 if (load)
13010 {
13011 reg = SET_DEST (elt);
13012 mem = SET_SRC (elt);
13013 }
13014 else
13015 {
13016 reg = SET_SRC (elt);
13017 mem = SET_DEST (elt);
13018 }
13019
13020 if (!REG_P (reg)
13021 || GET_MODE (reg) != mode
13022 || REGNO (reg) <= regno
13023 || (consecutive
13024 && (REGNO (reg) !=
13025 (unsigned int) (first_regno + regs_per_val * (i - base))))
13026 /* Don't allow SP to be loaded unless it is also the base register. It
13027 guarantees that SP is reset correctly when an LDM instruction
13028 is interrupted. Otherwise, we might end up with a corrupt stack. */
13029 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13030 || !MEM_P (mem)
13031 || GET_MODE (mem) != mode
13032 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13033 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13034 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13035 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13036 offset + (i - base) * reg_increment))
13037 && (!REG_P (XEXP (mem, 0))
13038 || offset + (i - base) * reg_increment != 0)))
13039 return false;
13040
13041 regno = REGNO (reg);
13042 if (regno == REGNO (addr))
13043 addr_reg_in_reglist = true;
13044 }
13045
13046 if (load)
13047 {
13048 if (update && addr_reg_in_reglist)
13049 return false;
13050
13051 /* For Thumb-1, the address register is always modified, either by write-back
13052 or by an explicit load. If the pattern does not describe an update,
13053 then the address register must be in the list of loaded registers. */
13054 if (TARGET_THUMB1)
13055 return update || addr_reg_in_reglist;
13056 }
13057
13058 return true;
13059 }
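
/* A sketch of one PARALLEL this routine accepts (register numbers are only
   illustrative): a "pop {r4, r5}" style update form in SImode is

     [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
      (set (reg:SI r4) (mem:SI (reg:SI sp)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))]

   where the first element supplies the write-back: two registers times
   reg_increment == 4 gives the required adjustment of 8.  */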
13060
13061 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13062 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13063 instruction. ADD_OFFSET is nonzero if the base address register needs
13064 to be modified with an add instruction before we can use it. */
13065
13066 static bool
13067 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13068 int nops, HOST_WIDE_INT add_offset)
13069 {
13070 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13071 if the offset isn't small enough. The reason 2 ldrs are faster
13072 is because these ARMs are able to do more than one cache access
13073 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13074 whilst the ARM8 has a double bandwidth cache. This means that
13075 these cores can do both an instruction fetch and a data fetch in
13076 a single cycle, so the trick of calculating the address into a
13077 scratch register (one of the result regs) and then doing a load
13078 multiple actually becomes slower (and no smaller in code size).
13079 That is the transformation
13080
13081 ldr rd1, [rbase + offset]
13082 ldr rd2, [rbase + offset + 4]
13083
13084 to
13085
13086 add rd1, rbase, offset
13087 ldmia rd1, {rd1, rd2}
13088
13089 produces worse code -- '3 cycles + any stalls on rd2' instead of
13090 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13091 access per cycle, the first sequence could never complete in less
13092 than 6 cycles, whereas the ldm sequence would only take 5 and
13093 would make better use of sequential accesses if not hitting the
13094 cache.
13095
13096 We cheat here and test 'arm_ld_sched' which we currently know to
13097 only be true for the ARM8, ARM9 and StrongARM. If this ever
13098 changes, then the test below needs to be reworked. */
13099 if (nops == 2 && arm_ld_sched && add_offset != 0)
13100 return false;
13101
13102 /* XScale has load-store double instructions, but they have stricter
13103 alignment requirements than load-store multiple, so we cannot
13104 use them.
13105
13106 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13107 the pipeline until completion.
13108
13109 NREGS CYCLES
13110 1 3
13111 2 4
13112 3 5
13113 4 6
13114
13115 An ldr instruction takes 1-3 cycles, but does not block the
13116 pipeline.
13117
13118 NREGS CYCLES
13119 1 1-3
13120 2 2-6
13121 3 3-9
13122 4 4-12
13123
13124 Best case ldr will always win. However, the more ldr instructions
13125 we issue, the less likely we are to be able to schedule them well.
13126 Using ldr instructions also increases code size.
13127
13128 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13129 for counts of 3 or 4 regs. */
13130 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13131 return false;
13132 return true;
13133 }
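
/* To put numbers on the XScale compromise above (taken from the tables, not
   new measurements): for 2 registers an ldm costs 2 + 2 = 4 blocking cycles
   against 2-6 non-blocking cycles for two ldrs, so separate ldrs win unless
   we are optimizing for size; for 4 registers the ldm's 6 cycles compare
   with 4-12 cycles of harder-to-schedule ldrs, so the ldm is kept.  */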
13134
13135 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13136 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13137 an array ORDER which describes the sequence to use when accessing the
13138 offsets that produces an ascending order. In this sequence, each
13139 offset must be larger by exactly 4 than the previous one. ORDER[0]
13140 must have been filled in with the lowest offset by the caller.
13141 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13142 we use to verify that ORDER produces an ascending order of registers.
13143 Return true if it was possible to construct such an order, false if
13144 not. */
13145
13146 static bool
13147 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13148 int *unsorted_regs)
13149 {
13150 int i;
13151 for (i = 1; i < nops; i++)
13152 {
13153 int j;
13154
13155 order[i] = order[i - 1];
13156 for (j = 0; j < nops; j++)
13157 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13158 {
13159 /* We must find exactly one offset that is higher than the
13160 previous one by 4. */
13161 if (order[i] != order[i - 1])
13162 return false;
13163 order[i] = j;
13164 }
13165 if (order[i] == order[i - 1])
13166 return false;
13167 /* The register numbers must be ascending. */
13168 if (unsorted_regs != NULL
13169 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13170 return false;
13171 }
13172 return true;
13173 }
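
/* A small worked example (hypothetical inputs): with nops == 4,
   unsorted_offsets == {8, 0, 4, 12} and order[0] preset to 1 (the slot
   holding offset 0), the loop fills order == {1, 2, 0, 3}.  Offsets
   {8, 0, 4, 16} would fail because no entry is exactly 12.  */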
13174
13175 /* Used to determine in a peephole whether a sequence of load
13176 instructions can be changed into a load-multiple instruction.
13177 NOPS is the number of separate load instructions we are examining. The
13178 first NOPS entries in OPERANDS are the destination registers, the
13179 next NOPS entries are memory operands. If this function is
13180 successful, *BASE is set to the common base register of the memory
13181 accesses; *LOAD_OFFSET is set to the first memory location's offset
13182 from that base register.
13183 REGS is an array filled in with the destination register numbers.
13184 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13185 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13186 the sequence of registers in REGS matches the loads from ascending memory
13187 locations, and the function verifies that the register numbers are
13188 themselves ascending. If CHECK_REGS is false, the register numbers
13189 are stored in the order they are found in the operands. */
13190 static int
13191 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13192 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13193 {
13194 int unsorted_regs[MAX_LDM_STM_OPS];
13195 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13196 int order[MAX_LDM_STM_OPS];
13197 rtx base_reg_rtx = NULL;
13198 int base_reg = -1;
13199 int i, ldm_case;
13200
13201 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13202 easily extended if required. */
13203 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13204
13205 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13206
13207 /* Loop over the operands and check that the memory references are
13208 suitable (i.e. immediate offsets from the same base register). At
13209 the same time, extract the target register, and the memory
13210 offsets. */
13211 for (i = 0; i < nops; i++)
13212 {
13213 rtx reg;
13214 rtx offset;
13215
13216 /* Convert a subreg of a mem into the mem itself. */
13217 if (GET_CODE (operands[nops + i]) == SUBREG)
13218 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13219
13220 gcc_assert (MEM_P (operands[nops + i]));
13221
13222 /* Don't reorder volatile memory references; it doesn't seem worth
13223 looking for the case where the order is ok anyway. */
13224 if (MEM_VOLATILE_P (operands[nops + i]))
13225 return 0;
13226
13227 offset = const0_rtx;
13228
13229 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13230 || (GET_CODE (reg) == SUBREG
13231 && REG_P (reg = SUBREG_REG (reg))))
13232 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13233 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13234 || (GET_CODE (reg) == SUBREG
13235 && REG_P (reg = SUBREG_REG (reg))))
13236 && (CONST_INT_P (offset
13237 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13238 {
13239 if (i == 0)
13240 {
13241 base_reg = REGNO (reg);
13242 base_reg_rtx = reg;
13243 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13244 return 0;
13245 }
13246 else if (base_reg != (int) REGNO (reg))
13247 /* Not addressed from the same base register. */
13248 return 0;
13249
13250 unsorted_regs[i] = (REG_P (operands[i])
13251 ? REGNO (operands[i])
13252 : REGNO (SUBREG_REG (operands[i])));
13253
13254 /* If it isn't an integer register, or if it overwrites the
13255 base register but isn't the last insn in the list, then
13256 we can't do this. */
13257 if (unsorted_regs[i] < 0
13258 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13259 || unsorted_regs[i] > 14
13260 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13261 return 0;
13262
13263 /* Don't allow SP to be loaded unless it is also the base
13264 register. It guarantees that SP is reset correctly when
13265 an LDM instruction is interrupted. Otherwise, we might
13266 end up with a corrupt stack. */
13267 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13268 return 0;
13269
13270 unsorted_offsets[i] = INTVAL (offset);
13271 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13272 order[0] = i;
13273 }
13274 else
13275 /* Not a suitable memory address. */
13276 return 0;
13277 }
13278
13279 /* All the useful information has now been extracted from the
13280 operands into unsorted_regs and unsorted_offsets; additionally,
13281 order[0] has been set to the lowest offset in the list. Sort
13282 the offsets into order, verifying that they are adjacent, and
13283 check that the register numbers are ascending. */
13284 if (!compute_offset_order (nops, unsorted_offsets, order,
13285 check_regs ? unsorted_regs : NULL))
13286 return 0;
13287
13288 if (saved_order)
13289 memcpy (saved_order, order, sizeof order);
13290
13291 if (base)
13292 {
13293 *base = base_reg;
13294
13295 for (i = 0; i < nops; i++)
13296 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13297
13298 *load_offset = unsorted_offsets[order[0]];
13299 }
13300
13301 if (TARGET_THUMB1
13302 && !peep2_reg_dead_p (nops, base_reg_rtx))
13303 return 0;
13304
13305 if (unsorted_offsets[order[0]] == 0)
13306 ldm_case = 1; /* ldmia */
13307 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13308 ldm_case = 2; /* ldmib */
13309 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13310 ldm_case = 3; /* ldmda */
13311 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13312 ldm_case = 4; /* ldmdb */
13313 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13314 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13315 ldm_case = 5;
13316 else
13317 return 0;
13318
13319 if (!multiple_operation_profitable_p (false, nops,
13320 ldm_case == 5
13321 ? unsorted_offsets[order[0]] : 0))
13322 return 0;
13323
13324 return ldm_case;
13325 }
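
/* For illustration of the case numbering (assumed offsets from a single
   base register): sorted offsets {0, 4, 8, 12} give case 1 (ldmia),
   {4, 8, 12, 16} case 2 (ldmib, ARM only), {-12, -8, -4, 0} case 3
   (ldmda, ARM only), {-16, -12, -8, -4} case 4 (ldmdb), and any other
   lowest offset that is a valid ARM immediate, e.g. {256, 260, 264, 268},
   case 5, where the offset is first folded into the base with an add.  */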
13326
13327 /* Used to determine in a peephole whether a sequence of store instructions can
13328 be changed into a store-multiple instruction.
13329 NOPS is the number of separate store instructions we are examining.
13330 NOPS_TOTAL is the total number of instructions recognized by the peephole
13331 pattern.
13332 The first NOPS entries in OPERANDS are the source registers, the next
13333 NOPS entries are memory operands. If this function is successful, *BASE is
13334 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13335 to the first memory location's offset from that base register. REGS is an
13336 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13337 likewise filled with the corresponding rtx's.
13338 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13339 numbers to an ascending order of stores.
13340 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13341 from ascending memory locations, and the function verifies that the register
13342 numbers are themselves ascending. If CHECK_REGS is false, the register
13343 numbers are stored in the order they are found in the operands. */
13344 static int
13345 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13346 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13347 HOST_WIDE_INT *load_offset, bool check_regs)
13348 {
13349 int unsorted_regs[MAX_LDM_STM_OPS];
13350 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13351 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13352 int order[MAX_LDM_STM_OPS];
13353 int base_reg = -1;
13354 rtx base_reg_rtx = NULL;
13355 int i, stm_case;
13356
13357 /* Write-back of the base register is currently only supported for Thumb-1. */
13358 int base_writeback = TARGET_THUMB1;
13359
13360 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13361 easily extended if required. */
13362 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13363
13364 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13365
13366 /* Loop over the operands and check that the memory references are
13367 suitable (i.e. immediate offsets from the same base register). At
13368 the same time, extract the target register, and the memory
13369 offsets. */
13370 for (i = 0; i < nops; i++)
13371 {
13372 rtx reg;
13373 rtx offset;
13374
13375 /* Convert a subreg of a mem into the mem itself. */
13376 if (GET_CODE (operands[nops + i]) == SUBREG)
13377 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13378
13379 gcc_assert (MEM_P (operands[nops + i]));
13380
13381 /* Don't reorder volatile memory references; it doesn't seem worth
13382 looking for the case where the order is ok anyway. */
13383 if (MEM_VOLATILE_P (operands[nops + i]))
13384 return 0;
13385
13386 offset = const0_rtx;
13387
13388 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13389 || (GET_CODE (reg) == SUBREG
13390 && REG_P (reg = SUBREG_REG (reg))))
13391 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13392 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13393 || (GET_CODE (reg) == SUBREG
13394 && REG_P (reg = SUBREG_REG (reg))))
13395 && (CONST_INT_P (offset
13396 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13397 {
13398 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13399 ? operands[i] : SUBREG_REG (operands[i]));
13400 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13401
13402 if (i == 0)
13403 {
13404 base_reg = REGNO (reg);
13405 base_reg_rtx = reg;
13406 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13407 return 0;
13408 }
13409 else if (base_reg != (int) REGNO (reg))
13410 /* Not addressed from the same base register. */
13411 return 0;
13412
13413 /* If it isn't an integer register, then we can't do this. */
13414 if (unsorted_regs[i] < 0
13415 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13416 /* The effects are unpredictable if the base register is
13417 both updated and stored. */
13418 || (base_writeback && unsorted_regs[i] == base_reg)
13419 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13420 || unsorted_regs[i] > 14)
13421 return 0;
13422
13423 unsorted_offsets[i] = INTVAL (offset);
13424 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13425 order[0] = i;
13426 }
13427 else
13428 /* Not a suitable memory address. */
13429 return 0;
13430 }
13431
13432 /* All the useful information has now been extracted from the
13433 operands into unsorted_regs and unsorted_offsets; additionally,
13434 order[0] has been set to the lowest offset in the list. Sort
13435 the offsets into order, verifying that they are adjacent, and
13436 check that the register numbers are ascending. */
13437 if (!compute_offset_order (nops, unsorted_offsets, order,
13438 check_regs ? unsorted_regs : NULL))
13439 return 0;
13440
13441 if (saved_order)
13442 memcpy (saved_order, order, sizeof order);
13443
13444 if (base)
13445 {
13446 *base = base_reg;
13447
13448 for (i = 0; i < nops; i++)
13449 {
13450 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13451 if (reg_rtxs)
13452 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13453 }
13454
13455 *load_offset = unsorted_offsets[order[0]];
13456 }
13457
13458 if (TARGET_THUMB1
13459 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13460 return 0;
13461
13462 if (unsorted_offsets[order[0]] == 0)
13463 stm_case = 1; /* stmia */
13464 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13465 stm_case = 2; /* stmib */
13466 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13467 stm_case = 3; /* stmda */
13468 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13469 stm_case = 4; /* stmdb */
13470 else
13471 return 0;
13472
13473 if (!multiple_operation_profitable_p (false, nops, 0))
13474 return 0;
13475
13476 return stm_case;
13477 }
13478 \f
13479 /* Routines for use in generating RTL. */
13480
13481 /* Generate a load-multiple instruction. COUNT is the number of loads in
13482 the instruction; REGS and MEMS are arrays containing the operands.
13483 BASEREG is the base register to be used in addressing the memory operands.
13484 WBACK_OFFSET is nonzero if the instruction should update the base
13485 register. */
13486
13487 static rtx
13488 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13489 HOST_WIDE_INT wback_offset)
13490 {
13491 int i = 0, j;
13492 rtx result;
13493
13494 if (!multiple_operation_profitable_p (false, count, 0))
13495 {
13496 rtx seq;
13497
13498 start_sequence ();
13499
13500 for (i = 0; i < count; i++)
13501 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13502
13503 if (wback_offset != 0)
13504 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13505
13506 seq = get_insns ();
13507 end_sequence ();
13508
13509 return seq;
13510 }
13511
13512 result = gen_rtx_PARALLEL (VOIDmode,
13513 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13514 if (wback_offset != 0)
13515 {
13516 XVECEXP (result, 0, 0)
13517 = gen_rtx_SET (VOIDmode, basereg,
13518 plus_constant (Pmode, basereg, wback_offset));
13519 i = 1;
13520 count++;
13521 }
13522
13523 for (j = 0; i < count; i++, j++)
13524 XVECEXP (result, 0, i)
13525 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13526
13527 return result;
13528 }
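
/* A sketch of the RTL produced here for count == 2 with an 8-byte
   write-back (illustrative shape only):

     (parallel [(set basereg (plus:SI basereg (const_int 8)))
                (set (reg:SI regs[0]) mems[0])
                (set (reg:SI regs[1]) mems[1])])

   When the ldm is judged unprofitable we instead return a plain sequence
   of two SImode moves followed by the base-register add.  */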
13529
13530 /* Generate a store-multiple instruction. COUNT is the number of stores in
13531 the instruction; REGS and MEMS are arrays containing the operands.
13532 BASEREG is the base register to be used in addressing the memory operands.
13533 WBACK_OFFSET is nonzero if the instruction should update the base
13534 register. */
13535
13536 static rtx
13537 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13538 HOST_WIDE_INT wback_offset)
13539 {
13540 int i = 0, j;
13541 rtx result;
13542
13543 if (GET_CODE (basereg) == PLUS)
13544 basereg = XEXP (basereg, 0);
13545
13546 if (!multiple_operation_profitable_p (false, count, 0))
13547 {
13548 rtx seq;
13549
13550 start_sequence ();
13551
13552 for (i = 0; i < count; i++)
13553 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13554
13555 if (wback_offset != 0)
13556 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13557
13558 seq = get_insns ();
13559 end_sequence ();
13560
13561 return seq;
13562 }
13563
13564 result = gen_rtx_PARALLEL (VOIDmode,
13565 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13566 if (wback_offset != 0)
13567 {
13568 XVECEXP (result, 0, 0)
13569 = gen_rtx_SET (VOIDmode, basereg,
13570 plus_constant (Pmode, basereg, wback_offset));
13571 i = 1;
13572 count++;
13573 }
13574
13575 for (j = 0; i < count; i++, j++)
13576 XVECEXP (result, 0, i)
13577 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13578
13579 return result;
13580 }
13581
13582 /* Generate either a load-multiple or a store-multiple instruction. This
13583 function can be used in situations where we can start with a single MEM
13584 rtx and adjust its address upwards.
13585 COUNT is the number of operations in the instruction, not counting a
13586 possible update of the base register. REGS is an array containing the
13587 register operands.
13588 BASEREG is the base register to be used in addressing the memory operands,
13589 which are constructed from BASEMEM.
13590 WRITE_BACK specifies whether the generated instruction should include an
13591 update of the base register.
13592 OFFSETP is used to pass an offset to and from this function; this offset
13593 is not used when constructing the address (instead BASEMEM should have an
13594 appropriate offset in its address), it is used only for setting
13595 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13596
13597 static rtx
13598 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13599 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13600 {
13601 rtx mems[MAX_LDM_STM_OPS];
13602 HOST_WIDE_INT offset = *offsetp;
13603 int i;
13604
13605 gcc_assert (count <= MAX_LDM_STM_OPS);
13606
13607 if (GET_CODE (basereg) == PLUS)
13608 basereg = XEXP (basereg, 0);
13609
13610 for (i = 0; i < count; i++)
13611 {
13612 rtx addr = plus_constant (Pmode, basereg, i * 4);
13613 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13614 offset += 4;
13615 }
13616
13617 if (write_back)
13618 *offsetp = offset;
13619
13620 if (is_load)
13621 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13622 write_back ? 4 * count : 0);
13623 else
13624 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13625 write_back ? 4 * count : 0);
13626 }
13627
13628 rtx
13629 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13630 rtx basemem, HOST_WIDE_INT *offsetp)
13631 {
13632 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13633 offsetp);
13634 }
13635
13636 rtx
13637 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13638 rtx basemem, HOST_WIDE_INT *offsetp)
13639 {
13640 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13641 offsetp);
13642 }
13643
13644 /* Called from a peephole2 expander to turn a sequence of loads into an
13645 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13646 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13647 is true if we can reorder the registers because they are used commutatively
13648 subsequently.
13649 Returns true iff we could generate a new instruction. */
13650
13651 bool
13652 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13653 {
13654 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13655 rtx mems[MAX_LDM_STM_OPS];
13656 int i, j, base_reg;
13657 rtx base_reg_rtx;
13658 HOST_WIDE_INT offset;
13659 int write_back = FALSE;
13660 int ldm_case;
13661 rtx addr;
13662
13663 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13664 &base_reg, &offset, !sort_regs);
13665
13666 if (ldm_case == 0)
13667 return false;
13668
13669 if (sort_regs)
13670 for (i = 0; i < nops - 1; i++)
13671 for (j = i + 1; j < nops; j++)
13672 if (regs[i] > regs[j])
13673 {
13674 int t = regs[i];
13675 regs[i] = regs[j];
13676 regs[j] = t;
13677 }
13678 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13679
13680 if (TARGET_THUMB1)
13681 {
13682 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13683 gcc_assert (ldm_case == 1 || ldm_case == 5);
13684 write_back = TRUE;
13685 }
13686
13687 if (ldm_case == 5)
13688 {
13689 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13690 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13691 offset = 0;
13692 if (!TARGET_THUMB1)
13693 {
13694 base_reg = regs[0];
13695 base_reg_rtx = newbase;
13696 }
13697 }
13698
13699 for (i = 0; i < nops; i++)
13700 {
13701 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13702 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13703 SImode, addr, 0);
13704 }
13705 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13706 write_back ? offset + i * 4 : 0));
13707 return true;
13708 }
13709
13710 /* Called from a peephole2 expander to turn a sequence of stores into an
13711 STM instruction. OPERANDS are the operands found by the peephole matcher;
13712 NOPS indicates how many separate stores we are trying to combine.
13713 Returns true iff we could generate a new instruction. */
13714
13715 bool
13716 gen_stm_seq (rtx *operands, int nops)
13717 {
13718 int i;
13719 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13720 rtx mems[MAX_LDM_STM_OPS];
13721 int base_reg;
13722 rtx base_reg_rtx;
13723 HOST_WIDE_INT offset;
13724 int write_back = FALSE;
13725 int stm_case;
13726 rtx addr;
13727 bool base_reg_dies;
13728
13729 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13730 mem_order, &base_reg, &offset, true);
13731
13732 if (stm_case == 0)
13733 return false;
13734
13735 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13736
13737 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13738 if (TARGET_THUMB1)
13739 {
13740 gcc_assert (base_reg_dies);
13741 write_back = TRUE;
13742 }
13743
13744 if (stm_case == 5)
13745 {
13746 gcc_assert (base_reg_dies);
13747 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13748 offset = 0;
13749 }
13750
13751 addr = plus_constant (Pmode, base_reg_rtx, offset);
13752
13753 for (i = 0; i < nops; i++)
13754 {
13755 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13756 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13757 SImode, addr, 0);
13758 }
13759 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13760 write_back ? offset + i * 4 : 0));
13761 return true;
13762 }
13763
13764 /* Called from a peephole2 expander to turn a sequence of stores that are
13765 preceded by constant loads into an STM instruction. OPERANDS are the
13766 operands found by the peephole matcher; NOPS indicates how many
13767 separate stores we are trying to combine; there are 2 * NOPS
13768 instructions in the peephole.
13769 Returns true iff we could generate a new instruction. */
13770
13771 bool
13772 gen_const_stm_seq (rtx *operands, int nops)
13773 {
13774 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13775 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13776 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13777 rtx mems[MAX_LDM_STM_OPS];
13778 int base_reg;
13779 rtx base_reg_rtx;
13780 HOST_WIDE_INT offset;
13781 int write_back = FALSE;
13782 int stm_case;
13783 rtx addr;
13784 bool base_reg_dies;
13785 int i, j;
13786 HARD_REG_SET allocated;
13787
13788 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13789 mem_order, &base_reg, &offset, false);
13790
13791 if (stm_case == 0)
13792 return false;
13793
13794 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13795
13796 /* If the same register is used more than once, try to find a free
13797 register. */
13798 CLEAR_HARD_REG_SET (allocated);
13799 for (i = 0; i < nops; i++)
13800 {
13801 for (j = i + 1; j < nops; j++)
13802 if (regs[i] == regs[j])
13803 {
13804 rtx t = peep2_find_free_register (0, nops * 2,
13805 TARGET_THUMB1 ? "l" : "r",
13806 SImode, &allocated);
13807 if (t == NULL_RTX)
13808 return false;
13809 reg_rtxs[i] = t;
13810 regs[i] = REGNO (t);
13811 }
13812 }
13813
13814 /* Compute an ordering that maps the register numbers to an ascending
13815 sequence. */
13816 reg_order[0] = 0;
13817 for (i = 0; i < nops; i++)
13818 if (regs[i] < regs[reg_order[0]])
13819 reg_order[0] = i;
13820
13821 for (i = 1; i < nops; i++)
13822 {
13823 int this_order = reg_order[i - 1];
13824 for (j = 0; j < nops; j++)
13825 if (regs[j] > regs[reg_order[i - 1]]
13826 && (this_order == reg_order[i - 1]
13827 || regs[j] < regs[this_order]))
13828 this_order = j;
13829 reg_order[i] = this_order;
13830 }
13831
13832 /* Ensure that registers that must be live after the instruction end
13833 up with the correct value. */
13834 for (i = 0; i < nops; i++)
13835 {
13836 int this_order = reg_order[i];
13837 if ((this_order != mem_order[i]
13838 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13839 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13840 return false;
13841 }
13842
13843 /* Load the constants. */
13844 for (i = 0; i < nops; i++)
13845 {
13846 rtx op = operands[2 * nops + mem_order[i]];
13847 sorted_regs[i] = regs[reg_order[i]];
13848 emit_move_insn (reg_rtxs[reg_order[i]], op);
13849 }
13850
13851 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13852
13853 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13854 if (TARGET_THUMB1)
13855 {
13856 gcc_assert (base_reg_dies);
13857 write_back = TRUE;
13858 }
13859
13860 if (stm_case == 5)
13861 {
13862 gcc_assert (base_reg_dies);
13863 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13864 offset = 0;
13865 }
13866
13867 addr = plus_constant (Pmode, base_reg_rtx, offset);
13868
13869 for (i = 0; i < nops; i++)
13870 {
13871 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13872 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13873 SImode, addr, 0);
13874 }
13875 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13876 write_back ? offset + i * 4 : 0));
13877 return true;
13878 }
13879
13880 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13881 unaligned copies on processors which support unaligned semantics for those
13882 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13883 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13884 An interleave factor of 1 (the minimum) will perform no interleaving.
13885 Load/store multiple are used for aligned addresses where possible. */
13886
13887 static void
13888 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13889 HOST_WIDE_INT length,
13890 unsigned int interleave_factor)
13891 {
13892 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13893 int *regnos = XALLOCAVEC (int, interleave_factor);
13894 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13895 HOST_WIDE_INT i, j;
13896 HOST_WIDE_INT remaining = length, words;
13897 rtx halfword_tmp = NULL, byte_tmp = NULL;
13898 rtx dst, src;
13899 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13900 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13901 HOST_WIDE_INT srcoffset, dstoffset;
13902 HOST_WIDE_INT src_autoinc, dst_autoinc;
13903 rtx mem, addr;
13904
13905 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13906
13907 /* Use hard registers if we have aligned source or destination so we can use
13908 load/store multiple with contiguous registers. */
13909 if (dst_aligned || src_aligned)
13910 for (i = 0; i < interleave_factor; i++)
13911 regs[i] = gen_rtx_REG (SImode, i);
13912 else
13913 for (i = 0; i < interleave_factor; i++)
13914 regs[i] = gen_reg_rtx (SImode);
13915
13916 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13917 src = copy_addr_to_reg (XEXP (srcbase, 0));
13918
13919 srcoffset = dstoffset = 0;
13920
13921 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13922 For copying the last bytes we want to subtract this offset again. */
13923 src_autoinc = dst_autoinc = 0;
13924
13925 for (i = 0; i < interleave_factor; i++)
13926 regnos[i] = i;
13927
13928 /* Copy BLOCK_SIZE_BYTES chunks. */
13929
13930 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13931 {
13932 /* Load words. */
13933 if (src_aligned && interleave_factor > 1)
13934 {
13935 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13936 TRUE, srcbase, &srcoffset));
13937 src_autoinc += UNITS_PER_WORD * interleave_factor;
13938 }
13939 else
13940 {
13941 for (j = 0; j < interleave_factor; j++)
13942 {
13943 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13944 - src_autoinc));
13945 mem = adjust_automodify_address (srcbase, SImode, addr,
13946 srcoffset + j * UNITS_PER_WORD);
13947 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13948 }
13949 srcoffset += block_size_bytes;
13950 }
13951
13952 /* Store words. */
13953 if (dst_aligned && interleave_factor > 1)
13954 {
13955 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13956 TRUE, dstbase, &dstoffset));
13957 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13958 }
13959 else
13960 {
13961 for (j = 0; j < interleave_factor; j++)
13962 {
13963 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13964 - dst_autoinc));
13965 mem = adjust_automodify_address (dstbase, SImode, addr,
13966 dstoffset + j * UNITS_PER_WORD);
13967 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13968 }
13969 dstoffset += block_size_bytes;
13970 }
13971
13972 remaining -= block_size_bytes;
13973 }
13974
13975 /* Copy any whole words left (note these aren't interleaved with any
13976 subsequent halfword/byte load/stores in the interests of simplicity). */
13977
13978 words = remaining / UNITS_PER_WORD;
13979
13980 gcc_assert (words < interleave_factor);
13981
13982 if (src_aligned && words > 1)
13983 {
13984 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13985 &srcoffset));
13986 src_autoinc += UNITS_PER_WORD * words;
13987 }
13988 else
13989 {
13990 for (j = 0; j < words; j++)
13991 {
13992 addr = plus_constant (Pmode, src,
13993 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13994 mem = adjust_automodify_address (srcbase, SImode, addr,
13995 srcoffset + j * UNITS_PER_WORD);
13996 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13997 }
13998 srcoffset += words * UNITS_PER_WORD;
13999 }
14000
14001 if (dst_aligned && words > 1)
14002 {
14003 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14004 &dstoffset));
14005 dst_autoinc += words * UNITS_PER_WORD;
14006 }
14007 else
14008 {
14009 for (j = 0; j < words; j++)
14010 {
14011 addr = plus_constant (Pmode, dst,
14012 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14013 mem = adjust_automodify_address (dstbase, SImode, addr,
14014 dstoffset + j * UNITS_PER_WORD);
14015 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14016 }
14017 dstoffset += words * UNITS_PER_WORD;
14018 }
14019
14020 remaining -= words * UNITS_PER_WORD;
14021
14022 gcc_assert (remaining < 4);
14023
14024 /* Copy a halfword if necessary. */
14025
14026 if (remaining >= 2)
14027 {
14028 halfword_tmp = gen_reg_rtx (SImode);
14029
14030 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14031 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14032 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14033
14034 /* Either write out immediately, or delay until we've loaded the last
14035 byte, depending on interleave factor. */
14036 if (interleave_factor == 1)
14037 {
14038 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14039 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14040 emit_insn (gen_unaligned_storehi (mem,
14041 gen_lowpart (HImode, halfword_tmp)));
14042 halfword_tmp = NULL;
14043 dstoffset += 2;
14044 }
14045
14046 remaining -= 2;
14047 srcoffset += 2;
14048 }
14049
14050 gcc_assert (remaining < 2);
14051
14052 /* Copy last byte. */
14053
14054 if ((remaining & 1) != 0)
14055 {
14056 byte_tmp = gen_reg_rtx (SImode);
14057
14058 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14059 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14060 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14061
14062 if (interleave_factor == 1)
14063 {
14064 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14065 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14066 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14067 byte_tmp = NULL;
14068 dstoffset++;
14069 }
14070
14071 remaining--;
14072 srcoffset++;
14073 }
14074
14075 /* Store last halfword if we haven't done so already. */
14076
14077 if (halfword_tmp)
14078 {
14079 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14080 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14081 emit_insn (gen_unaligned_storehi (mem,
14082 gen_lowpart (HImode, halfword_tmp)));
14083 dstoffset += 2;
14084 }
14085
14086 /* Likewise for last byte. */
14087
14088 if (byte_tmp)
14089 {
14090 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14091 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14092 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14093 dstoffset++;
14094 }
14095
14096 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14097 }
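
/* As a worked example of the decomposition above (hypothetical sizes):
   length == 23 with interleave_factor == 2 copies two 8-byte blocks with
   the word loops, leaving remaining == 7; it then copies one more whole
   word (words == 1, so the ldm/stm path is skipped), a halfword and a
   final byte, ending with srcoffset == dstoffset == 23.  */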
14098
14099 /* From mips_adjust_block_mem:
14100
14101 Helper function for doing a loop-based block operation on memory
14102 reference MEM. Each iteration of the loop will operate on LENGTH
14103 bytes of MEM.
14104
14105 Create a new base register for use within the loop and point it to
14106 the start of MEM. Create a new memory reference that uses this
14107 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14108
14109 static void
14110 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14111 rtx *loop_mem)
14112 {
14113 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14114
14115 /* Although the new mem does not refer to a known location,
14116 it does keep up to LENGTH bytes of alignment. */
14117 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14118 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14119 }
14120
14121 /* From mips_block_move_loop:
14122
14123 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14124 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14125 the memory regions do not overlap. */
14126
14127 static void
14128 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14129 unsigned int interleave_factor,
14130 HOST_WIDE_INT bytes_per_iter)
14131 {
14132 rtx label, src_reg, dest_reg, final_src, test;
14133 HOST_WIDE_INT leftover;
14134
14135 leftover = length % bytes_per_iter;
14136 length -= leftover;
14137
14138 /* Create registers and memory references for use within the loop. */
14139 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14140 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14141
14142 /* Calculate the value that SRC_REG should have after the last iteration of
14143 the loop. */
14144 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14145 0, 0, OPTAB_WIDEN);
14146
14147 /* Emit the start of the loop. */
14148 label = gen_label_rtx ();
14149 emit_label (label);
14150
14151 /* Emit the loop body. */
14152 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14153 interleave_factor);
14154
14155 /* Move on to the next block. */
14156 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14157 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14158
14159 /* Emit the loop condition. */
14160 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14161 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14162
14163 /* Mop up any left-over bytes. */
14164 if (leftover)
14165 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14166 }
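
/* For instance (assumed sizes): length == 50 with bytes_per_iter == 16
   peels off leftover == 2, runs the loop over the remaining 48 bytes in
   three iterations, and then calls the straight-line copier once more for
   the final 2 bytes.  */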
14167
14168 /* Emit a block move when either the source or destination is unaligned (not
14169 aligned to a four-byte boundary). This may need further tuning depending on
14170 core type, optimize_size setting, etc. */
14171
14172 static int
14173 arm_movmemqi_unaligned (rtx *operands)
14174 {
14175 HOST_WIDE_INT length = INTVAL (operands[2]);
14176
14177 if (optimize_size)
14178 {
14179 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14180 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14181 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14182 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14183 or dst_aligned though: allow more interleaving in those cases since the
14184 resulting code can be smaller. */
14185 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14186 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14187
14188 if (length > 12)
14189 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14190 interleave_factor, bytes_per_iter);
14191 else
14192 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14193 interleave_factor);
14194 }
14195 else
14196 {
14197 /* Note that the loop created by arm_block_move_unaligned_loop may be
14198 subject to loop unrolling, which makes tuning this condition a little
14199 redundant. */
14200 if (length > 32)
14201 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14202 else
14203 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14204 }
14205
14206 return 1;
14207 }
14208
14209 int
14210 arm_gen_movmemqi (rtx *operands)
14211 {
14212 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14213 HOST_WIDE_INT srcoffset, dstoffset;
14214 int i;
14215 rtx src, dst, srcbase, dstbase;
14216 rtx part_bytes_reg = NULL;
14217 rtx mem;
14218
14219 if (!CONST_INT_P (operands[2])
14220 || !CONST_INT_P (operands[3])
14221 || INTVAL (operands[2]) > 64)
14222 return 0;
14223
14224 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14225 return arm_movmemqi_unaligned (operands);
14226
14227 if (INTVAL (operands[3]) & 3)
14228 return 0;
14229
14230 dstbase = operands[0];
14231 srcbase = operands[1];
14232
14233 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14234 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14235
14236 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14237 out_words_to_go = INTVAL (operands[2]) / 4;
14238 last_bytes = INTVAL (operands[2]) & 3;
14239 dstoffset = srcoffset = 0;
14240
14241 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14242 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14243
14244 for (i = 0; in_words_to_go >= 2; i+=4)
14245 {
14246 if (in_words_to_go > 4)
14247 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14248 TRUE, srcbase, &srcoffset));
14249 else
14250 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14251 src, FALSE, srcbase,
14252 &srcoffset));
14253
14254 if (out_words_to_go)
14255 {
14256 if (out_words_to_go > 4)
14257 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14258 TRUE, dstbase, &dstoffset));
14259 else if (out_words_to_go != 1)
14260 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14261 out_words_to_go, dst,
14262 (last_bytes == 0
14263 ? FALSE : TRUE),
14264 dstbase, &dstoffset));
14265 else
14266 {
14267 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14268 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14269 if (last_bytes != 0)
14270 {
14271 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14272 dstoffset += 4;
14273 }
14274 }
14275 }
14276
14277 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14278 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14279 }
14280
14281 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14282 if (out_words_to_go)
14283 {
14284 rtx sreg;
14285
14286 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14287 sreg = copy_to_reg (mem);
14288
14289 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14290 emit_move_insn (mem, sreg);
14291 in_words_to_go--;
14292
14293 gcc_assert (!in_words_to_go); /* Sanity check */
14294 }
14295
14296 if (in_words_to_go)
14297 {
14298 gcc_assert (in_words_to_go > 0);
14299
14300 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14301 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14302 }
14303
14304 gcc_assert (!last_bytes || part_bytes_reg);
14305
14306 if (BYTES_BIG_ENDIAN && last_bytes)
14307 {
14308 rtx tmp = gen_reg_rtx (SImode);
14309
14310 /* The bytes we want are in the top end of the word. */
14311 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14312 GEN_INT (8 * (4 - last_bytes))));
14313 part_bytes_reg = tmp;
14314
14315 while (last_bytes)
14316 {
14317 mem = adjust_automodify_address (dstbase, QImode,
14318 plus_constant (Pmode, dst,
14319 last_bytes - 1),
14320 dstoffset + last_bytes - 1);
14321 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14322
14323 if (--last_bytes)
14324 {
14325 tmp = gen_reg_rtx (SImode);
14326 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14327 part_bytes_reg = tmp;
14328 }
14329 }
14330
14331 }
14332 else
14333 {
14334 if (last_bytes > 1)
14335 {
14336 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14337 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14338 last_bytes -= 2;
14339 if (last_bytes)
14340 {
14341 rtx tmp = gen_reg_rtx (SImode);
14342 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14343 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14344 part_bytes_reg = tmp;
14345 dstoffset += 2;
14346 }
14347 }
14348
14349 if (last_bytes)
14350 {
14351 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14352 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14353 }
14354 }
14355
14356 return 1;
14357 }
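
/* A worked example of the bookkeeping above (hypothetical operands): a
   14-byte, word-aligned copy has in_words_to_go == 4 (ARM_NUM_INTS rounds
   up), out_words_to_go == 3 and last_bytes == 2, so we emit one 4-register
   ldm, a 3-register stm with write-back, and then store the trailing two
   bytes as a halfword taken from part_bytes_reg, which was set to the
   register holding the fourth (partial) word.  */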
14358
14359 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14360 by the mode size. */
14361 inline static rtx
14362 next_consecutive_mem (rtx mem)
14363 {
14364 enum machine_mode mode = GET_MODE (mem);
14365 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14366 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14367
14368 return adjust_automodify_address (mem, mode, addr, offset);
14369 }
14370
14371 /* Copy using LDRD/STRD instructions whenever possible.
14372 Returns true upon success. */
14373 bool
14374 gen_movmem_ldrd_strd (rtx *operands)
14375 {
14376 unsigned HOST_WIDE_INT len;
14377 HOST_WIDE_INT align;
14378 rtx src, dst, base;
14379 rtx reg0;
14380 bool src_aligned, dst_aligned;
14381 bool src_volatile, dst_volatile;
14382
14383 gcc_assert (CONST_INT_P (operands[2]));
14384 gcc_assert (CONST_INT_P (operands[3]));
14385
14386 len = UINTVAL (operands[2]);
14387 if (len > 64)
14388 return false;
14389
14390 /* Maximum alignment we can assume for both src and dst buffers. */
14391 align = INTVAL (operands[3]);
14392
14393 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14394 return false;
14395
14396 /* Place src and dst addresses in registers
14397 and update the corresponding mem rtx. */
14398 dst = operands[0];
14399 dst_volatile = MEM_VOLATILE_P (dst);
14400 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14401 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14402 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14403
14404 src = operands[1];
14405 src_volatile = MEM_VOLATILE_P (src);
14406 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14407 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14408 src = adjust_automodify_address (src, VOIDmode, base, 0);
14409
14410 if (!unaligned_access && !(src_aligned && dst_aligned))
14411 return false;
14412
14413 if (src_volatile || dst_volatile)
14414 return false;
14415
14416 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14417 if (!(dst_aligned || src_aligned))
14418 return arm_gen_movmemqi (operands);
14419
14420 src = adjust_address (src, DImode, 0);
14421 dst = adjust_address (dst, DImode, 0);
14422 while (len >= 8)
14423 {
14424 len -= 8;
14425 reg0 = gen_reg_rtx (DImode);
14426 if (src_aligned)
14427 emit_move_insn (reg0, src);
14428 else
14429 emit_insn (gen_unaligned_loaddi (reg0, src));
14430
14431 if (dst_aligned)
14432 emit_move_insn (dst, reg0);
14433 else
14434 emit_insn (gen_unaligned_storedi (dst, reg0));
14435
14436 src = next_consecutive_mem (src);
14437 dst = next_consecutive_mem (dst);
14438 }
14439
14440 gcc_assert (len < 8);
14441 if (len >= 4)
14442 {
14443 /* More than a word but less than a double-word to copy. Copy a word. */
14444 reg0 = gen_reg_rtx (SImode);
14445 src = adjust_address (src, SImode, 0);
14446 dst = adjust_address (dst, SImode, 0);
14447 if (src_aligned)
14448 emit_move_insn (reg0, src);
14449 else
14450 emit_insn (gen_unaligned_loadsi (reg0, src));
14451
14452 if (dst_aligned)
14453 emit_move_insn (dst, reg0);
14454 else
14455 emit_insn (gen_unaligned_storesi (dst, reg0));
14456
14457 src = next_consecutive_mem (src);
14458 dst = next_consecutive_mem (dst);
14459 len -= 4;
14460 }
14461
14462 if (len == 0)
14463 return true;
14464
14465 /* Copy the remaining bytes. */
14466 if (len >= 2)
14467 {
14468 dst = adjust_address (dst, HImode, 0);
14469 src = adjust_address (src, HImode, 0);
14470 reg0 = gen_reg_rtx (SImode);
14471 if (src_aligned)
14472 emit_insn (gen_zero_extendhisi2 (reg0, src));
14473 else
14474 emit_insn (gen_unaligned_loadhiu (reg0, src));
14475
14476 if (dst_aligned)
14477 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14478 else
14479 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14480
14481 src = next_consecutive_mem (src);
14482 dst = next_consecutive_mem (dst);
14483 if (len == 2)
14484 return true;
14485 }
14486
14487 dst = adjust_address (dst, QImode, 0);
14488 src = adjust_address (src, QImode, 0);
14489 reg0 = gen_reg_rtx (QImode);
14490 emit_move_insn (reg0, src);
14491 emit_move_insn (dst, reg0);
14492 return true;
14493 }
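
/* For illustration (assumed operands): a 15-byte copy between word-aligned
   buffers is expanded as one DImode move, one SImode move, one HImode move
   and a final QImode move, i.e. 8 + 4 + 2 + 1 bytes; when only one side is
   aligned the unaligned side uses the unaligned load/store patterns, which
   requires unaligned_access.  */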
14494
14495 /* Select a dominance comparison mode if possible for a test of the general
14496 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14497 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14498 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14499 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14500 In all cases OP will be either EQ or NE, but we don't need to know which
14501 here. If we are unable to support a dominance comparison we return
14502 CC mode. This will then fail to match for the RTL expressions that
14503 generate this call. */
14504 enum machine_mode
14505 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14506 {
14507 enum rtx_code cond1, cond2;
14508 int swapped = 0;
14509
14510 /* Currently we will probably get the wrong result if the individual
14511 comparisons are not simple. This also ensures that it is safe to
14512 reverse a comparison if necessary. */
14513 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14514 != CCmode)
14515 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14516 != CCmode))
14517 return CCmode;
14518
14519 /* The if_then_else variant of this tests the second condition if the
14520 first passes, but is true if the first fails. Reverse the first
14521 condition to get a true "inclusive-or" expression. */
14522 if (cond_or == DOM_CC_NX_OR_Y)
14523 cond1 = reverse_condition (cond1);
14524
14525 /* If the comparisons are not equal, and one doesn't dominate the other,
14526 then we can't do this. */
14527 if (cond1 != cond2
14528 && !comparison_dominates_p (cond1, cond2)
14529 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14530 return CCmode;
14531
14532 if (swapped)
14533 {
14534 enum rtx_code temp = cond1;
14535 cond1 = cond2;
14536 cond2 = temp;
14537 }
14538
14539 switch (cond1)
14540 {
14541 case EQ:
14542 if (cond_or == DOM_CC_X_AND_Y)
14543 return CC_DEQmode;
14544
14545 switch (cond2)
14546 {
14547 case EQ: return CC_DEQmode;
14548 case LE: return CC_DLEmode;
14549 case LEU: return CC_DLEUmode;
14550 case GE: return CC_DGEmode;
14551 case GEU: return CC_DGEUmode;
14552 default: gcc_unreachable ();
14553 }
14554
14555 case LT:
14556 if (cond_or == DOM_CC_X_AND_Y)
14557 return CC_DLTmode;
14558
14559 switch (cond2)
14560 {
14561 case LT:
14562 return CC_DLTmode;
14563 case LE:
14564 return CC_DLEmode;
14565 case NE:
14566 return CC_DNEmode;
14567 default:
14568 gcc_unreachable ();
14569 }
14570
14571 case GT:
14572 if (cond_or == DOM_CC_X_AND_Y)
14573 return CC_DGTmode;
14574
14575 switch (cond2)
14576 {
14577 case GT:
14578 return CC_DGTmode;
14579 case GE:
14580 return CC_DGEmode;
14581 case NE:
14582 return CC_DNEmode;
14583 default:
14584 gcc_unreachable ();
14585 }
14586
14587 case LTU:
14588 if (cond_or == DOM_CC_X_AND_Y)
14589 return CC_DLTUmode;
14590
14591 switch (cond2)
14592 {
14593 case LTU:
14594 return CC_DLTUmode;
14595 case LEU:
14596 return CC_DLEUmode;
14597 case NE:
14598 return CC_DNEmode;
14599 default:
14600 gcc_unreachable ();
14601 }
14602
14603 case GTU:
14604 if (cond_or == DOM_CC_X_AND_Y)
14605 return CC_DGTUmode;
14606
14607 switch (cond2)
14608 {
14609 case GTU:
14610 return CC_DGTUmode;
14611 case GEU:
14612 return CC_DGEUmode;
14613 case NE:
14614 return CC_DNEmode;
14615 default:
14616 gcc_unreachable ();
14617 }
14618
14619 /* The remaining cases only occur when both comparisons are the
14620 same. */
14621 case NE:
14622 gcc_assert (cond1 == cond2);
14623 return CC_DNEmode;
14624
14625 case LE:
14626 gcc_assert (cond1 == cond2);
14627 return CC_DLEmode;
14628
14629 case GE:
14630 gcc_assert (cond1 == cond2);
14631 return CC_DGEmode;
14632
14633 case LEU:
14634 gcc_assert (cond1 == cond2);
14635 return CC_DLEUmode;
14636
14637 case GEU:
14638 gcc_assert (cond1 == cond2);
14639 return CC_DGEUmode;
14640
14641 default:
14642 gcc_unreachable ();
14643 }
14644 }
14645
14646 enum machine_mode
14647 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14648 {
14649 /* All floating point compares return CCFP if it is an equality
14650 comparison, and CCFPE otherwise. */
14651 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14652 {
14653 switch (op)
14654 {
14655 case EQ:
14656 case NE:
14657 case UNORDERED:
14658 case ORDERED:
14659 case UNLT:
14660 case UNLE:
14661 case UNGT:
14662 case UNGE:
14663 case UNEQ:
14664 case LTGT:
14665 return CCFPmode;
14666
14667 case LT:
14668 case LE:
14669 case GT:
14670 case GE:
14671 return CCFPEmode;
14672
14673 default:
14674 gcc_unreachable ();
14675 }
14676 }
14677
14678 /* A compare with a shifted operand. Because of canonicalization, the
14679 comparison will have to be swapped when we emit the assembler. */
14680 if (GET_MODE (y) == SImode
14681 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14682 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14683 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14684 || GET_CODE (x) == ROTATERT))
14685 return CC_SWPmode;
14686
14687 /* This operation is performed swapped, but since we only rely on the Z
14688 flag we don't need an additional mode. */
14689 if (GET_MODE (y) == SImode
14690 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14691 && GET_CODE (x) == NEG
14692 && (op == EQ || op == NE))
14693 return CC_Zmode;
14694
14695 /* This is a special case that is used by combine to allow a
14696 comparison of a shifted byte load to be split into a zero-extend
14697 followed by a comparison of the shifted integer (only valid for
14698 equalities and unsigned inequalities). */
14699 if (GET_MODE (x) == SImode
14700 && GET_CODE (x) == ASHIFT
14701 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14702 && GET_CODE (XEXP (x, 0)) == SUBREG
14703 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14704 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14705 && (op == EQ || op == NE
14706 || op == GEU || op == GTU || op == LTU || op == LEU)
14707 && CONST_INT_P (y))
14708 return CC_Zmode;
14709
14710   /* A construct for a conditional compare: if the false arm contains
14711      0, then both conditions must be true; otherwise either condition
14712      must be true.  Not all conditions are possible, so CCmode is
14713      returned if it can't be done.  */
14714 if (GET_CODE (x) == IF_THEN_ELSE
14715 && (XEXP (x, 2) == const0_rtx
14716 || XEXP (x, 2) == const1_rtx)
14717 && COMPARISON_P (XEXP (x, 0))
14718 && COMPARISON_P (XEXP (x, 1)))
14719 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14720 INTVAL (XEXP (x, 2)));
14721
14722 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14723 if (GET_CODE (x) == AND
14724 && (op == EQ || op == NE)
14725 && COMPARISON_P (XEXP (x, 0))
14726 && COMPARISON_P (XEXP (x, 1)))
14727 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14728 DOM_CC_X_AND_Y);
14729
14730 if (GET_CODE (x) == IOR
14731 && (op == EQ || op == NE)
14732 && COMPARISON_P (XEXP (x, 0))
14733 && COMPARISON_P (XEXP (x, 1)))
14734 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14735 DOM_CC_X_OR_Y);
14736
14737 /* An operation (on Thumb) where we want to test for a single bit.
14738 This is done by shifting that bit up into the top bit of a
14739 scratch register; we can then branch on the sign bit. */
14740 if (TARGET_THUMB1
14741 && GET_MODE (x) == SImode
14742 && (op == EQ || op == NE)
14743 && GET_CODE (x) == ZERO_EXTRACT
14744 && XEXP (x, 1) == const1_rtx)
14745 return CC_Nmode;
14746
14747   /* An operation that sets the condition codes as a side-effect does not
14748      set the V flag correctly, so we can only use comparisons where this
14749      doesn't matter.  (For LT and GE we can use "mi" and "pl"
14750      instead.)  */
14751 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14752 if (GET_MODE (x) == SImode
14753 && y == const0_rtx
14754 && (op == EQ || op == NE || op == LT || op == GE)
14755 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14756 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14757 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14758 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14759 || GET_CODE (x) == LSHIFTRT
14760 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14761 || GET_CODE (x) == ROTATERT
14762 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14763 return CC_NOOVmode;
14764
14765 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14766 return CC_Zmode;
14767
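  /* A comparison of the form (LTU (plus A B) A) or (GEU (plus A B) A) is the
     usual idiom for testing whether an unsigned addition produced a carry;
     only the C flag is needed, hence CC_Cmode below.  */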
14768 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14769 && GET_CODE (x) == PLUS
14770 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14771 return CC_Cmode;
14772
14773 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14774 {
14775 switch (op)
14776 {
14777 case EQ:
14778 case NE:
14779 /* A DImode comparison against zero can be implemented by
14780 or'ing the two halves together. */
14781 if (y == const0_rtx)
14782 return CC_Zmode;
14783
14784 /* We can do an equality test in three Thumb instructions. */
14785 if (!TARGET_32BIT)
14786 return CC_Zmode;
14787
14788 /* FALLTHROUGH */
14789
14790 case LTU:
14791 case LEU:
14792 case GTU:
14793 case GEU:
14794 /* DImode unsigned comparisons can be implemented by cmp +
14795 cmpeq without a scratch register. Not worth doing in
14796 Thumb-2. */
14797 if (TARGET_32BIT)
14798 return CC_CZmode;
14799
14800 /* FALLTHROUGH */
14801
14802 case LT:
14803 case LE:
14804 case GT:
14805 case GE:
14806 /* DImode signed and unsigned comparisons can be implemented
14807 by cmp + sbcs with a scratch register, but that does not
14808 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14809 gcc_assert (op != EQ && op != NE);
14810 return CC_NCVmode;
14811
14812 default:
14813 gcc_unreachable ();
14814 }
14815 }
14816
14817 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14818 return GET_MODE (x);
14819
14820 return CCmode;
14821 }
14822
14823 /* X and Y are two things to compare using CODE. Emit the compare insn and
14824 return the rtx for register 0 in the proper mode. FP means this is a
14825 floating point compare: I don't think that it is needed on the arm. */
14826 rtx
14827 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14828 {
14829 enum machine_mode mode;
14830 rtx cc_reg;
14831 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14832
14833 /* We might have X as a constant, Y as a register because of the predicates
14834 used for cmpdi. If so, force X to a register here. */
14835 if (dimode_comparison && !REG_P (x))
14836 x = force_reg (DImode, x);
14837
14838 mode = SELECT_CC_MODE (code, x, y);
14839 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14840
14841 if (dimode_comparison
14842 && mode != CC_CZmode)
14843 {
14844 rtx clobber, set;
14845
14846 /* To compare two non-zero values for equality, XOR them and
14847 then compare against zero. Not used for ARM mode; there
14848 CC_CZmode is cheaper. */
14849 if (mode == CC_Zmode && y != const0_rtx)
14850 {
14851 gcc_assert (!reload_completed);
14852 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14853 y = const0_rtx;
14854 }
14855
14856 /* A scratch register is required. */
14857 if (reload_completed)
14858 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14859 else
14860 scratch = gen_rtx_SCRATCH (SImode);
14861
14862 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14863 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14864 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14865 }
14866 else
14867 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14868
14869 return cc_reg;
14870 }
14871
14872 /* Generate a sequence of insns that will generate the correct return
14873 address mask depending on the physical architecture that the program
14874 is running on. */
14875 rtx
14876 arm_gen_return_addr_mask (void)
14877 {
14878 rtx reg = gen_reg_rtx (Pmode);
14879
14880 emit_insn (gen_return_addr_mask (reg));
14881 return reg;
14882 }
14883
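/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads that are then merged with a shift and IOR, taking
   BYTES_BIG_ENDIAN into account.  OPERANDS[2] is a DImode scratch; using one
   or other of its two halves guarantees a scratch register that does not
   overlap the destination (see also arm_reload_out_hi below).  */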
14884 void
14885 arm_reload_in_hi (rtx *operands)
14886 {
14887 rtx ref = operands[1];
14888 rtx base, scratch;
14889 HOST_WIDE_INT offset = 0;
14890
14891 if (GET_CODE (ref) == SUBREG)
14892 {
14893 offset = SUBREG_BYTE (ref);
14894 ref = SUBREG_REG (ref);
14895 }
14896
14897 if (REG_P (ref))
14898 {
14899 /* We have a pseudo which has been spilt onto the stack; there
14900 are two cases here: the first where there is a simple
14901 stack-slot replacement and a second where the stack-slot is
14902 out of range, or is used as a subreg. */
14903 if (reg_equiv_mem (REGNO (ref)))
14904 {
14905 ref = reg_equiv_mem (REGNO (ref));
14906 base = find_replacement (&XEXP (ref, 0));
14907 }
14908 else
14909 /* The slot is out of range, or was dressed up in a SUBREG. */
14910 base = reg_equiv_address (REGNO (ref));
14911 }
14912 else
14913 base = find_replacement (&XEXP (ref, 0));
14914
14915 /* Handle the case where the address is too complex to be offset by 1. */
14916 if (GET_CODE (base) == MINUS
14917 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14918 {
14919 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14920
14921 emit_set_insn (base_plus, base);
14922 base = base_plus;
14923 }
14924 else if (GET_CODE (base) == PLUS)
14925 {
14926 /* The addend must be CONST_INT, or we would have dealt with it above. */
14927 HOST_WIDE_INT hi, lo;
14928
14929 offset += INTVAL (XEXP (base, 1));
14930 base = XEXP (base, 0);
14931
14932 /* Rework the address into a legal sequence of insns. */
14933 /* Valid range for lo is -4095 -> 4095 */
14934 lo = (offset >= 0
14935 ? (offset & 0xfff)
14936 : -((-offset) & 0xfff));
14937
14938 /* Corner case, if lo is the max offset then we would be out of range
14939 once we have added the additional 1 below, so bump the msb into the
14940 pre-loading insn(s). */
14941 if (lo == 4095)
14942 lo &= 0x7ff;
14943
14944 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14945 ^ (HOST_WIDE_INT) 0x80000000)
14946 - (HOST_WIDE_INT) 0x80000000);
14947
14948 gcc_assert (hi + lo == offset);
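      /* For illustration: offset = 0x1234 splits into lo = 0x234 and
         hi = 0x1000; offset = -0x1234 splits into lo = -0x234 and hi = -0x1000.
         For the corner case offset = 4095 we get lo = 2047 and hi = 2048, so
         that both lo and lo + 1 stay within the +/-4095 addressing range.  */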
14949
14950 if (hi != 0)
14951 {
14952 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14953
14954 /* Get the base address; addsi3 knows how to handle constants
14955 that require more than one insn. */
14956 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14957 base = base_plus;
14958 offset = lo;
14959 }
14960 }
14961
14962   /* Operands[2] may overlap operands[0] (though it won't overlap
14963      operands[1]); that's why we asked for a DImode reg -- so we can
14964      use the half that does not overlap.  */
14965 if (REGNO (operands[2]) == REGNO (operands[0]))
14966 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14967 else
14968 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14969
14970 emit_insn (gen_zero_extendqisi2 (scratch,
14971 gen_rtx_MEM (QImode,
14972 plus_constant (Pmode, base,
14973 offset))));
14974 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14975 gen_rtx_MEM (QImode,
14976 plus_constant (Pmode, base,
14977 offset + 1))));
14978 if (!BYTES_BIG_ENDIAN)
14979 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14980 gen_rtx_IOR (SImode,
14981 gen_rtx_ASHIFT
14982 (SImode,
14983 gen_rtx_SUBREG (SImode, operands[0], 0),
14984 GEN_INT (8)),
14985 scratch));
14986 else
14987 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14988 gen_rtx_IOR (SImode,
14989 gen_rtx_ASHIFT (SImode, scratch,
14990 GEN_INT (8)),
14991 gen_rtx_SUBREG (SImode, operands[0], 0)));
14992 }
14993
14994 /* Handle storing a half-word to memory during reload by synthesizing as two
14995 byte stores. Take care not to clobber the input values until after we
14996 have moved them somewhere safe. This code assumes that if the DImode
14997 scratch in operands[2] overlaps either the input value or output address
14998 in some way, then that value must die in this insn (we absolutely need
14999 two scratch registers for some corner cases). */
15000 void
15001 arm_reload_out_hi (rtx *operands)
15002 {
15003 rtx ref = operands[0];
15004 rtx outval = operands[1];
15005 rtx base, scratch;
15006 HOST_WIDE_INT offset = 0;
15007
15008 if (GET_CODE (ref) == SUBREG)
15009 {
15010 offset = SUBREG_BYTE (ref);
15011 ref = SUBREG_REG (ref);
15012 }
15013
15014 if (REG_P (ref))
15015 {
15016 /* We have a pseudo which has been spilt onto the stack; there
15017 are two cases here: the first where there is a simple
15018 stack-slot replacement and a second where the stack-slot is
15019 out of range, or is used as a subreg. */
15020 if (reg_equiv_mem (REGNO (ref)))
15021 {
15022 ref = reg_equiv_mem (REGNO (ref));
15023 base = find_replacement (&XEXP (ref, 0));
15024 }
15025 else
15026 /* The slot is out of range, or was dressed up in a SUBREG. */
15027 base = reg_equiv_address (REGNO (ref));
15028 }
15029 else
15030 base = find_replacement (&XEXP (ref, 0));
15031
15032 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15033
15034 /* Handle the case where the address is too complex to be offset by 1. */
15035 if (GET_CODE (base) == MINUS
15036 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15037 {
15038 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15039
15040 /* Be careful not to destroy OUTVAL. */
15041 if (reg_overlap_mentioned_p (base_plus, outval))
15042 {
15043 	  /* Updating base_plus might destroy outval; see if we can
15044 swap the scratch and base_plus. */
15045 if (!reg_overlap_mentioned_p (scratch, outval))
15046 {
15047 rtx tmp = scratch;
15048 scratch = base_plus;
15049 base_plus = tmp;
15050 }
15051 else
15052 {
15053 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15054
15055 /* Be conservative and copy OUTVAL into the scratch now,
15056 this should only be necessary if outval is a subreg
15057 of something larger than a word. */
15058 /* XXX Might this clobber base? I can't see how it can,
15059 since scratch is known to overlap with OUTVAL, and
15060 must be wider than a word. */
15061 emit_insn (gen_movhi (scratch_hi, outval));
15062 outval = scratch_hi;
15063 }
15064 }
15065
15066 emit_set_insn (base_plus, base);
15067 base = base_plus;
15068 }
15069 else if (GET_CODE (base) == PLUS)
15070 {
15071 /* The addend must be CONST_INT, or we would have dealt with it above. */
15072 HOST_WIDE_INT hi, lo;
15073
15074 offset += INTVAL (XEXP (base, 1));
15075 base = XEXP (base, 0);
15076
15077 /* Rework the address into a legal sequence of insns. */
15078 /* Valid range for lo is -4095 -> 4095 */
15079 lo = (offset >= 0
15080 ? (offset & 0xfff)
15081 : -((-offset) & 0xfff));
15082
15083 /* Corner case, if lo is the max offset then we would be out of range
15084 once we have added the additional 1 below, so bump the msb into the
15085 pre-loading insn(s). */
15086 if (lo == 4095)
15087 lo &= 0x7ff;
15088
15089 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15090 ^ (HOST_WIDE_INT) 0x80000000)
15091 - (HOST_WIDE_INT) 0x80000000);
15092
15093 gcc_assert (hi + lo == offset);
15094
15095 if (hi != 0)
15096 {
15097 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15098
15099 /* Be careful not to destroy OUTVAL. */
15100 if (reg_overlap_mentioned_p (base_plus, outval))
15101 {
15102 	      /* Updating base_plus might destroy outval; see if we
15103 can swap the scratch and base_plus. */
15104 if (!reg_overlap_mentioned_p (scratch, outval))
15105 {
15106 rtx tmp = scratch;
15107 scratch = base_plus;
15108 base_plus = tmp;
15109 }
15110 else
15111 {
15112 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15113
15114 /* Be conservative and copy outval into scratch now,
15115 this should only be necessary if outval is a
15116 subreg of something larger than a word. */
15117 /* XXX Might this clobber base? I can't see how it
15118 can, since scratch is known to overlap with
15119 outval. */
15120 emit_insn (gen_movhi (scratch_hi, outval));
15121 outval = scratch_hi;
15122 }
15123 }
15124
15125 /* Get the base address; addsi3 knows how to handle constants
15126 that require more than one insn. */
15127 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15128 base = base_plus;
15129 offset = lo;
15130 }
15131 }
15132
15133 if (BYTES_BIG_ENDIAN)
15134 {
15135 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15136 plus_constant (Pmode, base,
15137 offset + 1)),
15138 gen_lowpart (QImode, outval)));
15139 emit_insn (gen_lshrsi3 (scratch,
15140 gen_rtx_SUBREG (SImode, outval, 0),
15141 GEN_INT (8)));
15142 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15143 offset)),
15144 gen_lowpart (QImode, scratch)));
15145 }
15146 else
15147 {
15148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15149 offset)),
15150 gen_lowpart (QImode, outval)));
15151 emit_insn (gen_lshrsi3 (scratch,
15152 gen_rtx_SUBREG (SImode, outval, 0),
15153 GEN_INT (8)));
15154 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15155 plus_constant (Pmode, base,
15156 offset + 1)),
15157 gen_lowpart (QImode, scratch)));
15158 }
15159 }
15160
15161 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15162 (padded to the size of a word) should be passed in a register. */
15163
15164 static bool
15165 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15166 {
15167 if (TARGET_AAPCS_BASED)
15168 return must_pass_in_stack_var_size (mode, type);
15169 else
15170 return must_pass_in_stack_var_size_or_pad (mode, type);
15171 }
15172
15173
15174 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15175 Return true if an argument passed on the stack should be padded upwards,
15176 i.e. if the least-significant byte has useful data.
15177 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15178 aggregate types are placed in the lowest memory address. */
15179
15180 bool
15181 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15182 {
15183 if (!TARGET_AAPCS_BASED)
15184     return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
15185
15186 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15187 return false;
15188
15189 return true;
15190 }
15191
15192
15193 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15194 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15195 register has useful data, and return the opposite if the most
15196 significant byte does. */
15197
15198 bool
15199 arm_pad_reg_upward (enum machine_mode mode,
15200 tree type, int first ATTRIBUTE_UNUSED)
15201 {
15202 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15203 {
15204 /* For AAPCS, small aggregates, small fixed-point types,
15205 and small complex types are always padded upwards. */
15206 if (type)
15207 {
15208 if ((AGGREGATE_TYPE_P (type)
15209 || TREE_CODE (type) == COMPLEX_TYPE
15210 || FIXED_POINT_TYPE_P (type))
15211 && int_size_in_bytes (type) <= 4)
15212 return true;
15213 }
15214 else
15215 {
15216 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15217 && GET_MODE_SIZE (mode) <= 4)
15218 return true;
15219 }
15220 }
15221
15222 /* Otherwise, use default padding. */
15223 return !BYTES_BIG_ENDIAN;
15224 }
15225
15226 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15227 assuming that the address in the base register is word aligned. */
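/* For example, an offset of 1020 is accepted in Thumb-2 but rejected in ARM
   state (whose limit is 255), while 1018 is rejected in Thumb-2 because it is
   not a multiple of 4.  */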
15228 bool
15229 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15230 {
15231 HOST_WIDE_INT max_offset;
15232
15233 /* Offset must be a multiple of 4 in Thumb mode. */
15234 if (TARGET_THUMB2 && ((offset & 3) != 0))
15235 return false;
15236
15237 if (TARGET_THUMB2)
15238 max_offset = 1020;
15239 else if (TARGET_ARM)
15240 max_offset = 255;
15241 else
15242 return false;
15243
15244 return ((offset <= max_offset) && (offset >= -max_offset));
15245 }
15246
15247 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15248 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15249 Assumes that the address in the base register RN is word aligned. Pattern
15250 guarantees that both memory accesses use the same base register,
15251 the offsets are constants within the range, and the gap between the offsets is 4.
15252    If reload has completed, check that the registers are legal.  WBACK indicates whether
15253 address is updated. LOAD indicates whether memory access is load or store. */
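/* For example, in ARM state the transfer registers must form an even/odd pair
   such as {r4, r5}: an odd RT or RT2 != RT + 1 is rejected.  In Thumb-2 the
   registers need not be consecutive, but SP and PC are rejected and a load
   must not use the same register twice.  */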
15254 bool
15255 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15256 bool wback, bool load)
15257 {
15258 unsigned int t, t2, n;
15259
15260 if (!reload_completed)
15261 return true;
15262
15263 if (!offset_ok_for_ldrd_strd (offset))
15264 return false;
15265
15266 t = REGNO (rt);
15267 t2 = REGNO (rt2);
15268 n = REGNO (rn);
15269
15270 if ((TARGET_THUMB2)
15271 && ((wback && (n == t || n == t2))
15272 || (t == SP_REGNUM)
15273 || (t == PC_REGNUM)
15274 || (t2 == SP_REGNUM)
15275 || (t2 == PC_REGNUM)
15276 || (!load && (n == PC_REGNUM))
15277 || (load && (t == t2))
15278 	   /* Triggers the Cortex-M3 LDRD erratum.  */
15279 || (!wback && load && fix_cm3_ldrd && (n == t))))
15280 return false;
15281
15282 if ((TARGET_ARM)
15283 && ((wback && (n == t || n == t2))
15284 || (t2 == PC_REGNUM)
15285 || (t % 2 != 0) /* First destination register is not even. */
15286 || (t2 != t + 1)
15287 /* PC can be used as base register (for offset addressing only),
15288         but it is deprecated.  */
15289 || (n == PC_REGNUM)))
15290 return false;
15291
15292 return true;
15293 }
15294
15295 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15296 operand MEM's address contains an immediate offset from the base
15297 register and has no side effects, in which case it sets BASE and
15298 OFFSET accordingly. */
15299 static bool
15300 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15301 {
15302 rtx addr;
15303
15304 gcc_assert (base != NULL && offset != NULL);
15305
15306 /* TODO: Handle more general memory operand patterns, such as
15307 PRE_DEC and PRE_INC. */
15308
15309 if (side_effects_p (mem))
15310 return false;
15311
15312 /* Can't deal with subregs. */
15313 if (GET_CODE (mem) == SUBREG)
15314 return false;
15315
15316 gcc_assert (MEM_P (mem));
15317
15318 *offset = const0_rtx;
15319
15320 addr = XEXP (mem, 0);
15321
15322 /* If addr isn't valid for DImode, then we can't handle it. */
15323 if (!arm_legitimate_address_p (DImode, addr,
15324 reload_in_progress || reload_completed))
15325 return false;
15326
15327 if (REG_P (addr))
15328 {
15329 *base = addr;
15330 return true;
15331 }
15332 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15333 {
15334 *base = XEXP (addr, 0);
15335 *offset = XEXP (addr, 1);
15336 return (REG_P (*base) && CONST_INT_P (*offset));
15337 }
15338
15339 return false;
15340 }
15341
15342 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15343
15344 /* Called from a peephole2 to replace two word-size accesses with a
15345 single LDRD/STRD instruction. Returns true iff we can generate a
15346 new instruction sequence. That is, both accesses use the same base
15347 register and the gap between constant offsets is 4. This function
15348 may reorder its operands to match ldrd/strd RTL templates.
15349 OPERANDS are the operands found by the peephole matcher;
15350 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15351    corresponding memory operands.  LOAD indicates whether the access
15352    is a load or a store.  CONST_STORE indicates a store of constant
15353    integer values held in OPERANDS[4,5], and assumes that the pattern
15354    is four insns long for the purpose of checking dead registers.
15355 COMMUTE indicates that register operands may be reordered. */
15356 bool
15357 gen_operands_ldrd_strd (rtx *operands, bool load,
15358 bool const_store, bool commute)
15359 {
15360 int nops = 2;
15361 HOST_WIDE_INT offsets[2], offset;
15362 rtx base = NULL_RTX;
15363 rtx cur_base, cur_offset, tmp;
15364 int i, gap;
15365 HARD_REG_SET regset;
15366
15367 gcc_assert (!const_store || !load);
15368 /* Check that the memory references are immediate offsets from the
15369 same base register. Extract the base register, the destination
15370 registers, and the corresponding memory offsets. */
15371 for (i = 0; i < nops; i++)
15372 {
15373 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15374 return false;
15375
15376 if (i == 0)
15377 base = cur_base;
15378 else if (REGNO (base) != REGNO (cur_base))
15379 return false;
15380
15381 offsets[i] = INTVAL (cur_offset);
15382 if (GET_CODE (operands[i]) == SUBREG)
15383 {
15384 tmp = SUBREG_REG (operands[i]);
15385 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15386 operands[i] = tmp;
15387 }
15388 }
15389
15390 /* Make sure there is no dependency between the individual loads. */
15391 if (load && REGNO (operands[0]) == REGNO (base))
15392 return false; /* RAW */
15393
15394 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15395 return false; /* WAW */
15396
15397 /* If the same input register is used in both stores
15398 when storing different constants, try to find a free register.
15399 For example, the code
15400 mov r0, 0
15401 str r0, [r2]
15402 mov r0, 1
15403 str r0, [r2, #4]
15404 can be transformed into
15405 mov r1, 0
15406 strd r1, r0, [r2]
15407 in Thumb mode assuming that r1 is free. */
15408 if (const_store
15409 && REGNO (operands[0]) == REGNO (operands[1])
15410 && INTVAL (operands[4]) != INTVAL (operands[5]))
15411 {
15412 if (TARGET_THUMB2)
15413 {
15414 CLEAR_HARD_REG_SET (regset);
15415 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15416 if (tmp == NULL_RTX)
15417 return false;
15418
15419 /* Use the new register in the first load to ensure that
15420 if the original input register is not dead after peephole,
15421 then it will have the correct constant value. */
15422 operands[0] = tmp;
15423 }
15424 else if (TARGET_ARM)
15425 {
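	  /* The unconditional return below disables this ARM-mode path; the
	     register-renaming code that follows it is currently unreachable.  */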
15426 return false;
15427 int regno = REGNO (operands[0]);
15428 if (!peep2_reg_dead_p (4, operands[0]))
15429 {
15430 /* When the input register is even and is not dead after the
15431 pattern, it has to hold the second constant but we cannot
15432 form a legal STRD in ARM mode with this register as the second
15433 register. */
15434 if (regno % 2 == 0)
15435 return false;
15436
15437 /* Is regno-1 free? */
15438 SET_HARD_REG_SET (regset);
15439 	      CLEAR_HARD_REG_BIT (regset, regno - 1);
15440 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15441 if (tmp == NULL_RTX)
15442 return false;
15443
15444 operands[0] = tmp;
15445 }
15446 else
15447 {
15448 /* Find a DImode register. */
15449 CLEAR_HARD_REG_SET (regset);
15450 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15451 if (tmp != NULL_RTX)
15452 {
15453 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15454 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15455 }
15456 else
15457 {
15458 /* Can we use the input register to form a DI register? */
15459 SET_HARD_REG_SET (regset);
15460 		      CLEAR_HARD_REG_BIT (regset,
15461 regno % 2 == 0 ? regno + 1 : regno - 1);
15462 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15463 if (tmp == NULL_RTX)
15464 return false;
15465 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15466 }
15467 }
15468
15469 gcc_assert (operands[0] != NULL_RTX);
15470 gcc_assert (operands[1] != NULL_RTX);
15471 gcc_assert (REGNO (operands[0]) % 2 == 0);
15472 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15473 }
15474 }
15475
15476 /* Make sure the instructions are ordered with lower memory access first. */
15477 if (offsets[0] > offsets[1])
15478 {
15479 gap = offsets[0] - offsets[1];
15480 offset = offsets[1];
15481
15482 /* Swap the instructions such that lower memory is accessed first. */
15483 SWAP_RTX (operands[0], operands[1]);
15484 SWAP_RTX (operands[2], operands[3]);
15485 if (const_store)
15486 SWAP_RTX (operands[4], operands[5]);
15487 }
15488 else
15489 {
15490 gap = offsets[1] - offsets[0];
15491 offset = offsets[0];
15492 }
15493
15494 /* Make sure accesses are to consecutive memory locations. */
15495 if (gap != 4)
15496 return false;
15497
15498 /* Make sure we generate legal instructions. */
15499 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15500 false, load))
15501 return true;
15502
15503   /* In Thumb state the registers are almost unconstrained, so there is
15504      little hope of fixing things by reordering.  */
15505 if (TARGET_THUMB2)
15506 return false;
15507
15508 if (load && commute)
15509 {
15510 /* Try reordering registers. */
15511 SWAP_RTX (operands[0], operands[1]);
15512 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15513 false, load))
15514 return true;
15515 }
15516
15517 if (const_store)
15518 {
15519 /* If input registers are dead after this pattern, they can be
15520 reordered or replaced by other registers that are free in the
15521 current pattern. */
15522 if (!peep2_reg_dead_p (4, operands[0])
15523 || !peep2_reg_dead_p (4, operands[1]))
15524 return false;
15525
15526 /* Try to reorder the input registers. */
15527 /* For example, the code
15528 mov r0, 0
15529 mov r1, 1
15530 str r1, [r2]
15531 str r0, [r2, #4]
15532 can be transformed into
15533 mov r1, 0
15534 mov r0, 1
15535 strd r0, [r2]
15536 */
15537 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15538 false, false))
15539 {
15540 SWAP_RTX (operands[0], operands[1]);
15541 return true;
15542 }
15543
15544 /* Try to find a free DI register. */
15545 CLEAR_HARD_REG_SET (regset);
15546 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15547 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15548 while (true)
15549 {
15550 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15551 if (tmp == NULL_RTX)
15552 return false;
15553
15554 /* DREG must be an even-numbered register in DImode.
15555 Split it into SI registers. */
15556 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15557 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15558 gcc_assert (operands[0] != NULL_RTX);
15559 gcc_assert (operands[1] != NULL_RTX);
15560 gcc_assert (REGNO (operands[0]) % 2 == 0);
15561 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15562
15563 return (operands_ok_ldrd_strd (operands[0], operands[1],
15564 base, offset,
15565 false, load));
15566 }
15567 }
15568
15569 return false;
15570 }
15571 #undef SWAP_RTX
15572
15573
15574
15575 \f
15576 /* Print a symbolic form of X to the debug file, F. */
15577 static void
15578 arm_print_value (FILE *f, rtx x)
15579 {
15580 switch (GET_CODE (x))
15581 {
15582 case CONST_INT:
15583 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15584 return;
15585
15586 case CONST_DOUBLE:
15587 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15588 return;
15589
15590 case CONST_VECTOR:
15591 {
15592 int i;
15593
15594 fprintf (f, "<");
15595 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15596 {
15597 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15598 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15599 fputc (',', f);
15600 }
15601 fprintf (f, ">");
15602 }
15603 return;
15604
15605 case CONST_STRING:
15606 fprintf (f, "\"%s\"", XSTR (x, 0));
15607 return;
15608
15609 case SYMBOL_REF:
15610 fprintf (f, "`%s'", XSTR (x, 0));
15611 return;
15612
15613 case LABEL_REF:
15614 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15615 return;
15616
15617 case CONST:
15618 arm_print_value (f, XEXP (x, 0));
15619 return;
15620
15621 case PLUS:
15622 arm_print_value (f, XEXP (x, 0));
15623 fprintf (f, "+");
15624 arm_print_value (f, XEXP (x, 1));
15625 return;
15626
15627 case PC:
15628 fprintf (f, "pc");
15629 return;
15630
15631 default:
15632 fprintf (f, "????");
15633 return;
15634 }
15635 }
15636 \f
15637 /* Routines for manipulation of the constant pool. */
15638
15639 /* Arm instructions cannot load a large constant directly into a
15640 register; they have to come from a pc relative load. The constant
15641 must therefore be placed in the addressable range of the pc
15642 relative load. Depending on the precise pc relative load
15643 instruction the range is somewhere between 256 bytes and 4k. This
15644 means that we often have to dump a constant inside a function, and
15645 generate code to branch around it.
15646
15647 It is important to minimize this, since the branches will slow
15648 things down and make the code larger.
15649
15650 Normally we can hide the table after an existing unconditional
15651 branch so that there is no interruption of the flow, but in the
15652 worst case the code looks like this:
15653
15654 ldr rn, L1
15655 ...
15656 b L2
15657 align
15658 L1: .long value
15659 L2:
15660 ...
15661
15662 ldr rn, L3
15663 ...
15664 b L4
15665 align
15666 L3: .long value
15667 L4:
15668 ...
15669
15670 We fix this by performing a scan after scheduling, which notices
15671 which instructions need to have their operands fetched from the
15672 constant table and builds the table.
15673
15674 The algorithm starts by building a table of all the constants that
15675 need fixing up and all the natural barriers in the function (places
15676 where a constant table can be dropped without breaking the flow).
15677 For each fixup we note how far the pc-relative replacement will be
15678 able to reach and the offset of the instruction into the function.
15679
15680 Having built the table we then group the fixes together to form
15681 tables that are as large as possible (subject to addressing
15682 constraints) and emit each table of constants after the last
15683 barrier that is within range of all the instructions in the group.
15684 If a group does not contain a barrier, then we forcibly create one
15685 by inserting a jump instruction into the flow. Once the table has
15686 been inserted, the insns are then modified to reference the
15687 relevant entry in the pool.
15688
15689 Possible enhancements to the algorithm (not implemented) are:
15690
15691 1) For some processors and object formats, there may be benefit in
15692 aligning the pools to the start of cache lines; this alignment
15693 would need to be taken into account when calculating addressability
15694 of a pool. */
15695
15696 /* These typedefs are located at the start of this file, so that
15697 they can be used in the prototypes there. This comment is to
15698 remind readers of that fact so that the following structures
15699 can be understood more easily.
15700
15701 typedef struct minipool_node Mnode;
15702 typedef struct minipool_fixup Mfix; */
15703
15704 struct minipool_node
15705 {
15706 /* Doubly linked chain of entries. */
15707 Mnode * next;
15708 Mnode * prev;
15709 /* The maximum offset into the code that this entry can be placed. While
15710 pushing fixes for forward references, all entries are sorted in order
15711 of increasing max_address. */
15712 HOST_WIDE_INT max_address;
15713 /* Similarly for an entry inserted for a backwards ref. */
15714 HOST_WIDE_INT min_address;
15715 /* The number of fixes referencing this entry. This can become zero
15716 if we "unpush" an entry. In this case we ignore the entry when we
15717 come to emit the code. */
15718 int refcount;
15719 /* The offset from the start of the minipool. */
15720 HOST_WIDE_INT offset;
15721   /* The value in the table.  */
15722 rtx value;
15723 /* The mode of value. */
15724 enum machine_mode mode;
15725   /* The size of the value.  With iWMMXt enabled,
15726      sizes > 4 also imply an alignment of 8 bytes.  */
15727 int fix_size;
15728 };
15729
15730 struct minipool_fixup
15731 {
15732 Mfix * next;
15733 rtx insn;
15734 HOST_WIDE_INT address;
15735 rtx * loc;
15736 enum machine_mode mode;
15737 int fix_size;
15738 rtx value;
15739 Mnode * minipool;
15740 HOST_WIDE_INT forwards;
15741 HOST_WIDE_INT backwards;
15742 };
15743
15744 /* Fixes less than a word need padding out to a word boundary. */
15745 #define MINIPOOL_FIX_SIZE(mode) \
15746 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
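/* For example, QImode and HImode fixes are padded to 4 bytes, SImode stays at
   4 bytes, and DImode or DFmode fixes take 8 bytes.  */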
15747
15748 static Mnode * minipool_vector_head;
15749 static Mnode * minipool_vector_tail;
15750 static rtx minipool_vector_label;
15751 static int minipool_pad;
15752
15753 /* The linked list of all minipool fixes required for this function. */
15754 Mfix * minipool_fix_head;
15755 Mfix * minipool_fix_tail;
15756 /* The fix entry for the current minipool, once it has been placed. */
15757 Mfix * minipool_barrier;
15758
15759 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15760 #define JUMP_TABLES_IN_TEXT_SECTION 0
15761 #endif
15762
15763 static HOST_WIDE_INT
15764 get_jump_table_size (rtx insn)
15765 {
15766   /* ADDR_VECs only take room if read-only data goes into the text
15767 section. */
15768 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15769 {
15770 rtx body = PATTERN (insn);
15771 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15772 HOST_WIDE_INT size;
15773 HOST_WIDE_INT modesize;
15774
15775 modesize = GET_MODE_SIZE (GET_MODE (body));
15776 size = modesize * XVECLEN (body, elt);
15777 switch (modesize)
15778 {
15779 case 1:
15780 /* Round up size of TBB table to a halfword boundary. */
15781 size = (size + 1) & ~(HOST_WIDE_INT)1;
15782 break;
15783 case 2:
15784 /* No padding necessary for TBH. */
15785 break;
15786 case 4:
15787 /* Add two bytes for alignment on Thumb. */
15788 if (TARGET_THUMB)
15789 size += 2;
15790 break;
15791 default:
15792 gcc_unreachable ();
15793 }
15794 return size;
15795 }
15796
15797 return 0;
15798 }
15799
15800 /* Return the maximum amount of padding that will be inserted before
15801 label LABEL. */
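/* For example, with an 8-byte label alignment and a 2-byte minimum Thumb
   insn, up to 6 bytes of padding may appear before LABEL.  */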
15802
15803 static HOST_WIDE_INT
15804 get_label_padding (rtx label)
15805 {
15806 HOST_WIDE_INT align, min_insn_size;
15807
15808 align = 1 << label_to_alignment (label);
15809 min_insn_size = TARGET_THUMB ? 2 : 4;
15810 return align > min_insn_size ? align - min_insn_size : 0;
15811 }
15812
15813 /* Move a minipool fix MP from its current location to before MAX_MP.
15814 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15815 constraints may need updating. */
15816 static Mnode *
15817 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15818 HOST_WIDE_INT max_address)
15819 {
15820 /* The code below assumes these are different. */
15821 gcc_assert (mp != max_mp);
15822
15823 if (max_mp == NULL)
15824 {
15825 if (max_address < mp->max_address)
15826 mp->max_address = max_address;
15827 }
15828 else
15829 {
15830 if (max_address > max_mp->max_address - mp->fix_size)
15831 mp->max_address = max_mp->max_address - mp->fix_size;
15832 else
15833 mp->max_address = max_address;
15834
15835 /* Unlink MP from its current position. Since max_mp is non-null,
15836 mp->prev must be non-null. */
15837 mp->prev->next = mp->next;
15838 if (mp->next != NULL)
15839 mp->next->prev = mp->prev;
15840 else
15841 minipool_vector_tail = mp->prev;
15842
15843 /* Re-insert it before MAX_MP. */
15844 mp->next = max_mp;
15845 mp->prev = max_mp->prev;
15846 max_mp->prev = mp;
15847
15848 if (mp->prev != NULL)
15849 mp->prev->next = mp;
15850 else
15851 minipool_vector_head = mp;
15852 }
15853
15854 /* Save the new entry. */
15855 max_mp = mp;
15856
15857 /* Scan over the preceding entries and adjust their addresses as
15858 required. */
15859 while (mp->prev != NULL
15860 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15861 {
15862 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15863 mp = mp->prev;
15864 }
15865
15866 return max_mp;
15867 }
15868
15869 /* Add a constant to the minipool for a forward reference. Returns the
15870 node added or NULL if the constant will not fit in this pool. */
15871 static Mnode *
15872 add_minipool_forward_ref (Mfix *fix)
15873 {
15874 /* If set, max_mp is the first pool_entry that has a lower
15875 constraint than the one we are trying to add. */
15876 Mnode * max_mp = NULL;
15877 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15878 Mnode * mp;
15879
15880 /* If the minipool starts before the end of FIX->INSN then this FIX
15881 can not be placed into the current pool. Furthermore, adding the
15882 new constant pool entry may cause the pool to start FIX_SIZE bytes
15883 earlier. */
15884 if (minipool_vector_head &&
15885 (fix->address + get_attr_length (fix->insn)
15886 >= minipool_vector_head->max_address - fix->fix_size))
15887 return NULL;
15888
15889 /* Scan the pool to see if a constant with the same value has
15890 already been added. While we are doing this, also note the
15891 location where we must insert the constant if it doesn't already
15892 exist. */
15893 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15894 {
15895 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15896 && fix->mode == mp->mode
15897 && (!LABEL_P (fix->value)
15898 || (CODE_LABEL_NUMBER (fix->value)
15899 == CODE_LABEL_NUMBER (mp->value)))
15900 && rtx_equal_p (fix->value, mp->value))
15901 {
15902 /* More than one fix references this entry. */
15903 mp->refcount++;
15904 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15905 }
15906
15907 /* Note the insertion point if necessary. */
15908 if (max_mp == NULL
15909 && mp->max_address > max_address)
15910 max_mp = mp;
15911
15912       /* If we are inserting an 8-byte aligned quantity and
15913 we have not already found an insertion point, then
15914 make sure that all such 8-byte aligned quantities are
15915 placed at the start of the pool. */
15916 if (ARM_DOUBLEWORD_ALIGN
15917 && max_mp == NULL
15918 && fix->fix_size >= 8
15919 && mp->fix_size < 8)
15920 {
15921 max_mp = mp;
15922 max_address = mp->max_address;
15923 }
15924 }
15925
15926 /* The value is not currently in the minipool, so we need to create
15927 a new entry for it. If MAX_MP is NULL, the entry will be put on
15928 the end of the list since the placement is less constrained than
15929 any existing entry. Otherwise, we insert the new fix before
15930 MAX_MP and, if necessary, adjust the constraints on the other
15931 entries. */
15932 mp = XNEW (Mnode);
15933 mp->fix_size = fix->fix_size;
15934 mp->mode = fix->mode;
15935 mp->value = fix->value;
15936 mp->refcount = 1;
15937 /* Not yet required for a backwards ref. */
15938 mp->min_address = -65536;
15939
15940 if (max_mp == NULL)
15941 {
15942 mp->max_address = max_address;
15943 mp->next = NULL;
15944 mp->prev = minipool_vector_tail;
15945
15946 if (mp->prev == NULL)
15947 {
15948 minipool_vector_head = mp;
15949 minipool_vector_label = gen_label_rtx ();
15950 }
15951 else
15952 mp->prev->next = mp;
15953
15954 minipool_vector_tail = mp;
15955 }
15956 else
15957 {
15958 if (max_address > max_mp->max_address - mp->fix_size)
15959 mp->max_address = max_mp->max_address - mp->fix_size;
15960 else
15961 mp->max_address = max_address;
15962
15963 mp->next = max_mp;
15964 mp->prev = max_mp->prev;
15965 max_mp->prev = mp;
15966 if (mp->prev != NULL)
15967 mp->prev->next = mp;
15968 else
15969 minipool_vector_head = mp;
15970 }
15971
15972 /* Save the new entry. */
15973 max_mp = mp;
15974
15975 /* Scan over the preceding entries and adjust their addresses as
15976 required. */
15977 while (mp->prev != NULL
15978 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15979 {
15980 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15981 mp = mp->prev;
15982 }
15983
15984 return max_mp;
15985 }
15986
15987 static Mnode *
15988 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15989 HOST_WIDE_INT min_address)
15990 {
15991 HOST_WIDE_INT offset;
15992
15993 /* The code below assumes these are different. */
15994 gcc_assert (mp != min_mp);
15995
15996 if (min_mp == NULL)
15997 {
15998 if (min_address > mp->min_address)
15999 mp->min_address = min_address;
16000 }
16001 else
16002 {
16003 /* We will adjust this below if it is too loose. */
16004 mp->min_address = min_address;
16005
16006 /* Unlink MP from its current position. Since min_mp is non-null,
16007 mp->next must be non-null. */
16008 mp->next->prev = mp->prev;
16009 if (mp->prev != NULL)
16010 mp->prev->next = mp->next;
16011 else
16012 minipool_vector_head = mp->next;
16013
16014 /* Reinsert it after MIN_MP. */
16015 mp->prev = min_mp;
16016 mp->next = min_mp->next;
16017 min_mp->next = mp;
16018 if (mp->next != NULL)
16019 mp->next->prev = mp;
16020 else
16021 minipool_vector_tail = mp;
16022 }
16023
16024 min_mp = mp;
16025
16026 offset = 0;
16027 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16028 {
16029 mp->offset = offset;
16030 if (mp->refcount > 0)
16031 offset += mp->fix_size;
16032
16033 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16034 mp->next->min_address = mp->min_address + mp->fix_size;
16035 }
16036
16037 return min_mp;
16038 }
16039
16040 /* Add a constant to the minipool for a backward reference. Returns the
16041 node added or NULL if the constant will not fit in this pool.
16042
16043 Note that the code for insertion for a backwards reference can be
16044 somewhat confusing because the calculated offsets for each fix do
16045 not take into account the size of the pool (which is still under
16046    construction).  */
16047 static Mnode *
16048 add_minipool_backward_ref (Mfix *fix)
16049 {
16050 /* If set, min_mp is the last pool_entry that has a lower constraint
16051 than the one we are trying to add. */
16052 Mnode *min_mp = NULL;
16053 /* This can be negative, since it is only a constraint. */
16054 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16055 Mnode *mp;
16056
16057 /* If we can't reach the current pool from this insn, or if we can't
16058 insert this entry at the end of the pool without pushing other
16059 fixes out of range, then we don't try. This ensures that we
16060 can't fail later on. */
16061 if (min_address >= minipool_barrier->address
16062 || (minipool_vector_tail->min_address + fix->fix_size
16063 >= minipool_barrier->address))
16064 return NULL;
16065
16066 /* Scan the pool to see if a constant with the same value has
16067 already been added. While we are doing this, also note the
16068 location where we must insert the constant if it doesn't already
16069 exist. */
16070 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16071 {
16072 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16073 && fix->mode == mp->mode
16074 && (!LABEL_P (fix->value)
16075 || (CODE_LABEL_NUMBER (fix->value)
16076 == CODE_LABEL_NUMBER (mp->value)))
16077 && rtx_equal_p (fix->value, mp->value)
16078 /* Check that there is enough slack to move this entry to the
16079 end of the table (this is conservative). */
16080 && (mp->max_address
16081 > (minipool_barrier->address
16082 + minipool_vector_tail->offset
16083 + minipool_vector_tail->fix_size)))
16084 {
16085 mp->refcount++;
16086 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16087 }
16088
16089 if (min_mp != NULL)
16090 mp->min_address += fix->fix_size;
16091 else
16092 {
16093 /* Note the insertion point if necessary. */
16094 if (mp->min_address < min_address)
16095 {
16096 	      /* For now, we do not allow nodes that require 8-byte alignment
16097 		 to be inserted anywhere but at the start of the pool.  */
16098 if (ARM_DOUBLEWORD_ALIGN
16099 && fix->fix_size >= 8 && mp->fix_size < 8)
16100 return NULL;
16101 else
16102 min_mp = mp;
16103 }
16104 else if (mp->max_address
16105 < minipool_barrier->address + mp->offset + fix->fix_size)
16106 {
16107 /* Inserting before this entry would push the fix beyond
16108 its maximum address (which can happen if we have
16109 re-located a forwards fix); force the new fix to come
16110 after it. */
16111 if (ARM_DOUBLEWORD_ALIGN
16112 && fix->fix_size >= 8 && mp->fix_size < 8)
16113 return NULL;
16114 else
16115 {
16116 min_mp = mp;
16117 min_address = mp->min_address + fix->fix_size;
16118 }
16119 }
16120 /* Do not insert a non-8-byte aligned quantity before 8-byte
16121 aligned quantities. */
16122 else if (ARM_DOUBLEWORD_ALIGN
16123 && fix->fix_size < 8
16124 && mp->fix_size >= 8)
16125 {
16126 min_mp = mp;
16127 min_address = mp->min_address + fix->fix_size;
16128 }
16129 }
16130 }
16131
16132 /* We need to create a new entry. */
16133 mp = XNEW (Mnode);
16134 mp->fix_size = fix->fix_size;
16135 mp->mode = fix->mode;
16136 mp->value = fix->value;
16137 mp->refcount = 1;
16138 mp->max_address = minipool_barrier->address + 65536;
16139
16140 mp->min_address = min_address;
16141
16142 if (min_mp == NULL)
16143 {
16144 mp->prev = NULL;
16145 mp->next = minipool_vector_head;
16146
16147 if (mp->next == NULL)
16148 {
16149 minipool_vector_tail = mp;
16150 minipool_vector_label = gen_label_rtx ();
16151 }
16152 else
16153 mp->next->prev = mp;
16154
16155 minipool_vector_head = mp;
16156 }
16157 else
16158 {
16159 mp->next = min_mp->next;
16160 mp->prev = min_mp;
16161 min_mp->next = mp;
16162
16163 if (mp->next != NULL)
16164 mp->next->prev = mp;
16165 else
16166 minipool_vector_tail = mp;
16167 }
16168
16169 /* Save the new entry. */
16170 min_mp = mp;
16171
16172 if (mp->prev)
16173 mp = mp->prev;
16174 else
16175 mp->offset = 0;
16176
16177 /* Scan over the following entries and adjust their offsets. */
16178 while (mp->next != NULL)
16179 {
16180 if (mp->next->min_address < mp->min_address + mp->fix_size)
16181 mp->next->min_address = mp->min_address + mp->fix_size;
16182
16183 if (mp->refcount)
16184 mp->next->offset = mp->offset + mp->fix_size;
16185 else
16186 mp->next->offset = mp->offset;
16187
16188 mp = mp->next;
16189 }
16190
16191 return min_mp;
16192 }
16193
16194 static void
16195 assign_minipool_offsets (Mfix *barrier)
16196 {
16197 HOST_WIDE_INT offset = 0;
16198 Mnode *mp;
16199
16200 minipool_barrier = barrier;
16201
16202 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16203 {
16204 mp->offset = offset;
16205
16206 if (mp->refcount > 0)
16207 offset += mp->fix_size;
16208 }
16209 }
16210
16211 /* Output the literal table.  */
16212 static void
16213 dump_minipool (rtx scan)
16214 {
16215 Mnode * mp;
16216 Mnode * nmp;
16217 int align64 = 0;
16218
16219 if (ARM_DOUBLEWORD_ALIGN)
16220 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16221 if (mp->refcount > 0 && mp->fix_size >= 8)
16222 {
16223 align64 = 1;
16224 break;
16225 }
16226
16227 if (dump_file)
16228 fprintf (dump_file,
16229 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16230 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16231
16232 scan = emit_label_after (gen_label_rtx (), scan);
16233 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16234 scan = emit_label_after (minipool_vector_label, scan);
16235
16236 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16237 {
16238 if (mp->refcount > 0)
16239 {
16240 if (dump_file)
16241 {
16242 fprintf (dump_file,
16243 ";; Offset %u, min %ld, max %ld ",
16244 (unsigned) mp->offset, (unsigned long) mp->min_address,
16245 (unsigned long) mp->max_address);
16246 arm_print_value (dump_file, mp->value);
16247 fputc ('\n', dump_file);
16248 }
16249
16250 switch (mp->fix_size)
16251 {
16252 #ifdef HAVE_consttable_1
16253 case 1:
16254 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16255 break;
16256
16257 #endif
16258 #ifdef HAVE_consttable_2
16259 case 2:
16260 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16261 break;
16262
16263 #endif
16264 #ifdef HAVE_consttable_4
16265 case 4:
16266 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16267 break;
16268
16269 #endif
16270 #ifdef HAVE_consttable_8
16271 case 8:
16272 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16273 break;
16274
16275 #endif
16276 #ifdef HAVE_consttable_16
16277 case 16:
16278 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16279 break;
16280
16281 #endif
16282 default:
16283 gcc_unreachable ();
16284 }
16285 }
16286
16287 nmp = mp->next;
16288 free (mp);
16289 }
16290
16291 minipool_vector_head = minipool_vector_tail = NULL;
16292 scan = emit_insn_after (gen_consttable_end (), scan);
16293 scan = emit_barrier_after (scan);
16294 }
16295
16296 /* Return the cost of forcibly inserting a barrier after INSN. */
16297 static int
16298 arm_barrier_cost (rtx insn)
16299 {
16300 /* Basing the location of the pool on the loop depth is preferable,
16301 but at the moment, the basic block information seems to be
16302 corrupt by this stage of the compilation. */
16303 int base_cost = 50;
16304 rtx next = next_nonnote_insn (insn);
16305
16306 if (next != NULL && LABEL_P (next))
16307 base_cost -= 20;
16308
16309 switch (GET_CODE (insn))
16310 {
16311 case CODE_LABEL:
16312 /* It will always be better to place the table before the label, rather
16313 than after it. */
16314 return 50;
16315
16316 case INSN:
16317 case CALL_INSN:
16318 return base_cost;
16319
16320 case JUMP_INSN:
16321 return base_cost - 10;
16322
16323 default:
16324 return base_cost + 10;
16325 }
16326 }
16327
16328 /* Find the best place in the insn stream in the range
16329 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16330 Create the barrier by inserting a jump and add a new fix entry for
16331 it. */
16332 static Mfix *
16333 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16334 {
16335 HOST_WIDE_INT count = 0;
16336 rtx barrier;
16337 rtx from = fix->insn;
16338 /* The instruction after which we will insert the jump. */
16339 rtx selected = NULL;
16340 int selected_cost;
16341 /* The address at which the jump instruction will be placed. */
16342 HOST_WIDE_INT selected_address;
16343 Mfix * new_fix;
16344 HOST_WIDE_INT max_count = max_address - fix->address;
16345 rtx label = gen_label_rtx ();
16346
16347 selected_cost = arm_barrier_cost (from);
16348 selected_address = fix->address;
16349
16350 while (from && count < max_count)
16351 {
16352 rtx tmp;
16353 int new_cost;
16354
16355 /* This code shouldn't have been called if there was a natural barrier
16356 within range. */
16357 gcc_assert (!BARRIER_P (from));
16358
16359 /* Count the length of this insn. This must stay in sync with the
16360 code that pushes minipool fixes. */
16361 if (LABEL_P (from))
16362 count += get_label_padding (from);
16363 else
16364 count += get_attr_length (from);
16365
16366 /* If there is a jump table, add its length. */
16367 if (tablejump_p (from, NULL, &tmp))
16368 {
16369 count += get_jump_table_size (tmp);
16370
16371 /* Jump tables aren't in a basic block, so base the cost on
16372 the dispatch insn. If we select this location, we will
16373 still put the pool after the table. */
16374 new_cost = arm_barrier_cost (from);
16375
16376 if (count < max_count
16377 && (!selected || new_cost <= selected_cost))
16378 {
16379 selected = tmp;
16380 selected_cost = new_cost;
16381 selected_address = fix->address + count;
16382 }
16383
16384 /* Continue after the dispatch table. */
16385 from = NEXT_INSN (tmp);
16386 continue;
16387 }
16388
16389 new_cost = arm_barrier_cost (from);
16390
16391 if (count < max_count
16392 && (!selected || new_cost <= selected_cost))
16393 {
16394 selected = from;
16395 selected_cost = new_cost;
16396 selected_address = fix->address + count;
16397 }
16398
16399 from = NEXT_INSN (from);
16400 }
16401
16402 /* Make sure that we found a place to insert the jump. */
16403 gcc_assert (selected);
16404
16405 /* Make sure we do not split a call and its corresponding
16406 CALL_ARG_LOCATION note. */
16407 if (CALL_P (selected))
16408 {
16409 rtx next = NEXT_INSN (selected);
16410 if (next && NOTE_P (next)
16411 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16412 selected = next;
16413 }
16414
16415 /* Create a new JUMP_INSN that branches around a barrier. */
16416 from = emit_jump_insn_after (gen_jump (label), selected);
16417 JUMP_LABEL (from) = label;
16418 barrier = emit_barrier_after (from);
16419 emit_label_after (label, barrier);
16420
16421 /* Create a minipool barrier entry for the new barrier. */
16422 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16423 new_fix->insn = barrier;
16424 new_fix->address = selected_address;
16425 new_fix->next = fix->next;
16426 fix->next = new_fix;
16427
16428 return new_fix;
16429 }
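/* At the assembly level the sequence emitted above amounts to something like
the following (label name illustrative only):

b       .Lskip          @ jump around the place where the pool will go
<barrier>               @ dump_minipool will emit the pool entries here
.Lskip:

and the returned fix entry records the barrier at the estimated address, so
that later fixes measure their distances against it.  */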
16430
16431 /* Record that there is a natural barrier in the insn stream at
16432 ADDRESS. */
16433 static void
16434 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16435 {
16436 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16437
16438 fix->insn = insn;
16439 fix->address = address;
16440
16441 fix->next = NULL;
16442 if (minipool_fix_head != NULL)
16443 minipool_fix_tail->next = fix;
16444 else
16445 minipool_fix_head = fix;
16446
16447 minipool_fix_tail = fix;
16448 }
16449
16450 /* Record INSN, which will need fixing up to load a value from the
16451 minipool. ADDRESS is the offset of the insn since the start of the
16452 function; LOC is a pointer to the part of the insn which requires
16453 fixing; VALUE is the constant that must be loaded, which is of type
16454 MODE. */
16455 static void
16456 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16457 enum machine_mode mode, rtx value)
16458 {
16459 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16460
16461 fix->insn = insn;
16462 fix->address = address;
16463 fix->loc = loc;
16464 fix->mode = mode;
16465 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16466 fix->value = value;
16467 fix->forwards = get_attr_pool_range (insn);
16468 fix->backwards = get_attr_neg_pool_range (insn);
16469 fix->minipool = NULL;
16470
16471 /* If an insn doesn't have a range defined for it, then it isn't
16472 expecting to be reworked by this code. Better to stop now than
16473 to generate duff assembly code. */
16474 gcc_assert (fix->forwards || fix->backwards);
16475
16476 /* If an entry requires 8-byte alignment then assume all constant pools
16477 require 4 bytes of padding. Trying to do this later on a per-pool
16478 basis is awkward because existing pool entries have to be modified. */
16479 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16480 minipool_pad = 4;
16481
16482 if (dump_file)
16483 {
16484 fprintf (dump_file,
16485 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16486 GET_MODE_NAME (mode),
16487 INSN_UID (insn), (unsigned long) address,
16488 -1 * (long)fix->backwards, (long)fix->forwards);
16489 arm_print_value (dump_file, fix->value);
16490 fprintf (dump_file, "\n");
16491 }
16492
16493 /* Add it to the chain of fixes. */
16494 fix->next = NULL;
16495
16496 if (minipool_fix_head != NULL)
16497 minipool_fix_tail->next = fix;
16498 else
16499 minipool_fix_head = fix;
16500
16501 minipool_fix_tail = fix;
16502 }
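/* When dumping is enabled for the machine-dependent reorg pass, the fprintf
above produces a line of roughly this shape (all values illustrative only):

;; SImode fixup for i123; addr 248, range (-4084,4092): <value>

i.e. the insn UID, its address within the function, the backward and forward
pool ranges taken from the insn attributes, and the constant printed by
arm_print_value.  */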
16503
16504 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
16505 expressed as a number of insns, or 99 if we always want to synthesize
16506 the value. */
16507 int
16508 arm_max_const_double_inline_cost ()
16509 {
16510 /* Let the value get synthesized to avoid the use of literal pools. */
16511 if (arm_disable_literal_pool)
16512 return 99;
16513
16514 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16515 }
16516
16517 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16518 Returns the number of insns needed, or 99 if we don't know how to
16519 do it. */
16520 int
16521 arm_const_double_inline_cost (rtx val)
16522 {
16523 rtx lowpart, highpart;
16524 enum machine_mode mode;
16525
16526 mode = GET_MODE (val);
16527
16528 if (mode == VOIDmode)
16529 mode = DImode;
16530
16531 gcc_assert (GET_MODE_SIZE (mode) == 8);
16532
16533 lowpart = gen_lowpart (SImode, val);
16534 highpart = gen_highpart_mode (SImode, mode, val);
16535
16536 gcc_assert (CONST_INT_P (lowpart));
16537 gcc_assert (CONST_INT_P (highpart));
16538
16539 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16540 NULL_RTX, NULL_RTX, 0, 0)
16541 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16542 NULL_RTX, NULL_RTX, 0, 0));
16543 }
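/* As an illustration (values chosen for clarity, not taken from the code
above): a DImode constant such as 0x0000000500000003, whose two 32-bit
halves are both valid ARM immediates, would typically be costed at
1 + 1 = 2 insns, whereas a constant whose halves each need multi-insn
synthesis receives a correspondingly higher cost and is then more likely
to be placed in the literal pool instead.  */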
16544
16545 /* Return true if it is worthwhile to split a 64-bit constant into two
16546 32-bit operations. This is the case if optimizing for size, or
16547 if we have load delay slots, or if one 32-bit part can be done with
16548 a single data operation. */
16549 bool
16550 arm_const_double_by_parts (rtx val)
16551 {
16552 enum machine_mode mode = GET_MODE (val);
16553 rtx part;
16554
16555 if (optimize_size || arm_ld_sched)
16556 return true;
16557
16558 if (mode == VOIDmode)
16559 mode = DImode;
16560
16561 part = gen_highpart_mode (SImode, mode, val);
16562
16563 gcc_assert (CONST_INT_P (part));
16564
16565 if (const_ok_for_arm (INTVAL (part))
16566 || const_ok_for_arm (~INTVAL (part)))
16567 return true;
16568
16569 part = gen_lowpart (SImode, val);
16570
16571 gcc_assert (CONST_INT_P (part));
16572
16573 if (const_ok_for_arm (INTVAL (part))
16574 || const_ok_for_arm (~INTVAL (part)))
16575 return true;
16576
16577 return false;
16578 }
16579
16580 /* Return true if it is possible to inline both the high and low parts
16581 of a 64-bit constant into 32-bit data processing instructions. */
16582 bool
16583 arm_const_double_by_immediates (rtx val)
16584 {
16585 enum machine_mode mode = GET_MODE (val);
16586 rtx part;
16587
16588 if (mode == VOIDmode)
16589 mode = DImode;
16590
16591 part = gen_highpart_mode (SImode, mode, val);
16592
16593 gcc_assert (CONST_INT_P (part));
16594
16595 if (!const_ok_for_arm (INTVAL (part)))
16596 return false;
16597
16598 part = gen_lowpart (SImode, val);
16599
16600 gcc_assert (CONST_INT_P (part));
16601
16602 if (!const_ok_for_arm (INTVAL (part)))
16603 return false;
16604
16605 return true;
16606 }
16607
16608 /* Scan INSN and note any of its operands that need fixing.
16609 If DO_PUSHES is false we do not actually push any of the fixups
16610 needed. */
16611 static void
16612 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16613 {
16614 int opno;
16615
16616 extract_insn (insn);
16617
16618 if (!constrain_operands (1))
16619 fatal_insn_not_found (insn);
16620
16621 if (recog_data.n_alternatives == 0)
16622 return;
16623
16624 /* Fill in recog_op_alt with information about the constraints of
16625 this insn. */
16626 preprocess_constraints ();
16627
16628 for (opno = 0; opno < recog_data.n_operands; opno++)
16629 {
16630 /* Things we need to fix can only occur in inputs. */
16631 if (recog_data.operand_type[opno] != OP_IN)
16632 continue;
16633
16634 /* If this alternative is a memory reference, then any mention
16635 of constants in this alternative is really to fool reload
16636 into allowing us to accept one there. We need to fix them up
16637 now so that we output the right code. */
16638 if (recog_op_alt[opno][which_alternative].memory_ok)
16639 {
16640 rtx op = recog_data.operand[opno];
16641
16642 if (CONSTANT_P (op))
16643 {
16644 if (do_pushes)
16645 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16646 recog_data.operand_mode[opno], op);
16647 }
16648 else if (MEM_P (op)
16649 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16650 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16651 {
16652 if (do_pushes)
16653 {
16654 rtx cop = avoid_constant_pool_reference (op);
16655
16656 /* Casting the address of something to a mode narrower
16657 than a word can cause avoid_constant_pool_reference()
16658 to return the pool reference itself. That's no good to
16659 us here. Let's just hope that we can use the
16660 constant pool value directly. */
16661 if (op == cop)
16662 cop = get_pool_constant (XEXP (op, 0));
16663
16664 push_minipool_fix (insn, address,
16665 recog_data.operand_loc[opno],
16666 recog_data.operand_mode[opno], cop);
16667 }
16668
16669 }
16670 }
16671 }
16672
16673 return;
16674 }
16675
16676 /* Rewrite a move insn into a subtract of 0 if the condition codes will
16677 be useful in the next conditional jump insn. */
16678
16679 static void
16680 thumb1_reorg (void)
16681 {
16682 basic_block bb;
16683
16684 FOR_EACH_BB_FN (bb, cfun)
16685 {
16686 rtx dest, src;
16687 rtx pat, op0, set = NULL;
16688 rtx prev, insn = BB_END (bb);
16689 bool insn_clobbered = false;
16690
16691 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16692 insn = PREV_INSN (insn);
16693
16694 /* Find the last cbranchsi4_insn in basic block BB. */
16695 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16696 continue;
16697
16698 /* Get the register with which we are comparing. */
16699 pat = PATTERN (insn);
16700 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16701
16702 /* Find the first flag setting insn before INSN in basic block BB. */
16703 gcc_assert (insn != BB_HEAD (bb));
16704 for (prev = PREV_INSN (insn);
16705 (!insn_clobbered
16706 && prev != BB_HEAD (bb)
16707 && (NOTE_P (prev)
16708 || DEBUG_INSN_P (prev)
16709 || ((set = single_set (prev)) != NULL
16710 && get_attr_conds (prev) == CONDS_NOCOND)));
16711 prev = PREV_INSN (prev))
16712 {
16713 if (reg_set_p (op0, prev))
16714 insn_clobbered = true;
16715 }
16716
16717 /* Skip if op0 is clobbered by insn other than prev. */
16718 if (insn_clobbered)
16719 continue;
16720
16721 if (!set)
16722 continue;
16723
16724 dest = SET_DEST (set);
16725 src = SET_SRC (set);
16726 if (!low_register_operand (dest, SImode)
16727 || !low_register_operand (src, SImode))
16728 continue;
16729
16730 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16731 in INSN. Both src and dest of the move insn are checked. */
16732 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16733 {
16734 dest = copy_rtx (dest);
16735 src = copy_rtx (src);
16736 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16737 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16738 INSN_CODE (prev) = -1;
16739 /* Set test register in INSN to dest. */
16740 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16741 INSN_CODE (insn) = -1;
16742 }
16743 }
16744 }
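/* A rough illustration of the transformation above (register numbers are
arbitrary): a block that ends with

mov     r1, r2
...
cmp     r2, #0
bne     .Ldest

is rewritten so that the move becomes a flag-setting subtract of zero,

subs    r1, r2, #0

and the cbranch is retargeted to compare r1; the intent is to let later
passes and the cbranchsi4_insn output logic avoid an explicit comparison
against zero.  */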
16745
16746 /* Convert instructions to their cc-clobbering variant if possible, since
16747 that allows us to use smaller encodings. */
16748
16749 static void
16750 thumb2_reorg (void)
16751 {
16752 basic_block bb;
16753 regset_head live;
16754
16755 INIT_REG_SET (&live);
16756
16757 /* We are freeing block_for_insn in the toplev to keep compatibility
16758 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16759 compute_bb_for_insn ();
16760 df_analyze ();
16761
16762 FOR_EACH_BB_FN (bb, cfun)
16763 {
16764 rtx insn;
16765
16766 COPY_REG_SET (&live, DF_LR_OUT (bb));
16767 df_simulate_initialize_backwards (bb, &live);
16768 FOR_BB_INSNS_REVERSE (bb, insn)
16769 {
16770 if (NONJUMP_INSN_P (insn)
16771 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16772 && GET_CODE (PATTERN (insn)) == SET)
16773 {
16774 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16775 rtx pat = PATTERN (insn);
16776 rtx dst = XEXP (pat, 0);
16777 rtx src = XEXP (pat, 1);
16778 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16779
16780 if (!OBJECT_P (src))
16781 op0 = XEXP (src, 0);
16782
16783 if (BINARY_P (src))
16784 op1 = XEXP (src, 1);
16785
16786 if (low_register_operand (dst, SImode))
16787 {
16788 switch (GET_CODE (src))
16789 {
16790 case PLUS:
16791 /* Adding two registers and storing the result
16792 in the first source is already a 16-bit
16793 operation. */
16794 if (rtx_equal_p (dst, op0)
16795 && register_operand (op1, SImode))
16796 break;
16797
16798 if (low_register_operand (op0, SImode))
16799 {
16800 /* ADDS <Rd>,<Rn>,<Rm> */
16801 if (low_register_operand (op1, SImode))
16802 action = CONV;
16803 /* ADDS <Rdn>,#<imm8> */
16804 /* SUBS <Rdn>,#<imm8> */
16805 else if (rtx_equal_p (dst, op0)
16806 && CONST_INT_P (op1)
16807 && IN_RANGE (INTVAL (op1), -255, 255))
16808 action = CONV;
16809 /* ADDS <Rd>,<Rn>,#<imm3> */
16810 /* SUBS <Rd>,<Rn>,#<imm3> */
16811 else if (CONST_INT_P (op1)
16812 && IN_RANGE (INTVAL (op1), -7, 7))
16813 action = CONV;
16814 }
16815 /* ADCS <Rd>, <Rn> */
16816 else if (GET_CODE (XEXP (src, 0)) == PLUS
16817 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16818 && low_register_operand (XEXP (XEXP (src, 0), 1),
16819 SImode)
16820 && COMPARISON_P (op1)
16821 && cc_register (XEXP (op1, 0), VOIDmode)
16822 && maybe_get_arm_condition_code (op1) == ARM_CS
16823 && XEXP (op1, 1) == const0_rtx)
16824 action = CONV;
16825 break;
16826
16827 case MINUS:
16828 /* RSBS <Rd>,<Rn>,#0
16829 Not handled here: see NEG below. */
16830 /* SUBS <Rd>,<Rn>,#<imm3>
16831 SUBS <Rdn>,#<imm8>
16832 Not handled here: see PLUS above. */
16833 /* SUBS <Rd>,<Rn>,<Rm> */
16834 if (low_register_operand (op0, SImode)
16835 && low_register_operand (op1, SImode))
16836 action = CONV;
16837 break;
16838
16839 case MULT:
16840 /* MULS <Rdm>,<Rn>,<Rdm>
16841 As an exception to the rule, this is only used
16842 when optimizing for size since MULS is slow on all
16843 known implementations. We do not even want to use
16844 MULS in cold code, if optimizing for speed, so we
16845 test the global flag here. */
16846 if (!optimize_size)
16847 break;
16848 /* else fall through. */
16849 case AND:
16850 case IOR:
16851 case XOR:
16852 /* ANDS <Rdn>,<Rm> */
16853 if (rtx_equal_p (dst, op0)
16854 && low_register_operand (op1, SImode))
16855 action = CONV;
16856 else if (rtx_equal_p (dst, op1)
16857 && low_register_operand (op0, SImode))
16858 action = SWAP_CONV;
16859 break;
16860
16861 case ASHIFTRT:
16862 case ASHIFT:
16863 case LSHIFTRT:
16864 /* ASRS <Rdn>,<Rm> */
16865 /* LSRS <Rdn>,<Rm> */
16866 /* LSLS <Rdn>,<Rm> */
16867 if (rtx_equal_p (dst, op0)
16868 && low_register_operand (op1, SImode))
16869 action = CONV;
16870 /* ASRS <Rd>,<Rm>,#<imm5> */
16871 /* LSRS <Rd>,<Rm>,#<imm5> */
16872 /* LSLS <Rd>,<Rm>,#<imm5> */
16873 else if (low_register_operand (op0, SImode)
16874 && CONST_INT_P (op1)
16875 && IN_RANGE (INTVAL (op1), 0, 31))
16876 action = CONV;
16877 break;
16878
16879 case ROTATERT:
16880 /* RORS <Rdn>,<Rm> */
16881 if (rtx_equal_p (dst, op0)
16882 && low_register_operand (op1, SImode))
16883 action = CONV;
16884 break;
16885
16886 case NOT:
16887 case NEG:
16888 /* MVNS <Rd>,<Rm> */
16889 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16890 if (low_register_operand (op0, SImode))
16891 action = CONV;
16892 break;
16893
16894 case CONST_INT:
16895 /* MOVS <Rd>,#<imm8> */
16896 if (CONST_INT_P (src)
16897 && IN_RANGE (INTVAL (src), 0, 255))
16898 action = CONV;
16899 break;
16900
16901 case REG:
16902 /* MOVS and MOV<c> with registers have different
16903 encodings, so are not relevant here. */
16904 break;
16905
16906 default:
16907 break;
16908 }
16909 }
16910
16911 if (action != SKIP)
16912 {
16913 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16914 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16915 rtvec vec;
16916
16917 if (action == SWAP_CONV)
16918 {
16919 src = copy_rtx (src);
16920 XEXP (src, 0) = op1;
16921 XEXP (src, 1) = op0;
16922 pat = gen_rtx_SET (VOIDmode, dst, src);
16923 vec = gen_rtvec (2, pat, clobber);
16924 }
16925 else /* action == CONV */
16926 vec = gen_rtvec (2, pat, clobber);
16927
16928 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16929 INSN_CODE (insn) = -1;
16930 }
16931 }
16932
16933 if (NONDEBUG_INSN_P (insn))
16934 df_simulate_one_insn_backwards (bb, insn, &live);
16935 }
16936 }
16937
16938 CLEAR_REG_SET (&live);
16939 }
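/* An illustrative case of the conversion performed above (registers chosen
arbitrarily): with the condition codes dead at this point,

add     r0, r1, r2      @ needs a 32-bit Thumb-2 encoding

is rewritten as the flag-clobbering form

adds    r0, r1, r2      @ fits the 16-bit ADDS <Rd>,<Rn>,<Rm> encoding

by wrapping the original SET in a PARALLEL with a clobber of CC_REGNUM,
which is what the CONV action arranges; SWAP_CONV additionally commutes the
operands first for the commutative cases.  */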
16940
16941 /* GCC puts the pool in the wrong place for ARM, since we can only
16942 load addresses within a limited distance of the pc. We do some
16943 special munging to move the constant pool values to the correct
16944 point in the code. */
16945 static void
16946 arm_reorg (void)
16947 {
16948 rtx insn;
16949 HOST_WIDE_INT address = 0;
16950 Mfix * fix;
16951
16952 if (TARGET_THUMB1)
16953 thumb1_reorg ();
16954 else if (TARGET_THUMB2)
16955 thumb2_reorg ();
16956
16957 /* Ensure all insns that must be split have been split at this point.
16958 Otherwise, the pool placement code below may compute incorrect
16959 insn lengths. Note that when optimizing, all insns have already
16960 been split at this point. */
16961 if (!optimize)
16962 split_all_insns_noflow ();
16963
16964 minipool_fix_head = minipool_fix_tail = NULL;
16965
16966 /* The first insn must always be a note, or the code below won't
16967 scan it properly. */
16968 insn = get_insns ();
16969 gcc_assert (NOTE_P (insn));
16970 minipool_pad = 0;
16971
16972 /* Scan all the insns and record the operands that will need fixing. */
16973 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16974 {
16975 if (BARRIER_P (insn))
16976 push_minipool_barrier (insn, address);
16977 else if (INSN_P (insn))
16978 {
16979 rtx table;
16980
16981 note_invalid_constants (insn, address, true);
16982 address += get_attr_length (insn);
16983
16984 /* If the insn is a vector jump, add the size of the table
16985 and skip the table. */
16986 if (tablejump_p (insn, NULL, &table))
16987 {
16988 address += get_jump_table_size (table);
16989 insn = table;
16990 }
16991 }
16992 else if (LABEL_P (insn))
16993 /* Add the worst-case padding due to alignment. We don't add
16994 the _current_ padding because the minipool insertions
16995 themselves might change it. */
16996 address += get_label_padding (insn);
16997 }
16998
16999 fix = minipool_fix_head;
17000
17001 /* Now scan the fixups and perform the required changes. */
17002 while (fix)
17003 {
17004 Mfix * ftmp;
17005 Mfix * fdel;
17006 Mfix * last_added_fix;
17007 Mfix * last_barrier = NULL;
17008 Mfix * this_fix;
17009
17010 /* Skip any further barriers before the next fix. */
17011 while (fix && BARRIER_P (fix->insn))
17012 fix = fix->next;
17013
17014 /* No more fixes. */
17015 if (fix == NULL)
17016 break;
17017
17018 last_added_fix = NULL;
17019
17020 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17021 {
17022 if (BARRIER_P (ftmp->insn))
17023 {
17024 if (ftmp->address >= minipool_vector_head->max_address)
17025 break;
17026
17027 last_barrier = ftmp;
17028 }
17029 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17030 break;
17031
17032 last_added_fix = ftmp; /* Keep track of the last fix added. */
17033 }
17034
17035 /* If we found a barrier, drop back to that; any fixes that we
17036 could have reached but come after the barrier will now go in
17037 the next mini-pool. */
17038 if (last_barrier != NULL)
17039 {
17040 /* Reduce the refcount for those fixes that won't go into this
17041 pool after all. */
17042 for (fdel = last_barrier->next;
17043 fdel && fdel != ftmp;
17044 fdel = fdel->next)
17045 {
17046 fdel->minipool->refcount--;
17047 fdel->minipool = NULL;
17048 }
17049
17050 ftmp = last_barrier;
17051 }
17052 else
17053 {
17054 /* ftmp is the first fix that we can't fit into this pool and
17055 there are no natural barriers that we could use. Insert a
17056 new barrier in the code somewhere between the previous
17057 fix and this one, and arrange to jump around it. */
17058 HOST_WIDE_INT max_address;
17059
17060 /* The last item on the list of fixes must be a barrier, so
17061 we can never run off the end of the list of fixes without
17062 last_barrier being set. */
17063 gcc_assert (ftmp);
17064
17065 max_address = minipool_vector_head->max_address;
17066 /* Check that there isn't another fix that is in range that
17067 we couldn't fit into this pool because the pool was
17068 already too large: we need to put the pool before such an
17069 instruction. The pool itself may come just after the
17070 fix because create_fix_barrier also allows space for a
17071 jump instruction. */
17072 if (ftmp->address < max_address)
17073 max_address = ftmp->address + 1;
17074
17075 last_barrier = create_fix_barrier (last_added_fix, max_address);
17076 }
17077
17078 assign_minipool_offsets (last_barrier);
17079
17080 while (ftmp)
17081 {
17082 if (!BARRIER_P (ftmp->insn)
17083 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17084 == NULL))
17085 break;
17086
17087 ftmp = ftmp->next;
17088 }
17089
17090 /* Scan over the fixes we have identified for this pool, fixing them
17091 up and adding the constants to the pool itself. */
17092 for (this_fix = fix; this_fix && ftmp != this_fix;
17093 this_fix = this_fix->next)
17094 if (!BARRIER_P (this_fix->insn))
17095 {
17096 rtx addr
17097 = plus_constant (Pmode,
17098 gen_rtx_LABEL_REF (VOIDmode,
17099 minipool_vector_label),
17100 this_fix->minipool->offset);
17101 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17102 }
17103
17104 dump_minipool (last_barrier->insn);
17105 fix = ftmp;
17106 }
17107
17108 /* From now on we must synthesize any constants that we can't handle
17109 directly. This can happen if the RTL gets split during final
17110 instruction generation. */
17111 after_arm_reorg = 1;
17112
17113 /* Free the minipool memory. */
17114 obstack_free (&minipool_obstack, minipool_startobj);
17115 }
17116 \f
17117 /* Routines to output assembly language. */
17118
17119 /* If the rtx is the correct value then return the string of the number.
17120 In this way we can ensure that valid double constants are generated even
17121 when cross compiling. */
17122 const char *
17123 fp_immediate_constant (rtx x)
17124 {
17125 REAL_VALUE_TYPE r;
17126
17127 if (!fp_consts_inited)
17128 init_fp_table ();
17129
17130 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17131
17132 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17133 return "0";
17134 }
17135
17136 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17137 static const char *
17138 fp_const_from_val (REAL_VALUE_TYPE *r)
17139 {
17140 if (!fp_consts_inited)
17141 init_fp_table ();
17142
17143 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17144 return "0";
17145 }
17146
17147 /* OPERANDS[0] is the entire list of insns that constitute pop,
17148 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17149 is in the list, UPDATE is true iff the list contains explicit
17150 update of base register. */
17151 void
17152 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17153 bool update)
17154 {
17155 int i;
17156 char pattern[100];
17157 int offset;
17158 const char *conditional;
17159 int num_saves = XVECLEN (operands[0], 0);
17160 unsigned int regno;
17161 unsigned int regno_base = REGNO (operands[1]);
17162
17163 offset = 0;
17164 offset += update ? 1 : 0;
17165 offset += return_pc ? 1 : 0;
17166
17167 /* Is the base register in the list? */
17168 for (i = offset; i < num_saves; i++)
17169 {
17170 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17171 /* If SP is in the list, then the base register must be SP. */
17172 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17173 /* If base register is in the list, there must be no explicit update. */
17174 if (regno == regno_base)
17175 gcc_assert (!update);
17176 }
17177
17178 conditional = reverse ? "%?%D0" : "%?%d0";
17179 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17180 {
17181 /* Output pop (not stmfd) because it has a shorter encoding. */
17182 gcc_assert (update);
17183 sprintf (pattern, "pop%s\t{", conditional);
17184 }
17185 else
17186 {
17187 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17188 It's just a convention; their semantics are identical. */
17189 if (regno_base == SP_REGNUM)
17190 sprintf (pattern, "ldm%sfd\t", conditional);
17191 else if (TARGET_UNIFIED_ASM)
17192 sprintf (pattern, "ldmia%s\t", conditional);
17193 else
17194 sprintf (pattern, "ldm%sia\t", conditional);
17195
17196 strcat (pattern, reg_names[regno_base]);
17197 if (update)
17198 strcat (pattern, "!, {");
17199 else
17200 strcat (pattern, ", {");
17201 }
17202
17203 /* Output the first destination register. */
17204 strcat (pattern,
17205 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17206
17207 /* Output the rest of the destination registers. */
17208 for (i = offset + 1; i < num_saves; i++)
17209 {
17210 strcat (pattern, ", ");
17211 strcat (pattern,
17212 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17213 }
17214
17215 strcat (pattern, "}");
17216
17217 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17218 strcat (pattern, "^");
17219
17220 output_asm_insn (pattern, &cond);
17221 }
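/* Examples of the strings assembled above (condition field shown as <c>,
register lists illustrative):

pop<c>     {r4, r5, pc}          @ SP base, unified syntax, writeback
ldm<c>fd   sp!, {r4, r5, pc}     @ SP base, divided syntax
ldmia<c>   r7, {r0, r1}          @ non-SP base, unified, no writeback

with a trailing "^" appended when an interrupt function is returning.  */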
17222
17223
17224 /* Output the assembly for a store multiple. */
17225
17226 const char *
17227 vfp_output_fstmd (rtx * operands)
17228 {
17229 char pattern[100];
17230 int p;
17231 int base;
17232 int i;
17233
17234 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17235 p = strlen (pattern);
17236
17237 gcc_assert (REG_P (operands[1]));
17238
17239 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17240 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17241 {
17242 p += sprintf (&pattern[p], ", d%d", base + i);
17243 }
17244 strcpy (&pattern[p], "}");
17245
17246 output_asm_insn (pattern, operands);
17247 return "";
17248 }
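/* For instance (operands illustrative), a push of d8-d10 results in a
pattern along the lines of

fstmfdd  sp!, {d8, d9, d10}

where the first register comes from the %P1 substitution and the remaining
ones are appended by the loop above from consecutive D register numbers.  */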
17249
17250
17251 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17252 number of bytes pushed. */
17253
17254 static int
17255 vfp_emit_fstmd (int base_reg, int count)
17256 {
17257 rtx par;
17258 rtx dwarf;
17259 rtx tmp, reg;
17260 int i;
17261
17262 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17263 register pairs are stored by a store multiple insn. We avoid this
17264 by pushing an extra pair. */
17265 if (count == 2 && !arm_arch6)
17266 {
17267 if (base_reg == LAST_VFP_REGNUM - 3)
17268 base_reg -= 2;
17269 count++;
17270 }
17271
17272 /* FSTMD may not store more than 16 doubleword registers at once. Split
17273 larger stores into multiple parts (up to a maximum of two, in
17274 practice). */
17275 if (count > 16)
17276 {
17277 int saved;
17278 /* NOTE: base_reg is an internal register number, so each D register
17279 counts as 2. */
17280 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17281 saved += vfp_emit_fstmd (base_reg, 16);
17282 return saved;
17283 }
17284
17285 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17286 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17287
17288 reg = gen_rtx_REG (DFmode, base_reg);
17289 base_reg += 2;
17290
17291 XVECEXP (par, 0, 0)
17292 = gen_rtx_SET (VOIDmode,
17293 gen_frame_mem
17294 (BLKmode,
17295 gen_rtx_PRE_MODIFY (Pmode,
17296 stack_pointer_rtx,
17297 plus_constant
17298 (Pmode, stack_pointer_rtx,
17299 - (count * 8)))
17300 ),
17301 gen_rtx_UNSPEC (BLKmode,
17302 gen_rtvec (1, reg),
17303 UNSPEC_PUSH_MULT));
17304
17305 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17306 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17307 RTX_FRAME_RELATED_P (tmp) = 1;
17308 XVECEXP (dwarf, 0, 0) = tmp;
17309
17310 tmp = gen_rtx_SET (VOIDmode,
17311 gen_frame_mem (DFmode, stack_pointer_rtx),
17312 reg);
17313 RTX_FRAME_RELATED_P (tmp) = 1;
17314 XVECEXP (dwarf, 0, 1) = tmp;
17315
17316 for (i = 1; i < count; i++)
17317 {
17318 reg = gen_rtx_REG (DFmode, base_reg);
17319 base_reg += 2;
17320 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17321
17322 tmp = gen_rtx_SET (VOIDmode,
17323 gen_frame_mem (DFmode,
17324 plus_constant (Pmode,
17325 stack_pointer_rtx,
17326 i * 8)),
17327 reg);
17328 RTX_FRAME_RELATED_P (tmp) = 1;
17329 XVECEXP (dwarf, 0, i + 1) = tmp;
17330 }
17331
17332 par = emit_insn (par);
17333 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17334 RTX_FRAME_RELATED_P (par) = 1;
17335
17336 return count * 8;
17337 }
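/* Sketch of the behaviour (not a literal trace): pushing three D registers
emits one PARALLEL that pre-modifies SP by -24 bytes and stores the block,
while the attached REG_FRAME_RELATED_EXPR note describes the same stores
individually for the unwinder; a request covering more than 16 D registers
is split recursively into at most two FSTMD groups, and the ARM10 VFPr1
workaround above bumps a two-register push up to three.  */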
17338
17339 /* Emit a call instruction with pattern PAT. ADDR is the address of
17340 the call target. */
17341
17342 void
17343 arm_emit_call_insn (rtx pat, rtx addr)
17344 {
17345 rtx insn;
17346
17347 insn = emit_call_insn (pat);
17348
17349 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17350 If the call might use such an entry, add a use of the PIC register
17351 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17352 if (TARGET_VXWORKS_RTP
17353 && flag_pic
17354 && GET_CODE (addr) == SYMBOL_REF
17355 && (SYMBOL_REF_DECL (addr)
17356 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17357 : !SYMBOL_REF_LOCAL_P (addr)))
17358 {
17359 require_pic_register ();
17360 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17361 }
17362 }
17363
17364 /* Output a 'call' insn. */
17365 const char *
17366 output_call (rtx *operands)
17367 {
17368 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17369
17370 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17371 if (REGNO (operands[0]) == LR_REGNUM)
17372 {
17373 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17374 output_asm_insn ("mov%?\t%0, %|lr", operands);
17375 }
17376
17377 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17378
17379 if (TARGET_INTERWORK || arm_arch4t)
17380 output_asm_insn ("bx%?\t%0", operands);
17381 else
17382 output_asm_insn ("mov%?\t%|pc, %0", operands);
17383
17384 return "";
17385 }
17386
17387 /* Output a 'call' insn that is a reference in memory. This is
17388 disabled for ARMv5; we prefer a blx instead because otherwise
17389 there's a significant performance overhead. */
17390 const char *
17391 output_call_mem (rtx *operands)
17392 {
17393 gcc_assert (!arm_arch5);
17394 if (TARGET_INTERWORK)
17395 {
17396 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17397 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17398 output_asm_insn ("bx%?\t%|ip", operands);
17399 }
17400 else if (regno_use_in (LR_REGNUM, operands[0]))
17401 {
17402 /* LR is used in the memory address. We load the address in the
17403 first instruction. It's safe to use IP as the target of the
17404 load since the call will kill it anyway. */
17405 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17406 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17407 if (arm_arch4t)
17408 output_asm_insn ("bx%?\t%|ip", operands);
17409 else
17410 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17411 }
17412 else
17413 {
17414 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17415 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17416 }
17417
17418 return "";
17419 }
17420
17421
17422 /* Output a move from arm registers to arm registers of a long double
17423 OPERANDS[0] is the destination.
17424 OPERANDS[1] is the source. */
17425 const char *
17426 output_mov_long_double_arm_from_arm (rtx *operands)
17427 {
17428 /* We have to be careful here because the two might overlap. */
17429 int dest_start = REGNO (operands[0]);
17430 int src_start = REGNO (operands[1]);
17431 rtx ops[2];
17432 int i;
17433
17434 if (dest_start < src_start)
17435 {
17436 for (i = 0; i < 3; i++)
17437 {
17438 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17439 ops[1] = gen_rtx_REG (SImode, src_start + i);
17440 output_asm_insn ("mov%?\t%0, %1", ops);
17441 }
17442 }
17443 else
17444 {
17445 for (i = 2; i >= 0; i--)
17446 {
17447 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17448 ops[1] = gen_rtx_REG (SImode, src_start + i);
17449 output_asm_insn ("mov%?\t%0, %1", ops);
17450 }
17451 }
17452
17453 return "";
17454 }
17455
17456 void
17457 arm_emit_movpair (rtx dest, rtx src)
17458 {
17459 /* If the src is an immediate, simplify it. */
17460 if (CONST_INT_P (src))
17461 {
17462 HOST_WIDE_INT val = INTVAL (src);
17463 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17464 if ((val >> 16) & 0x0000ffff)
17465 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17466 GEN_INT (16)),
17467 GEN_INT ((val >> 16) & 0x0000ffff));
17468 return;
17469 }
17470 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17471 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17472 }
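/* Illustrative expansion (assumed final mnemonics): moving the constant
0x12345678 into r0 through this function emits a low 16-bit set followed by
a ZERO_EXTRACT set of the high half, which normally ends up as

movw    r0, #0x5678
movt    r0, #0x1234

while a symbolic SRC instead goes through the HIGH/LO_SUM pair emitted at
the end.  */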
17473
17474 /* Output a move between double words. It must be REG<-MEM
17475 or MEM<-REG. */
17476 const char *
17477 output_move_double (rtx *operands, bool emit, int *count)
17478 {
17479 enum rtx_code code0 = GET_CODE (operands[0]);
17480 enum rtx_code code1 = GET_CODE (operands[1]);
17481 rtx otherops[3];
17482 if (count)
17483 *count = 1;
17484
17485 /* The only case when this might happen is when
17486 you are looking at the length of a DImode instruction
17487 that has an invalid constant in it. */
17488 if (code0 == REG && code1 != MEM)
17489 {
17490 gcc_assert (!emit);
17491 *count = 2;
17492 return "";
17493 }
17494
17495 if (code0 == REG)
17496 {
17497 unsigned int reg0 = REGNO (operands[0]);
17498
17499 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17500
17501 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17502
17503 switch (GET_CODE (XEXP (operands[1], 0)))
17504 {
17505 case REG:
17506
17507 if (emit)
17508 {
17509 if (TARGET_LDRD
17510 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17511 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17512 else
17513 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17514 }
17515 break;
17516
17517 case PRE_INC:
17518 gcc_assert (TARGET_LDRD);
17519 if (emit)
17520 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17521 break;
17522
17523 case PRE_DEC:
17524 if (emit)
17525 {
17526 if (TARGET_LDRD)
17527 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17528 else
17529 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17530 }
17531 break;
17532
17533 case POST_INC:
17534 if (emit)
17535 {
17536 if (TARGET_LDRD)
17537 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17538 else
17539 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17540 }
17541 break;
17542
17543 case POST_DEC:
17544 gcc_assert (TARGET_LDRD);
17545 if (emit)
17546 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17547 break;
17548
17549 case PRE_MODIFY:
17550 case POST_MODIFY:
17551 /* Autoincrement addressing modes should never have overlapping
17552 base and destination registers, and overlapping index registers
17553 are already prohibited, so this doesn't need to worry about
17554 fix_cm3_ldrd. */
17555 otherops[0] = operands[0];
17556 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17557 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17558
17559 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17560 {
17561 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17562 {
17563 /* Registers overlap so split out the increment. */
17564 if (emit)
17565 {
17566 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17567 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17568 }
17569 if (count)
17570 *count = 2;
17571 }
17572 else
17573 {
17574 /* Use a single insn if we can.
17575 FIXME: IWMMXT allows offsets larger than ldrd can
17576 handle, fix these up with a pair of ldr. */
17577 if (TARGET_THUMB2
17578 || !CONST_INT_P (otherops[2])
17579 || (INTVAL (otherops[2]) > -256
17580 && INTVAL (otherops[2]) < 256))
17581 {
17582 if (emit)
17583 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17584 }
17585 else
17586 {
17587 if (emit)
17588 {
17589 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17590 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17591 }
17592 if (count)
17593 *count = 2;
17594
17595 }
17596 }
17597 }
17598 else
17599 {
17600 /* Use a single insn if we can.
17601 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17602 fix these up with a pair of ldr. */
17603 if (TARGET_THUMB2
17604 || !CONST_INT_P (otherops[2])
17605 || (INTVAL (otherops[2]) > -256
17606 && INTVAL (otherops[2]) < 256))
17607 {
17608 if (emit)
17609 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17610 }
17611 else
17612 {
17613 if (emit)
17614 {
17615 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17616 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17617 }
17618 if (count)
17619 *count = 2;
17620 }
17621 }
17622 break;
17623
17624 case LABEL_REF:
17625 case CONST:
17626 /* We might be able to use ldrd %0, %1 here. However the range is
17627 different to ldr/adr, and it is broken on some ARMv7-M
17628 implementations. */
17629 /* Use the second register of the pair to avoid problematic
17630 overlap. */
17631 otherops[1] = operands[1];
17632 if (emit)
17633 output_asm_insn ("adr%?\t%0, %1", otherops);
17634 operands[1] = otherops[0];
17635 if (emit)
17636 {
17637 if (TARGET_LDRD)
17638 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17639 else
17640 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17641 }
17642
17643 if (count)
17644 *count = 2;
17645 break;
17646
17647 /* ??? This needs checking for thumb2. */
17648 default:
17649 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17650 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17651 {
17652 otherops[0] = operands[0];
17653 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17654 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17655
17656 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17657 {
17658 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17659 {
17660 switch ((int) INTVAL (otherops[2]))
17661 {
17662 case -8:
17663 if (emit)
17664 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17665 return "";
17666 case -4:
17667 if (TARGET_THUMB2)
17668 break;
17669 if (emit)
17670 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17671 return "";
17672 case 4:
17673 if (TARGET_THUMB2)
17674 break;
17675 if (emit)
17676 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17677 return "";
17678 }
17679 }
17680 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17681 operands[1] = otherops[0];
17682 if (TARGET_LDRD
17683 && (REG_P (otherops[2])
17684 || TARGET_THUMB2
17685 || (CONST_INT_P (otherops[2])
17686 && INTVAL (otherops[2]) > -256
17687 && INTVAL (otherops[2]) < 256)))
17688 {
17689 if (reg_overlap_mentioned_p (operands[0],
17690 otherops[2]))
17691 {
17692 rtx tmp;
17693 /* Swap base and index registers over to
17694 avoid a conflict. */
17695 tmp = otherops[1];
17696 otherops[1] = otherops[2];
17697 otherops[2] = tmp;
17698 }
17699 /* If both registers conflict, it will usually
17700 have been fixed by a splitter. */
17701 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17702 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17703 {
17704 if (emit)
17705 {
17706 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17707 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17708 }
17709 if (count)
17710 *count = 2;
17711 }
17712 else
17713 {
17714 otherops[0] = operands[0];
17715 if (emit)
17716 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17717 }
17718 return "";
17719 }
17720
17721 if (CONST_INT_P (otherops[2]))
17722 {
17723 if (emit)
17724 {
17725 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17726 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17727 else
17728 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17729 }
17730 }
17731 else
17732 {
17733 if (emit)
17734 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17735 }
17736 }
17737 else
17738 {
17739 if (emit)
17740 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17741 }
17742
17743 if (count)
17744 *count = 2;
17745
17746 if (TARGET_LDRD)
17747 return "ldr%(d%)\t%0, [%1]";
17748
17749 return "ldm%(ia%)\t%1, %M0";
17750 }
17751 else
17752 {
17753 otherops[1] = adjust_address (operands[1], SImode, 4);
17754 /* Take care of overlapping base/data reg. */
17755 if (reg_mentioned_p (operands[0], operands[1]))
17756 {
17757 if (emit)
17758 {
17759 output_asm_insn ("ldr%?\t%0, %1", otherops);
17760 output_asm_insn ("ldr%?\t%0, %1", operands);
17761 }
17762 if (count)
17763 *count = 2;
17764
17765 }
17766 else
17767 {
17768 if (emit)
17769 {
17770 output_asm_insn ("ldr%?\t%0, %1", operands);
17771 output_asm_insn ("ldr%?\t%0, %1", otherops);
17772 }
17773 if (count)
17774 *count = 2;
17775 }
17776 }
17777 }
17778 }
17779 else
17780 {
17781 /* Constraints should ensure this. */
17782 gcc_assert (code0 == MEM && code1 == REG);
17783 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17784 || (TARGET_ARM && TARGET_LDRD));
17785
17786 switch (GET_CODE (XEXP (operands[0], 0)))
17787 {
17788 case REG:
17789 if (emit)
17790 {
17791 if (TARGET_LDRD)
17792 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17793 else
17794 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17795 }
17796 break;
17797
17798 case PRE_INC:
17799 gcc_assert (TARGET_LDRD);
17800 if (emit)
17801 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17802 break;
17803
17804 case PRE_DEC:
17805 if (emit)
17806 {
17807 if (TARGET_LDRD)
17808 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17809 else
17810 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17811 }
17812 break;
17813
17814 case POST_INC:
17815 if (emit)
17816 {
17817 if (TARGET_LDRD)
17818 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17819 else
17820 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17821 }
17822 break;
17823
17824 case POST_DEC:
17825 gcc_assert (TARGET_LDRD);
17826 if (emit)
17827 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17828 break;
17829
17830 case PRE_MODIFY:
17831 case POST_MODIFY:
17832 otherops[0] = operands[1];
17833 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17834 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17835
17836 /* IWMMXT allows offsets larger than ldrd can handle,
17837 fix these up with a pair of ldr. */
17838 if (!TARGET_THUMB2
17839 && CONST_INT_P (otherops[2])
17840 && (INTVAL(otherops[2]) <= -256
17841 || INTVAL(otherops[2]) >= 256))
17842 {
17843 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17844 {
17845 if (emit)
17846 {
17847 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17848 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17849 }
17850 if (count)
17851 *count = 2;
17852 }
17853 else
17854 {
17855 if (emit)
17856 {
17857 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17858 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17859 }
17860 if (count)
17861 *count = 2;
17862 }
17863 }
17864 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17865 {
17866 if (emit)
17867 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17868 }
17869 else
17870 {
17871 if (emit)
17872 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17873 }
17874 break;
17875
17876 case PLUS:
17877 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17878 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17879 {
17880 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17881 {
17882 case -8:
17883 if (emit)
17884 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17885 return "";
17886
17887 case -4:
17888 if (TARGET_THUMB2)
17889 break;
17890 if (emit)
17891 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17892 return "";
17893
17894 case 4:
17895 if (TARGET_THUMB2)
17896 break;
17897 if (emit)
17898 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17899 return "";
17900 }
17901 }
17902 if (TARGET_LDRD
17903 && (REG_P (otherops[2])
17904 || TARGET_THUMB2
17905 || (CONST_INT_P (otherops[2])
17906 && INTVAL (otherops[2]) > -256
17907 && INTVAL (otherops[2]) < 256)))
17908 {
17909 otherops[0] = operands[1];
17910 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17911 if (emit)
17912 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17913 return "";
17914 }
17915 /* Fall through */
17916
17917 default:
17918 otherops[0] = adjust_address (operands[0], SImode, 4);
17919 otherops[1] = operands[1];
17920 if (emit)
17921 {
17922 output_asm_insn ("str%?\t%1, %0", operands);
17923 output_asm_insn ("str%?\t%H1, %0", otherops);
17924 }
17925 if (count)
17926 *count = 2;
17927 }
17928 }
17929
17930 return "";
17931 }
17932
17933 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17934 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17935
17936 const char *
17937 output_move_quad (rtx *operands)
17938 {
17939 if (REG_P (operands[0]))
17940 {
17941 /* Load, or reg->reg move. */
17942
17943 if (MEM_P (operands[1]))
17944 {
17945 switch (GET_CODE (XEXP (operands[1], 0)))
17946 {
17947 case REG:
17948 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17949 break;
17950
17951 case LABEL_REF:
17952 case CONST:
17953 output_asm_insn ("adr%?\t%0, %1", operands);
17954 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17955 break;
17956
17957 default:
17958 gcc_unreachable ();
17959 }
17960 }
17961 else
17962 {
17963 rtx ops[2];
17964 int dest, src, i;
17965
17966 gcc_assert (REG_P (operands[1]));
17967
17968 dest = REGNO (operands[0]);
17969 src = REGNO (operands[1]);
17970
17971 /* This seems pretty dumb, but hopefully GCC won't try to do it
17972 very often. */
17973 if (dest < src)
17974 for (i = 0; i < 4; i++)
17975 {
17976 ops[0] = gen_rtx_REG (SImode, dest + i);
17977 ops[1] = gen_rtx_REG (SImode, src + i);
17978 output_asm_insn ("mov%?\t%0, %1", ops);
17979 }
17980 else
17981 for (i = 3; i >= 0; i--)
17982 {
17983 ops[0] = gen_rtx_REG (SImode, dest + i);
17984 ops[1] = gen_rtx_REG (SImode, src + i);
17985 output_asm_insn ("mov%?\t%0, %1", ops);
17986 }
17987 }
17988 }
17989 else
17990 {
17991 gcc_assert (MEM_P (operands[0]));
17992 gcc_assert (REG_P (operands[1]));
17993 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17994
17995 switch (GET_CODE (XEXP (operands[0], 0)))
17996 {
17997 case REG:
17998 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17999 break;
18000
18001 default:
18002 gcc_unreachable ();
18003 }
18004 }
18005
18006 return "";
18007 }
18008
18009 /* Output a VFP load or store instruction. */
18010
18011 const char *
18012 output_move_vfp (rtx *operands)
18013 {
18014 rtx reg, mem, addr, ops[2];
18015 int load = REG_P (operands[0]);
18016 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18017 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18018 const char *templ;
18019 char buff[50];
18020 enum machine_mode mode;
18021
18022 reg = operands[!load];
18023 mem = operands[load];
18024
18025 mode = GET_MODE (reg);
18026
18027 gcc_assert (REG_P (reg));
18028 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18029 gcc_assert (mode == SFmode
18030 || mode == DFmode
18031 || mode == SImode
18032 || mode == DImode
18033 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18034 gcc_assert (MEM_P (mem));
18035
18036 addr = XEXP (mem, 0);
18037
18038 switch (GET_CODE (addr))
18039 {
18040 case PRE_DEC:
18041 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18042 ops[0] = XEXP (addr, 0);
18043 ops[1] = reg;
18044 break;
18045
18046 case POST_INC:
18047 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18048 ops[0] = XEXP (addr, 0);
18049 ops[1] = reg;
18050 break;
18051
18052 default:
18053 templ = "f%s%c%%?\t%%%s0, %%1%s";
18054 ops[0] = reg;
18055 ops[1] = mem;
18056 break;
18057 }
18058
18059 sprintf (buff, templ,
18060 load ? "ld" : "st",
18061 dp ? 'd' : 's',
18062 dp ? "P" : "",
18063 integer_p ? "\t%@ int" : "");
18064 output_asm_insn (buff, ops);
18065
18066 return "";
18067 }
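/* Two illustrative instantiations of the templates above (operands made up):

fldd     d7, [r0]          @ DFmode load, default address form
fstmias  r0!, {s14}        @ SFmode store with POST_INC writeback

and for integer modes held in VFP registers the "\t%@ int" suffix appends an
"@ int" comment to the emitted line.  */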
18068
18069 /* Output a Neon double-word or quad-word load or store, or a load
18070 or store for larger structure modes.
18071
18072 WARNING: The ordering of elements is weird in big-endian mode,
18073 because the EABI requires that vectors stored in memory appear
18074 as though they were stored by a VSTM instruction.
18075 GCC RTL defines element ordering based on in-memory order.
18076 This can be different from the architectural ordering of elements
18077 within a NEON register. The intrinsics defined in arm_neon.h use the
18078 NEON register element ordering, not the GCC RTL element ordering.
18079
18080 For example, the in-memory ordering of a big-endian quadword
18081 vector with 16-bit elements when stored from register pair {d0,d1}
18082 will be (lowest address first, d0[N] is NEON register element N):
18083
18084 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18085
18086 When necessary, quadword registers (dN, dN+1) are moved to ARM
18087 registers from rN in the order:
18088
18089 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18090
18091 So that STM/LDM can be used on vectors in ARM registers, and the
18092 same memory layout will result as if VSTM/VLDM were used.
18093
18094 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18095 possible, which allows use of appropriate alignment tags.
18096 Note that the choice of "64" is independent of the actual vector
18097 element size; this size simply ensures that the behavior is
18098 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18099
18100 Due to limitations of those instructions, use of VST1.64/VLD1.64
18101 is not possible if:
18102 - the address contains PRE_DEC, or
18103 - the mode refers to more than 4 double-word registers
18104
18105 In those cases, it would be possible to replace VSTM/VLDM by a
18106 sequence of instructions; this is not currently implemented since
18107 this is not certain to actually improve performance. */
18108
18109 const char *
18110 output_move_neon (rtx *operands)
18111 {
18112 rtx reg, mem, addr, ops[2];
18113 int regno, nregs, load = REG_P (operands[0]);
18114 const char *templ;
18115 char buff[50];
18116 enum machine_mode mode;
18117
18118 reg = operands[!load];
18119 mem = operands[load];
18120
18121 mode = GET_MODE (reg);
18122
18123 gcc_assert (REG_P (reg));
18124 regno = REGNO (reg);
18125 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18126 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18127 || NEON_REGNO_OK_FOR_QUAD (regno));
18128 gcc_assert (VALID_NEON_DREG_MODE (mode)
18129 || VALID_NEON_QREG_MODE (mode)
18130 || VALID_NEON_STRUCT_MODE (mode));
18131 gcc_assert (MEM_P (mem));
18132
18133 addr = XEXP (mem, 0);
18134
18135 /* Strip off const from addresses like (const (plus (...))). */
18136 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18137 addr = XEXP (addr, 0);
18138
18139 switch (GET_CODE (addr))
18140 {
18141 case POST_INC:
18142 /* We have to use vldm / vstm for too-large modes. */
18143 if (nregs > 4)
18144 {
18145 templ = "v%smia%%?\t%%0!, %%h1";
18146 ops[0] = XEXP (addr, 0);
18147 }
18148 else
18149 {
18150 templ = "v%s1.64\t%%h1, %%A0";
18151 ops[0] = mem;
18152 }
18153 ops[1] = reg;
18154 break;
18155
18156 case PRE_DEC:
18157 /* We have to use vldm / vstm in this case, since there is no
18158 pre-decrement form of the vld1 / vst1 instructions. */
18159 templ = "v%smdb%%?\t%%0!, %%h1";
18160 ops[0] = XEXP (addr, 0);
18161 ops[1] = reg;
18162 break;
18163
18164 case POST_MODIFY:
18165 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18166 gcc_unreachable ();
18167
18168 case LABEL_REF:
18169 case PLUS:
18170 {
18171 int i;
18172 int overlap = -1;
18173 for (i = 0; i < nregs; i++)
18174 {
18175 /* We're only using DImode here because it's a convenient size. */
18176 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18177 ops[1] = adjust_address (mem, DImode, 8 * i);
18178 if (reg_overlap_mentioned_p (ops[0], mem))
18179 {
18180 gcc_assert (overlap == -1);
18181 overlap = i;
18182 }
18183 else
18184 {
18185 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18186 output_asm_insn (buff, ops);
18187 }
18188 }
18189 if (overlap != -1)
18190 {
18191 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18192 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18193 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18194 output_asm_insn (buff, ops);
18195 }
18196
18197 return "";
18198 }
18199
18200 default:
18201 /* We have to use vldm / vstm for too-large modes. */
18202 if (nregs > 4)
18203 templ = "v%smia%%?\t%%m0, %%h1";
18204 else
18205 templ = "v%s1.64\t%%h1, %%A0";
18206
18207 ops[0] = mem;
18208 ops[1] = reg;
18209 }
18210
18211 sprintf (buff, templ, load ? "ld" : "st");
18212 output_asm_insn (buff, ops);
18213
18214 return "";
18215 }
18216
18217 /* Compute and return the length of neon_mov<mode>, where <mode> is
18218 one of VSTRUCT modes: EI, OI, CI or XI. */
18219 int
18220 arm_attr_length_move_neon (rtx insn)
18221 {
18222 rtx reg, mem, addr;
18223 int load;
18224 enum machine_mode mode;
18225
18226 extract_insn_cached (insn);
18227
18228 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18229 {
18230 mode = GET_MODE (recog_data.operand[0]);
18231 switch (mode)
18232 {
18233 case EImode:
18234 case OImode:
18235 return 8;
18236 case CImode:
18237 return 12;
18238 case XImode:
18239 return 16;
18240 default:
18241 gcc_unreachable ();
18242 }
18243 }
18244
18245 load = REG_P (recog_data.operand[0]);
18246 reg = recog_data.operand[!load];
18247 mem = recog_data.operand[load];
18248
18249 gcc_assert (MEM_P (mem));
18250
18251 mode = GET_MODE (reg);
18252 addr = XEXP (mem, 0);
18253
18254 /* Strip off const from addresses like (const (plus (...))). */
18255 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18256 addr = XEXP (addr, 0);
18257
18258 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18259 {
18260 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18261 return insns * 4;
18262 }
18263 else
18264 return 4;
18265 }
18266
18267 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18268 return zero. */
18269
18270 int
18271 arm_address_offset_is_imm (rtx insn)
18272 {
18273 rtx mem, addr;
18274
18275 extract_insn_cached (insn);
18276
18277 if (REG_P (recog_data.operand[0]))
18278 return 0;
18279
18280 mem = recog_data.operand[0];
18281
18282 gcc_assert (MEM_P (mem));
18283
18284 addr = XEXP (mem, 0);
18285
18286 if (REG_P (addr)
18287 || (GET_CODE (addr) == PLUS
18288 && REG_P (XEXP (addr, 0))
18289 && CONST_INT_P (XEXP (addr, 1))))
18290 return 1;
18291 else
18292 return 0;
18293 }
18294
18295 /* Output an ADD r, s, #n where n may be too big for one instruction.
18296 If n is zero and the source and destination registers are the same, output nothing. */
18297 const char *
18298 output_add_immediate (rtx *operands)
18299 {
18300 HOST_WIDE_INT n = INTVAL (operands[2]);
18301
18302 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18303 {
18304 if (n < 0)
18305 output_multi_immediate (operands,
18306 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18307 -n);
18308 else
18309 output_multi_immediate (operands,
18310 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18311 n);
18312 }
18313
18314 return "";
18315 }
18316
18317 /* Output a multiple immediate operation.
18318 OPERANDS is the vector of operands referred to in the output patterns.
18319 INSTR1 is the output pattern to use for the first constant.
18320 INSTR2 is the output pattern to use for subsequent constants.
18321 IMMED_OP is the index of the constant slot in OPERANDS.
18322 N is the constant value. */
18323 static const char *
18324 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18325 int immed_op, HOST_WIDE_INT n)
18326 {
18327 #if HOST_BITS_PER_WIDE_INT > 32
18328 n &= 0xffffffff;
18329 #endif
18330
18331 if (n == 0)
18332 {
18333 /* Quick and easy output. */
18334 operands[immed_op] = const0_rtx;
18335 output_asm_insn (instr1, operands);
18336 }
18337 else
18338 {
18339 int i;
18340 const char * instr = instr1;
18341
18342 /* Note that n is never zero here (which would give no output). */
18343 for (i = 0; i < 32; i += 2)
18344 {
18345 if (n & (3 << i))
18346 {
18347 operands[immed_op] = GEN_INT (n & (255 << i));
18348 output_asm_insn (instr, operands);
18349 instr = instr2;
18350 i += 6;
18351 }
18352 }
18353 }
18354
18355 return "";
18356 }
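/* Worked example (illustrative operands): with INSTR1/INSTR2 being the add
patterns used by output_add_immediate and n = 0x12300, the scan above finds
set bits in two separate byte-aligned chunks and emits

add     r0, r1, #0x2300
add     r0, r0, #0x10000

each chunk being an 8-bit value at an even rotation and therefore a valid
ARM immediate.  */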
18357
18358 /* Return the name of a shifter operation. */
18359 static const char *
18360 arm_shift_nmem(enum rtx_code code)
18361 {
18362 switch (code)
18363 {
18364 case ASHIFT:
18365 return ARM_LSL_NAME;
18366
18367 case ASHIFTRT:
18368 return "asr";
18369
18370 case LSHIFTRT:
18371 return "lsr";
18372
18373 case ROTATERT:
18374 return "ror";
18375
18376 default:
18377 abort();
18378 }
18379 }
18380
18381 /* Return the appropriate ARM instruction for the operation code.
18382 The returned result should not be overwritten. OP is the rtx of the
18383 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18384 was shifted. */
18385 const char *
18386 arithmetic_instr (rtx op, int shift_first_arg)
18387 {
18388 switch (GET_CODE (op))
18389 {
18390 case PLUS:
18391 return "add";
18392
18393 case MINUS:
18394 return shift_first_arg ? "rsb" : "sub";
18395
18396 case IOR:
18397 return "orr";
18398
18399 case XOR:
18400 return "eor";
18401
18402 case AND:
18403 return "and";
18404
18405 case ASHIFT:
18406 case ASHIFTRT:
18407 case LSHIFTRT:
18408 case ROTATERT:
18409 return arm_shift_nmem(GET_CODE(op));
18410
18411 default:
18412 gcc_unreachable ();
18413 }
18414 }
18415
18416 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18417 for the operation code. The returned result should not be overwritten.
18418 OP is the rtx code of the shift.
18419 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18420 shift. */
18421 static const char *
18422 shift_op (rtx op, HOST_WIDE_INT *amountp)
18423 {
18424 const char * mnem;
18425 enum rtx_code code = GET_CODE (op);
18426
18427 switch (code)
18428 {
18429 case ROTATE:
18430 if (!CONST_INT_P (XEXP (op, 1)))
18431 {
18432 output_operand_lossage ("invalid shift operand");
18433 return NULL;
18434 }
18435
18436 code = ROTATERT;
18437 *amountp = 32 - INTVAL (XEXP (op, 1));
18438 mnem = "ror";
18439 break;
18440
18441 case ASHIFT:
18442 case ASHIFTRT:
18443 case LSHIFTRT:
18444 case ROTATERT:
18445 mnem = arm_shift_nmem(code);
18446 if (CONST_INT_P (XEXP (op, 1)))
18447 {
18448 *amountp = INTVAL (XEXP (op, 1));
18449 }
18450 else if (REG_P (XEXP (op, 1)))
18451 {
18452 *amountp = -1;
18453 return mnem;
18454 }
18455 else
18456 {
18457 output_operand_lossage ("invalid shift operand");
18458 return NULL;
18459 }
18460 break;
18461
18462 case MULT:
18463 /* We never have to worry about the amount being other than a
18464 power of 2, since this case can never be reloaded from a reg. */
18465 if (!CONST_INT_P (XEXP (op, 1)))
18466 {
18467 output_operand_lossage ("invalid shift operand");
18468 return NULL;
18469 }
18470
18471 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18472
18473 /* Amount must be a power of two. */
18474 if (*amountp & (*amountp - 1))
18475 {
18476 output_operand_lossage ("invalid shift operand");
18477 return NULL;
18478 }
18479
18480 *amountp = int_log2 (*amountp);
18481 return ARM_LSL_NAME;
18482
18483 default:
18484 output_operand_lossage ("invalid shift operand");
18485 return NULL;
18486 }
18487
18488 /* This is not 100% correct, but follows from the desire to merge
18489 multiplication by a power of 2 with the recognizer for a
18490 shift. >=32 is not a valid shift for "lsl", so we must try and
18491 output a shift that produces the correct arithmetical result.
18492 Using lsr #32 is identical except for the fact that the carry bit
18493 is not set correctly if we set the flags; but we never use the
18494 carry bit from such an operation, so we can ignore that. */
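/* For instance, a requested ASHIFT of 32 or more cannot be encoded as
   "lsl", so it is emitted below as "lsr" with an amount of 32, which
   still yields the arithmetically correct result of zero.  */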
18495 if (code == ROTATERT)
18496 /* Rotate is just modulo 32. */
18497 *amountp &= 31;
18498 else if (*amountp != (*amountp & 31))
18499 {
18500 if (code == ASHIFT)
18501 mnem = "lsr";
18502 *amountp = 32;
18503 }
18504
18505 /* Shifts of 0 are no-ops. */
18506 if (*amountp == 0)
18507 return NULL;
18508
18509 return mnem;
18510 }
18511
18512 /* Obtain the shift from the POWER of two. */
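/* For example, int_log2 (8) returns 3.  The caller (shift_op above)
   has already checked that POWER is a power of two.  */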
18513
18514 static HOST_WIDE_INT
18515 int_log2 (HOST_WIDE_INT power)
18516 {
18517 HOST_WIDE_INT shift = 0;
18518
18519 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18520 {
18521 gcc_assert (shift <= 31);
18522 shift++;
18523 }
18524
18525 return shift;
18526 }
18527
18528 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18529 because /bin/as is horribly restrictive. The judgement about
18530 whether or not each character is 'printable' (and can be output as
18531 is) or not (and must be printed with an octal escape) must be made
18532 with reference to the *host* character set -- the situation is
18533 similar to that discussed in the comments above pp_c_char in
18534 c-pretty-print.c. */
18535
18536 #define MAX_ASCII_LEN 51
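/* With the limit above, a 60-character printable string, for example,
   is split into two .ascii directives; note that an octal escape
   counts as four characters against the limit.  */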
18537
18538 void
18539 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18540 {
18541 int i;
18542 int len_so_far = 0;
18543
18544 fputs ("\t.ascii\t\"", stream);
18545
18546 for (i = 0; i < len; i++)
18547 {
18548 int c = p[i];
18549
18550 if (len_so_far >= MAX_ASCII_LEN)
18551 {
18552 fputs ("\"\n\t.ascii\t\"", stream);
18553 len_so_far = 0;
18554 }
18555
18556 if (ISPRINT (c))
18557 {
18558 if (c == '\\' || c == '\"')
18559 {
18560 putc ('\\', stream);
18561 len_so_far++;
18562 }
18563 putc (c, stream);
18564 len_so_far++;
18565 }
18566 else
18567 {
18568 fprintf (stream, "\\%03o", c);
18569 len_so_far += 4;
18570 }
18571 }
18572
18573 fputs ("\"\n", stream);
18574 }
18575 \f
18576 /* Compute the register save mask for registers 0 through 12
18577 inclusive. This code is used by arm_compute_save_reg_mask. */
18578
18579 static unsigned long
18580 arm_compute_save_reg0_reg12_mask (void)
18581 {
18582 unsigned long func_type = arm_current_func_type ();
18583 unsigned long save_reg_mask = 0;
18584 unsigned int reg;
18585
18586 if (IS_INTERRUPT (func_type))
18587 {
18588 unsigned int max_reg;
18589 /* Interrupt functions must not corrupt any registers,
18590 even call clobbered ones. If this is a leaf function
18591 we can just examine the registers used by the RTL, but
18592 otherwise we have to assume that whatever function is
18593 called might clobber anything, and so we have to save
18594 all the call-clobbered registers as well. */
18595 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18596 /* FIQ handlers have registers r8 - r12 banked, so
18597 we only need to check r0 - r7.  Normal ISRs only
18598 bank r14 and r15, so we must check up to r12.
18599 r13 is the stack pointer which is always preserved,
18600 so we do not need to consider it here. */
18601 max_reg = 7;
18602 else
18603 max_reg = 12;
18604
18605 for (reg = 0; reg <= max_reg; reg++)
18606 if (df_regs_ever_live_p (reg)
18607 || (! crtl->is_leaf && call_used_regs[reg]))
18608 save_reg_mask |= (1 << reg);
18609
18610 /* Also save the pic base register if necessary. */
18611 if (flag_pic
18612 && !TARGET_SINGLE_PIC_BASE
18613 && arm_pic_register != INVALID_REGNUM
18614 && crtl->uses_pic_offset_table)
18615 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18616 }
18617 else if (IS_VOLATILE (func_type))
18618 {
18619 /* For noreturn functions we historically omitted register saves
18620 altogether. However this really messes up debugging. As a
18621 compromise save just the frame pointers. Combined with the link
18622 register saved elsewhere this should be sufficient to get
18623 a backtrace. */
18624 if (frame_pointer_needed)
18625 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18626 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18627 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18628 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18629 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18630 }
18631 else
18632 {
18633 /* In the normal case we only need to save those registers
18634 which are call saved and which are used by this function. */
18635 for (reg = 0; reg <= 11; reg++)
18636 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18637 save_reg_mask |= (1 << reg);
18638
18639 /* Handle the frame pointer as a special case. */
18640 if (frame_pointer_needed)
18641 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18642
18643 /* If we aren't loading the PIC register,
18644 don't stack it even though it may be live. */
18645 if (flag_pic
18646 && !TARGET_SINGLE_PIC_BASE
18647 && arm_pic_register != INVALID_REGNUM
18648 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18649 || crtl->uses_pic_offset_table))
18650 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18651
18652 /* The prologue will copy SP into R0, so save it. */
18653 if (IS_STACKALIGN (func_type))
18654 save_reg_mask |= 1;
18655 }
18656
18657 /* Save registers so the exception handler can modify them. */
18658 if (crtl->calls_eh_return)
18659 {
18660 unsigned int i;
18661
18662 for (i = 0; ; i++)
18663 {
18664 reg = EH_RETURN_DATA_REGNO (i);
18665 if (reg == INVALID_REGNUM)
18666 break;
18667 save_reg_mask |= 1 << reg;
18668 }
18669 }
18670
18671 return save_reg_mask;
18672 }
18673
18674 /* Return true if r3 is live at the start of the function. */
18675
18676 static bool
18677 arm_r3_live_at_start_p (void)
18678 {
18679 /* Just look at cfg info, which is still close enough to correct at this
18680 point. This gives false positives for broken functions that might use
18681 uninitialized data that happens to be allocated in r3, but who cares? */
18682 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18683 }
18684
18685 /* Compute the number of bytes used to store the static chain register on the
18686 stack, above the stack frame. We need to know this accurately to get the
18687 alignment of the rest of the stack frame correct. */
18688
18689 static int
18690 arm_compute_static_chain_stack_bytes (void)
18691 {
18692 /* See the defining assertion in arm_expand_prologue. */
18693 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18694 && IS_NESTED (arm_current_func_type ())
18695 && arm_r3_live_at_start_p ()
18696 && crtl->args.pretend_args_size == 0)
18697 return 4;
18698
18699 return 0;
18700 }
18701
18702 /* Compute a bit mask of which registers need to be
18703 saved on the stack for the current function.
18704 This is used by arm_get_frame_offsets, which may add extra registers. */
18705
18706 static unsigned long
18707 arm_compute_save_reg_mask (void)
18708 {
18709 unsigned int save_reg_mask = 0;
18710 unsigned long func_type = arm_current_func_type ();
18711 unsigned int reg;
18712
18713 if (IS_NAKED (func_type))
18714 /* This should never really happen. */
18715 return 0;
18716
18717 /* If we are creating a stack frame, then we must save the frame pointer,
18718 IP (which will hold the old stack pointer), LR and the PC. */
18719 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18720 save_reg_mask |=
18721 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18722 | (1 << IP_REGNUM)
18723 | (1 << LR_REGNUM)
18724 | (1 << PC_REGNUM);
18725
18726 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18727
18728 /* Decide if we need to save the link register.
18729 Interrupt routines have their own banked link register,
18730 so they never need to save it.
18731 Otherwise if we do not use the link register we do not need to save
18732 it. If we are pushing other registers onto the stack however, we
18733 can save an instruction in the epilogue by pushing the link register
18734 now and then popping it back into the PC. This incurs extra memory
18735 accesses though, so we only do it when optimizing for size, and only
18736 if we know that we will not need a fancy return sequence. */
18737 if (df_regs_ever_live_p (LR_REGNUM)
18738 || (save_reg_mask
18739 && optimize_size
18740 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18741 && !crtl->calls_eh_return))
18742 save_reg_mask |= 1 << LR_REGNUM;
18743
18744 if (cfun->machine->lr_save_eliminated)
18745 save_reg_mask &= ~ (1 << LR_REGNUM);
18746
18747 if (TARGET_REALLY_IWMMXT
18748 && ((bit_count (save_reg_mask)
18749 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18750 arm_compute_static_chain_stack_bytes ())
18751 ) % 2) != 0)
18752 {
18753 /* The total number of registers that are going to be pushed
18754 onto the stack is odd. We need to ensure that the stack
18755 is 64-bit aligned before we start to save iWMMXt registers,
18756 and also before we start to create locals. (A local variable
18757 might be a double or long long which we will load/store using
18758 an iWMMXt instruction). Therefore we need to push another
18759 ARM register, so that the stack will be 64-bit aligned. We
18760 try to avoid using the arg registers (r0 - r3) as they might be
18761 used to pass values in a tail call. */
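/* As an illustration, if five core registers and no pretend args were
   due to be pushed, the count would be odd and the loop below would
   add one more register (preferring r4-r12) to restore 64-bit stack
   alignment.  */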
18762 for (reg = 4; reg <= 12; reg++)
18763 if ((save_reg_mask & (1 << reg)) == 0)
18764 break;
18765
18766 if (reg <= 12)
18767 save_reg_mask |= (1 << reg);
18768 else
18769 {
18770 cfun->machine->sibcall_blocked = 1;
18771 save_reg_mask |= (1 << 3);
18772 }
18773 }
18774
18775 /* We may need to push an additional register for use initializing the
18776 PIC base register. */
18777 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18778 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18779 {
18780 reg = thumb_find_work_register (1 << 4);
18781 if (!call_used_regs[reg])
18782 save_reg_mask |= (1 << reg);
18783 }
18784
18785 return save_reg_mask;
18786 }
18787
18788
18789 /* Compute a bit mask of which registers need to be
18790 saved on the stack for the current function. */
18791 static unsigned long
18792 thumb1_compute_save_reg_mask (void)
18793 {
18794 unsigned long mask;
18795 unsigned reg;
18796
18797 mask = 0;
18798 for (reg = 0; reg < 12; reg ++)
18799 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18800 mask |= 1 << reg;
18801
18802 if (flag_pic
18803 && !TARGET_SINGLE_PIC_BASE
18804 && arm_pic_register != INVALID_REGNUM
18805 && crtl->uses_pic_offset_table)
18806 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18807
18808 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18809 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18810 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18811
18812 /* LR will also be pushed if any lo regs are pushed. */
18813 if (mask & 0xff || thumb_force_lr_save ())
18814 mask |= (1 << LR_REGNUM);
18815
18816 /* Make sure we have a low work register if we need one.
18817 We will need one if we are going to push a high register,
18818 but we are not currently intending to push a low register. */
18819 if ((mask & 0xff) == 0
18820 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18821 {
18822 /* Use thumb_find_work_register to choose which register
18823 we will use. If the register is live then we will
18824 have to push it. Use LAST_LO_REGNUM as our fallback
18825 choice for the register to select. */
18826 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18827 /* Make sure the register returned by thumb_find_work_register is
18828 not part of the return value. */
18829 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18830 reg = LAST_LO_REGNUM;
18831
18832 if (! call_used_regs[reg])
18833 mask |= 1 << reg;
18834 }
18835
18836 /* The 504 below is 8 bytes less than 512 because there are two possible
18837 alignment words. We can't tell here if they will be present or not so we
18838 have to play it safe and assume that they are. */
18839 if ((CALLER_INTERWORKING_SLOT_SIZE +
18840 ROUND_UP_WORD (get_frame_size ()) +
18841 crtl->outgoing_args_size) >= 504)
18842 {
18843 /* This is the same as the code in thumb1_expand_prologue() which
18844 determines which register to use for stack decrement. */
18845 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18846 if (mask & (1 << reg))
18847 break;
18848
18849 if (reg > LAST_LO_REGNUM)
18850 {
18851 /* Make sure we have a register available for stack decrement. */
18852 mask |= 1 << LAST_LO_REGNUM;
18853 }
18854 }
18855
18856 return mask;
18857 }
18858
18859
18860 /* Return the number of bytes required to save VFP registers. */
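/* Roughly, each contiguous run of live call-saved D registers costs
   8 bytes per register, and on pre-ARMv6 cores a run of exactly two is
   padded to three to work around the ARM10 VFPr1 erratum handled
   below.  */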
18861 static int
18862 arm_get_vfp_saved_size (void)
18863 {
18864 unsigned int regno;
18865 int count;
18866 int saved;
18867
18868 saved = 0;
18869 /* Space for saved VFP registers. */
18870 if (TARGET_HARD_FLOAT && TARGET_VFP)
18871 {
18872 count = 0;
18873 for (regno = FIRST_VFP_REGNUM;
18874 regno < LAST_VFP_REGNUM;
18875 regno += 2)
18876 {
18877 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18878 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18879 {
18880 if (count > 0)
18881 {
18882 /* Workaround ARM10 VFPr1 bug. */
18883 if (count == 2 && !arm_arch6)
18884 count++;
18885 saved += count * 8;
18886 }
18887 count = 0;
18888 }
18889 else
18890 count++;
18891 }
18892 if (count > 0)
18893 {
18894 if (count == 2 && !arm_arch6)
18895 count++;
18896 saved += count * 8;
18897 }
18898 }
18899 return saved;
18900 }
18901
18902
18903 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18904 everything bar the final return instruction.  If SIMPLE_RETURN is true,
18905 then do not output the epilogue, because it has already been emitted in RTL.  */
18906 const char *
18907 output_return_instruction (rtx operand, bool really_return, bool reverse,
18908 bool simple_return)
18909 {
18910 char conditional[10];
18911 char instr[100];
18912 unsigned reg;
18913 unsigned long live_regs_mask;
18914 unsigned long func_type;
18915 arm_stack_offsets *offsets;
18916
18917 func_type = arm_current_func_type ();
18918
18919 if (IS_NAKED (func_type))
18920 return "";
18921
18922 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18923 {
18924 /* If this function was declared non-returning, and we have
18925 found a tail call, then we have to trust that the called
18926 function won't return. */
18927 if (really_return)
18928 {
18929 rtx ops[2];
18930
18931 /* Otherwise, trap an attempted return by aborting. */
18932 ops[0] = operand;
18933 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18934 : "abort");
18935 assemble_external_libcall (ops[1]);
18936 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18937 }
18938
18939 return "";
18940 }
18941
18942 gcc_assert (!cfun->calls_alloca || really_return);
18943
18944 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18945
18946 cfun->machine->return_used_this_function = 1;
18947
18948 offsets = arm_get_frame_offsets ();
18949 live_regs_mask = offsets->saved_regs_mask;
18950
18951 if (!simple_return && live_regs_mask)
18952 {
18953 const char * return_reg;
18954
18955 /* If we do not have any special requirements for function exit
18956 (e.g. interworking) then we can load the return address
18957 directly into the PC. Otherwise we must load it into LR. */
18958 if (really_return
18959 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18960 return_reg = reg_names[PC_REGNUM];
18961 else
18962 return_reg = reg_names[LR_REGNUM];
18963
18964 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18965 {
18966 /* There are three possible reasons for the IP register
18967 being saved. 1) a stack frame was created, in which case
18968 IP contains the old stack pointer, or 2) an ISR routine
18969 corrupted it, or 3) it was saved to align the stack on
18970 iWMMXt. In case 1, restore IP into SP, otherwise just
18971 restore IP. */
18972 if (frame_pointer_needed)
18973 {
18974 live_regs_mask &= ~ (1 << IP_REGNUM);
18975 live_regs_mask |= (1 << SP_REGNUM);
18976 }
18977 else
18978 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18979 }
18980
18981 /* On some ARM architectures it is faster to use LDR rather than
18982 LDM to load a single register. On other architectures, the
18983 cost is the same. In 26 bit mode, or for exception handlers,
18984 we have to use LDM to load the PC so that the CPSR is also
18985 restored. */
18986 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18987 if (live_regs_mask == (1U << reg))
18988 break;
18989
18990 if (reg <= LAST_ARM_REGNUM
18991 && (reg != LR_REGNUM
18992 || ! really_return
18993 || ! IS_INTERRUPT (func_type)))
18994 {
18995 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18996 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18997 }
18998 else
18999 {
19000 char *p;
19001 int first = 1;
19002
19003 /* Generate the load multiple instruction to restore the
19004 registers. Note we can get here, even if
19005 frame_pointer_needed is true, but only if sp already
19006 points to the base of the saved core registers. */
19007 if (live_regs_mask & (1 << SP_REGNUM))
19008 {
19009 unsigned HOST_WIDE_INT stack_adjust;
19010
19011 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19012 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19013
19014 if (stack_adjust && arm_arch5 && TARGET_ARM)
19015 if (TARGET_UNIFIED_ASM)
19016 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19017 else
19018 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19019 else
19020 {
19021 /* If we can't use ldmib (SA110 bug),
19022 then try to pop r3 instead. */
19023 if (stack_adjust)
19024 live_regs_mask |= 1 << 3;
19025
19026 if (TARGET_UNIFIED_ASM)
19027 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19028 else
19029 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19030 }
19031 }
19032 else
19033 if (TARGET_UNIFIED_ASM)
19034 sprintf (instr, "pop%s\t{", conditional);
19035 else
19036 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19037
19038 p = instr + strlen (instr);
19039
19040 for (reg = 0; reg <= SP_REGNUM; reg++)
19041 if (live_regs_mask & (1 << reg))
19042 {
19043 int l = strlen (reg_names[reg]);
19044
19045 if (first)
19046 first = 0;
19047 else
19048 {
19049 memcpy (p, ", ", 2);
19050 p += 2;
19051 }
19052
19053 memcpy (p, "%|", 2);
19054 memcpy (p + 2, reg_names[reg], l);
19055 p += l + 2;
19056 }
19057
19058 if (live_regs_mask & (1 << LR_REGNUM))
19059 {
19060 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19061 /* If returning from an interrupt, restore the CPSR. */
19062 if (IS_INTERRUPT (func_type))
19063 strcat (p, "^");
19064 }
19065 else
19066 strcpy (p, "}");
19067 }
19068
19069 output_asm_insn (instr, & operand);
19070
19071 /* See if we need to generate an extra instruction to
19072 perform the actual function return. */
19073 if (really_return
19074 && func_type != ARM_FT_INTERWORKED
19075 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19076 {
19077 /* The return has already been handled
19078 by loading the LR into the PC. */
19079 return "";
19080 }
19081 }
19082
19083 if (really_return)
19084 {
19085 switch ((int) ARM_FUNC_TYPE (func_type))
19086 {
19087 case ARM_FT_ISR:
19088 case ARM_FT_FIQ:
19089 /* ??? This is wrong for unified assembly syntax. */
19090 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19091 break;
19092
19093 case ARM_FT_INTERWORKED:
19094 sprintf (instr, "bx%s\t%%|lr", conditional);
19095 break;
19096
19097 case ARM_FT_EXCEPTION:
19098 /* ??? This is wrong for unified assembly syntax. */
19099 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19100 break;
19101
19102 default:
19103 /* Use bx if it's available. */
19104 if (arm_arch5 || arm_arch4t)
19105 sprintf (instr, "bx%s\t%%|lr", conditional);
19106 else
19107 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19108 break;
19109 }
19110
19111 output_asm_insn (instr, & operand);
19112 }
19113
19114 return "";
19115 }
19116
19117 /* Write the function name into the code section, directly preceding
19118 the function prologue.
19119
19120 Code will be output similar to this:
19121 t0
19122 .ascii "arm_poke_function_name", 0
19123 .align
19124 t1
19125 .word 0xff000000 + (t1 - t0)
19126 arm_poke_function_name
19127 mov ip, sp
19128 stmfd sp!, {fp, ip, lr, pc}
19129 sub fp, ip, #4
19130
19131 When performing a stack backtrace, code can inspect the value
19132 of 'pc' stored at 'fp' + 0. If the trace function then looks
19133 at location pc - 12 and the top 8 bits are set, then we know
19134 that there is a function name embedded immediately preceding this
19135 location, whose length is ((pc[-3]) & 0x00ffffff).
19136
19137 We assume that pc is declared as a pointer to an unsigned long.
19138
19139 It is of no benefit to output the function name if we are assembling
19140 a leaf function. These function types will not contain a stack
19141 backtrace structure, therefore it is not possible to determine the
19142 function name. */
19143 void
19144 arm_poke_function_name (FILE *stream, const char *name)
19145 {
19146 unsigned long alignlength;
19147 unsigned long length;
19148 rtx x;
19149
19150 length = strlen (name) + 1;
19151 alignlength = ROUND_UP_WORD (length);
19152
19153 ASM_OUTPUT_ASCII (stream, name, length);
19154 ASM_OUTPUT_ALIGN (stream, 2);
19155 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19156 assemble_aligned_integer (UNITS_PER_WORD, x);
19157 }
19158
19159 /* Place some comments into the assembler stream
19160 describing the current function. */
19161 static void
19162 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19163 {
19164 unsigned long func_type;
19165
19166 /* ??? Do we want to print some of the below anyway? */
19167 if (TARGET_THUMB1)
19168 return;
19169
19170 /* Sanity check. */
19171 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19172
19173 func_type = arm_current_func_type ();
19174
19175 switch ((int) ARM_FUNC_TYPE (func_type))
19176 {
19177 default:
19178 case ARM_FT_NORMAL:
19179 break;
19180 case ARM_FT_INTERWORKED:
19181 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19182 break;
19183 case ARM_FT_ISR:
19184 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19185 break;
19186 case ARM_FT_FIQ:
19187 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19188 break;
19189 case ARM_FT_EXCEPTION:
19190 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19191 break;
19192 }
19193
19194 if (IS_NAKED (func_type))
19195 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19196
19197 if (IS_VOLATILE (func_type))
19198 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19199
19200 if (IS_NESTED (func_type))
19201 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19202 if (IS_STACKALIGN (func_type))
19203 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19204
19205 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19206 crtl->args.size,
19207 crtl->args.pretend_args_size, frame_size);
19208
19209 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19210 frame_pointer_needed,
19211 cfun->machine->uses_anonymous_args);
19212
19213 if (cfun->machine->lr_save_eliminated)
19214 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19215
19216 if (crtl->calls_eh_return)
19217 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19218
19219 }
19220
19221 static void
19222 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19223 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19224 {
19225 arm_stack_offsets *offsets;
19226
19227 if (TARGET_THUMB1)
19228 {
19229 int regno;
19230
19231 /* Emit any call-via-reg trampolines that are needed for v4t support
19232 of call_reg and call_value_reg type insns. */
19233 for (regno = 0; regno < LR_REGNUM; regno++)
19234 {
19235 rtx label = cfun->machine->call_via[regno];
19236
19237 if (label != NULL)
19238 {
19239 switch_to_section (function_section (current_function_decl));
19240 targetm.asm_out.internal_label (asm_out_file, "L",
19241 CODE_LABEL_NUMBER (label));
19242 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19243 }
19244 }
19245
19246 /* ??? Probably not safe to set this here, since it assumes that a
19247 function will be emitted as assembly immediately after we generate
19248 RTL for it. This does not happen for inline functions. */
19249 cfun->machine->return_used_this_function = 0;
19250 }
19251 else /* TARGET_32BIT */
19252 {
19253 /* We need to take into account any stack-frame rounding. */
19254 offsets = arm_get_frame_offsets ();
19255
19256 gcc_assert (!use_return_insn (FALSE, NULL)
19257 || (cfun->machine->return_used_this_function != 0)
19258 || offsets->saved_regs == offsets->outgoing_args
19259 || frame_pointer_needed);
19260
19261 /* Reset the ARM-specific per-function variables. */
19262 after_arm_reorg = 0;
19263 }
19264 }
19265
19266 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19267 STR and STRD.  If an even number of registers is being pushed, one
19268 or more STRD patterns are created for each register pair.  If an
19269 odd number of registers is pushed, emit an initial STR followed by
19270 as many STRD instructions as are needed. This works best when the
19271 stack is initially 64-bit aligned (the normal case), since it
19272 ensures that each STRD is also 64-bit aligned. */
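/* For example, pushing {r4, r5, r6} (an odd count) produces roughly
   "str r4, [sp, #-12]!" followed by "strd r5, r6, [sp, #4]", so the
   STRD address stays doubleword aligned whenever the incoming SP
   was.  */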
19273 static void
19274 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19275 {
19276 int num_regs = 0;
19277 int i;
19278 int regno;
19279 rtx par = NULL_RTX;
19280 rtx dwarf = NULL_RTX;
19281 rtx tmp;
19282 bool first = true;
19283
19284 num_regs = bit_count (saved_regs_mask);
19285
19286 /* Must be at least one register to save, and can't save SP or PC. */
19287 gcc_assert (num_regs > 0 && num_regs <= 14);
19288 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19289 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19290
19291 /* Create sequence for DWARF info. All the frame-related data for
19292 debugging is held in this wrapper. */
19293 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19294
19295 /* Describe the stack adjustment. */
19296 tmp = gen_rtx_SET (VOIDmode,
19297 stack_pointer_rtx,
19298 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19299 RTX_FRAME_RELATED_P (tmp) = 1;
19300 XVECEXP (dwarf, 0, 0) = tmp;
19301
19302 /* Find the first register. */
19303 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19304 ;
19305
19306 i = 0;
19307
19308 /* If there's an odd number of registers to push, start off by
19309 pushing a single register. This ensures that subsequent strd
19310 operations are dword aligned (assuming that SP was originally
19311 64-bit aligned). */
19312 if ((num_regs & 1) != 0)
19313 {
19314 rtx reg, mem, insn;
19315
19316 reg = gen_rtx_REG (SImode, regno);
19317 if (num_regs == 1)
19318 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19319 stack_pointer_rtx));
19320 else
19321 mem = gen_frame_mem (Pmode,
19322 gen_rtx_PRE_MODIFY
19323 (Pmode, stack_pointer_rtx,
19324 plus_constant (Pmode, stack_pointer_rtx,
19325 -4 * num_regs)));
19326
19327 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19328 RTX_FRAME_RELATED_P (tmp) = 1;
19329 insn = emit_insn (tmp);
19330 RTX_FRAME_RELATED_P (insn) = 1;
19331 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19332 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19333 reg);
19334 RTX_FRAME_RELATED_P (tmp) = 1;
19335 i++;
19336 regno++;
19337 XVECEXP (dwarf, 0, i) = tmp;
19338 first = false;
19339 }
19340
19341 while (i < num_regs)
19342 if (saved_regs_mask & (1 << regno))
19343 {
19344 rtx reg1, reg2, mem1, mem2;
19345 rtx tmp0, tmp1, tmp2;
19346 int regno2;
19347
19348 /* Find the register to pair with this one. */
19349 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19350 regno2++)
19351 ;
19352
19353 reg1 = gen_rtx_REG (SImode, regno);
19354 reg2 = gen_rtx_REG (SImode, regno2);
19355
19356 if (first)
19357 {
19358 rtx insn;
19359
19360 first = false;
19361 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19362 stack_pointer_rtx,
19363 -4 * num_regs));
19364 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19365 stack_pointer_rtx,
19366 -4 * (num_regs - 1)));
19367 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19368 plus_constant (Pmode, stack_pointer_rtx,
19369 -4 * (num_regs)));
19370 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19371 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19372 RTX_FRAME_RELATED_P (tmp0) = 1;
19373 RTX_FRAME_RELATED_P (tmp1) = 1;
19374 RTX_FRAME_RELATED_P (tmp2) = 1;
19375 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19376 XVECEXP (par, 0, 0) = tmp0;
19377 XVECEXP (par, 0, 1) = tmp1;
19378 XVECEXP (par, 0, 2) = tmp2;
19379 insn = emit_insn (par);
19380 RTX_FRAME_RELATED_P (insn) = 1;
19381 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19382 }
19383 else
19384 {
19385 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19386 stack_pointer_rtx,
19387 4 * i));
19388 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19389 stack_pointer_rtx,
19390 4 * (i + 1)));
19391 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19392 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19393 RTX_FRAME_RELATED_P (tmp1) = 1;
19394 RTX_FRAME_RELATED_P (tmp2) = 1;
19395 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19396 XVECEXP (par, 0, 0) = tmp1;
19397 XVECEXP (par, 0, 1) = tmp2;
19398 emit_insn (par);
19399 }
19400
19401 /* Create unwind information. This is an approximation. */
19402 tmp1 = gen_rtx_SET (VOIDmode,
19403 gen_frame_mem (Pmode,
19404 plus_constant (Pmode,
19405 stack_pointer_rtx,
19406 4 * i)),
19407 reg1);
19408 tmp2 = gen_rtx_SET (VOIDmode,
19409 gen_frame_mem (Pmode,
19410 plus_constant (Pmode,
19411 stack_pointer_rtx,
19412 4 * (i + 1))),
19413 reg2);
19414
19415 RTX_FRAME_RELATED_P (tmp1) = 1;
19416 RTX_FRAME_RELATED_P (tmp2) = 1;
19417 XVECEXP (dwarf, 0, i + 1) = tmp1;
19418 XVECEXP (dwarf, 0, i + 2) = tmp2;
19419 i += 2;
19420 regno = regno2 + 1;
19421 }
19422 else
19423 regno++;
19424
19425 return;
19426 }
19427
19428 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19429 whenever possible, otherwise it emits single-word stores. The first store
19430 also allocates stack space for all saved registers, using writeback with
19431 post-addressing mode. All other stores use offset addressing. If no STRD
19432 can be emitted, this function emits a sequence of single-word stores,
19433 and not an STM as before, because single-word stores provide more
19434 scheduling freedom and can be turned into an STM by peephole optimizations.  */
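/* For example, pushing {r4, r5, r7} produces roughly
   "strd r4, r5, [sp, #-12]!" followed by "str r7, [sp, #8]", with the
   first store performing the whole stack allocation.  */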
19435 static void
19436 arm_emit_strd_push (unsigned long saved_regs_mask)
19437 {
19438 int num_regs = 0;
19439 int i, j, dwarf_index = 0;
19440 int offset = 0;
19441 rtx dwarf = NULL_RTX;
19442 rtx insn = NULL_RTX;
19443 rtx tmp, mem;
19444
19445 /* TODO: More efficient code can be emitted by changing the
19446 layout, e.g., first push all pairs that can use STRD to keep the
19447 stack aligned, and then push all other registers. */
19448 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19449 if (saved_regs_mask & (1 << i))
19450 num_regs++;
19451
19452 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19453 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19454 gcc_assert (num_regs > 0);
19455
19456 /* Create sequence for DWARF info. */
19457 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19458
19459 /* For dwarf info, we generate explicit stack update. */
19460 tmp = gen_rtx_SET (VOIDmode,
19461 stack_pointer_rtx,
19462 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19463 RTX_FRAME_RELATED_P (tmp) = 1;
19464 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19465
19466 /* Save registers. */
19467 offset = - 4 * num_regs;
19468 j = 0;
19469 while (j <= LAST_ARM_REGNUM)
19470 if (saved_regs_mask & (1 << j))
19471 {
19472 if ((j % 2 == 0)
19473 && (saved_regs_mask & (1 << (j + 1))))
19474 {
19475 /* Current register and next register form a register pair for
19476 which STRD can be generated. */
19477 if (offset < 0)
19478 {
19479 /* Allocate stack space for all saved registers. */
19480 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19481 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19482 mem = gen_frame_mem (DImode, tmp);
19483 offset = 0;
19484 }
19485 else if (offset > 0)
19486 mem = gen_frame_mem (DImode,
19487 plus_constant (Pmode,
19488 stack_pointer_rtx,
19489 offset));
19490 else
19491 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19492
19493 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19494 RTX_FRAME_RELATED_P (tmp) = 1;
19495 tmp = emit_insn (tmp);
19496
19497 /* Record the first store insn. */
19498 if (dwarf_index == 1)
19499 insn = tmp;
19500
19501 /* Generate dwarf info. */
19502 mem = gen_frame_mem (SImode,
19503 plus_constant (Pmode,
19504 stack_pointer_rtx,
19505 offset));
19506 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19507 RTX_FRAME_RELATED_P (tmp) = 1;
19508 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19509
19510 mem = gen_frame_mem (SImode,
19511 plus_constant (Pmode,
19512 stack_pointer_rtx,
19513 offset + 4));
19514 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19515 RTX_FRAME_RELATED_P (tmp) = 1;
19516 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19517
19518 offset += 8;
19519 j += 2;
19520 }
19521 else
19522 {
19523 /* Emit a single word store. */
19524 if (offset < 0)
19525 {
19526 /* Allocate stack space for all saved registers. */
19527 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19528 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19529 mem = gen_frame_mem (SImode, tmp);
19530 offset = 0;
19531 }
19532 else if (offset > 0)
19533 mem = gen_frame_mem (SImode,
19534 plus_constant (Pmode,
19535 stack_pointer_rtx,
19536 offset));
19537 else
19538 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19539
19540 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19541 RTX_FRAME_RELATED_P (tmp) = 1;
19542 tmp = emit_insn (tmp);
19543
19544 /* Record the first store insn. */
19545 if (dwarf_index == 1)
19546 insn = tmp;
19547
19548 /* Generate dwarf info. */
19549 mem = gen_frame_mem (SImode,
19550 plus_constant (Pmode,
19551 stack_pointer_rtx,
19552 offset));
19553 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19554 RTX_FRAME_RELATED_P (tmp) = 1;
19555 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19556
19557 offset += 4;
19558 j += 1;
19559 }
19560 }
19561 else
19562 j++;
19563
19564 /* Attach dwarf info to the first insn we generate. */
19565 gcc_assert (insn != NULL_RTX);
19566 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19567 RTX_FRAME_RELATED_P (insn) = 1;
19568 }
19569
19570 /* Generate and emit an insn that we will recognize as a push_multi.
19571 Unfortunately, since this insn does not reflect very well the actual
19572 semantics of the operation, we need to annotate the insn for the benefit
19573 of DWARF2 frame unwind information. */
19574 static rtx
19575 emit_multi_reg_push (unsigned long mask)
19576 {
19577 int num_regs = 0;
19578 int num_dwarf_regs;
19579 int i, j;
19580 rtx par;
19581 rtx dwarf;
19582 int dwarf_par_index;
19583 rtx tmp, reg;
19584
19585 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19586 if (mask & (1 << i))
19587 num_regs++;
19588
19589 gcc_assert (num_regs && num_regs <= 16);
19590
19591 /* We don't record the PC in the dwarf frame information. */
19592 num_dwarf_regs = num_regs;
19593 if (mask & (1 << PC_REGNUM))
19594 num_dwarf_regs--;
19595
19596 /* For the body of the insn we are going to generate an UNSPEC in
19597 parallel with several USEs. This allows the insn to be recognized
19598 by the push_multi pattern in the arm.md file.
19599
19600 The body of the insn looks something like this:
19601
19602 (parallel [
19603 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19604 (const_int:SI <num>)))
19605 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19606 (use (reg:SI XX))
19607 (use (reg:SI YY))
19608 ...
19609 ])
19610
19611 For the frame note however, we try to be more explicit and actually
19612 show each register being stored into the stack frame, plus a (single)
19613 decrement of the stack pointer. We do it this way in order to be
19614 friendly to the stack unwinding code, which only wants to see a single
19615 stack decrement per instruction. The RTL we generate for the note looks
19616 something like this:
19617
19618 (sequence [
19619 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19620 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19621 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19622 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19623 ...
19624 ])
19625
19626 FIXME: In an ideal world the PRE_MODIFY would not exist and
19627 instead we'd have a parallel expression detailing all
19628 the stores to the various memory addresses so that debug
19629 information is more up-to-date. Remember however while writing
19630 this to take care of the constraints with the push instruction.
19631
19632 Note also that this has to be taken care of for the VFP registers.
19633
19634 For more see PR43399. */
19635
19636 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19637 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19638 dwarf_par_index = 1;
19639
19640 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19641 {
19642 if (mask & (1 << i))
19643 {
19644 reg = gen_rtx_REG (SImode, i);
19645
19646 XVECEXP (par, 0, 0)
19647 = gen_rtx_SET (VOIDmode,
19648 gen_frame_mem
19649 (BLKmode,
19650 gen_rtx_PRE_MODIFY (Pmode,
19651 stack_pointer_rtx,
19652 plus_constant
19653 (Pmode, stack_pointer_rtx,
19654 -4 * num_regs))
19655 ),
19656 gen_rtx_UNSPEC (BLKmode,
19657 gen_rtvec (1, reg),
19658 UNSPEC_PUSH_MULT));
19659
19660 if (i != PC_REGNUM)
19661 {
19662 tmp = gen_rtx_SET (VOIDmode,
19663 gen_frame_mem (SImode, stack_pointer_rtx),
19664 reg);
19665 RTX_FRAME_RELATED_P (tmp) = 1;
19666 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
19667 dwarf_par_index++;
19668 }
19669
19670 break;
19671 }
19672 }
19673
19674 for (j = 1, i++; j < num_regs; i++)
19675 {
19676 if (mask & (1 << i))
19677 {
19678 reg = gen_rtx_REG (SImode, i);
19679
19680 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19681
19682 if (i != PC_REGNUM)
19683 {
19684 tmp
19685 = gen_rtx_SET (VOIDmode,
19686 gen_frame_mem
19687 (SImode,
19688 plus_constant (Pmode, stack_pointer_rtx,
19689 4 * j)),
19690 reg);
19691 RTX_FRAME_RELATED_P (tmp) = 1;
19692 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19693 }
19694
19695 j++;
19696 }
19697 }
19698
19699 par = emit_insn (par);
19700
19701 tmp = gen_rtx_SET (VOIDmode,
19702 stack_pointer_rtx,
19703 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19704 RTX_FRAME_RELATED_P (tmp) = 1;
19705 XVECEXP (dwarf, 0, 0) = tmp;
19706
19707 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19708
19709 return par;
19710 }
19711
19712 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19713 SIZE is the offset to be adjusted.
19714 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19715 static void
19716 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19717 {
19718 rtx dwarf;
19719
19720 RTX_FRAME_RELATED_P (insn) = 1;
19721 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19722 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19723 }
19724
19725 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19726 SAVED_REGS_MASK shows which registers need to be restored.
19727
19728 Unfortunately, since this insn does not reflect very well the actual
19729 semantics of the operation, we need to annotate the insn for the benefit
19730 of DWARF2 frame unwind information. */
19731 static void
19732 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19733 {
19734 int num_regs = 0;
19735 int i, j;
19736 rtx par;
19737 rtx dwarf = NULL_RTX;
19738 rtx tmp, reg;
19739 bool return_in_pc;
19740 int offset_adj;
19741 int emit_update;
19742
19743 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19744 offset_adj = return_in_pc ? 1 : 0;
19745 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19746 if (saved_regs_mask & (1 << i))
19747 num_regs++;
19748
19749 gcc_assert (num_regs && num_regs <= 16);
19750
19751 /* If SP is in the reglist, then we don't emit an SP update insn. */
19752 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19753
19754 /* The parallel needs to hold num_regs SETs
19755 and one SET for the stack update. */
19756 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19757
19758 if (return_in_pc)
19759 {
19760 tmp = ret_rtx;
19761 XVECEXP (par, 0, 0) = tmp;
19762 }
19763
19764 if (emit_update)
19765 {
19766 /* Increment the stack pointer, based on there being
19767 num_regs 4-byte registers to restore. */
19768 tmp = gen_rtx_SET (VOIDmode,
19769 stack_pointer_rtx,
19770 plus_constant (Pmode,
19771 stack_pointer_rtx,
19772 4 * num_regs));
19773 RTX_FRAME_RELATED_P (tmp) = 1;
19774 XVECEXP (par, 0, offset_adj) = tmp;
19775 }
19776
19777 /* Now restore every reg, which may include PC. */
19778 for (j = 0, i = 0; j < num_regs; i++)
19779 if (saved_regs_mask & (1 << i))
19780 {
19781 reg = gen_rtx_REG (SImode, i);
19782 if ((num_regs == 1) && emit_update && !return_in_pc)
19783 {
19784 /* Emit single load with writeback. */
19785 tmp = gen_frame_mem (SImode,
19786 gen_rtx_POST_INC (Pmode,
19787 stack_pointer_rtx));
19788 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19789 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19790 return;
19791 }
19792
19793 tmp = gen_rtx_SET (VOIDmode,
19794 reg,
19795 gen_frame_mem
19796 (SImode,
19797 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19798 RTX_FRAME_RELATED_P (tmp) = 1;
19799 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19800
19801 /* We need to maintain a sequence for DWARF info too. As dwarf info
19802 should not have PC, skip PC. */
19803 if (i != PC_REGNUM)
19804 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19805
19806 j++;
19807 }
19808
19809 if (return_in_pc)
19810 par = emit_jump_insn (par);
19811 else
19812 par = emit_insn (par);
19813
19814 REG_NOTES (par) = dwarf;
19815 if (!return_in_pc)
19816 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19817 stack_pointer_rtx, stack_pointer_rtx);
19818 }
19819
19820 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19821 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19822
19823 Unfortunately, since this insn does not reflect very well the actual
19824 semantics of the operation, we need to annotate the insn for the benefit
19825 of DWARF2 frame unwind information. */
19826 static void
19827 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19828 {
19829 int i, j;
19830 rtx par;
19831 rtx dwarf = NULL_RTX;
19832 rtx tmp, reg;
19833
19834 gcc_assert (num_regs && num_regs <= 32);
19835
19836 /* Workaround ARM10 VFPr1 bug. */
19837 if (num_regs == 2 && !arm_arch6)
19838 {
19839 if (first_reg == 15)
19840 first_reg--;
19841
19842 num_regs++;
19843 }
19844
19845 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19846 there could be up to 32 D-registers to restore.
19847 If there are more than 16 D-registers, make two recursive calls,
19848 each of which emits one pop_multi instruction. */
19849 if (num_regs > 16)
19850 {
19851 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19852 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19853 return;
19854 }
19855
19856 /* The parallel needs to hold num_regs SETs
19857 and one SET for the stack update. */
19858 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19859
19860 /* Increment the stack pointer, based on there being
19861 num_regs 8-byte registers to restore. */
19862 tmp = gen_rtx_SET (VOIDmode,
19863 base_reg,
19864 plus_constant (Pmode, base_reg, 8 * num_regs));
19865 RTX_FRAME_RELATED_P (tmp) = 1;
19866 XVECEXP (par, 0, 0) = tmp;
19867
19868 /* Now show every reg that will be restored, using a SET for each. */
19869 for (j = 0, i = first_reg; j < num_regs; i += 2)
19870 {
19871 reg = gen_rtx_REG (DFmode, i);
19872
19873 tmp = gen_rtx_SET (VOIDmode,
19874 reg,
19875 gen_frame_mem
19876 (DFmode,
19877 plus_constant (Pmode, base_reg, 8 * j)));
19878 RTX_FRAME_RELATED_P (tmp) = 1;
19879 XVECEXP (par, 0, j + 1) = tmp;
19880
19881 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19882
19883 j++;
19884 }
19885
19886 par = emit_insn (par);
19887 REG_NOTES (par) = dwarf;
19888
19889 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19890 base_reg, base_reg);
19891 }
19892
19893 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
19894 an even number of registers is being popped, multiple LDRD patterns are
19895 created for all register pairs.  If an odd number of registers is popped,
19896 the last register is loaded using an LDR pattern.  */
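/* For example, popping {r4, r5, r6, pc} produces roughly
   "ldrd r4, r5, [sp]", "add sp, sp, #8" and then "pop {r6, pc}", the
   last part being emitted via arm_emit_multi_reg_pop.  */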
19897 static void
19898 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19899 {
19900 int num_regs = 0;
19901 int i, j;
19902 rtx par = NULL_RTX;
19903 rtx dwarf = NULL_RTX;
19904 rtx tmp, reg, tmp1;
19905 bool return_in_pc;
19906
19907 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19908 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19909 if (saved_regs_mask & (1 << i))
19910 num_regs++;
19911
19912 gcc_assert (num_regs && num_regs <= 16);
19913
19914 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19915 to be popped. So, if num_regs is even, now it will become odd,
19916 and we can generate pop with PC. If num_regs is odd, it will be
19917 even now, and ldr with return can be generated for PC. */
19918 if (return_in_pc)
19919 num_regs--;
19920
19921 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19922
19923 /* Var j iterates over all the registers to gather all the registers in
19924 saved_regs_mask. Var i gives index of saved registers in stack frame.
19925 A PARALLEL RTX of register-pair is created here, so that pattern for
19926 LDRD can be matched. As PC is always last register to be popped, and
19927 we have already decremented num_regs if PC, we don't have to worry
19928 about PC in this loop. */
19929 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19930 if (saved_regs_mask & (1 << j))
19931 {
19932 /* Create RTX for memory load. */
19933 reg = gen_rtx_REG (SImode, j);
19934 tmp = gen_rtx_SET (SImode,
19935 reg,
19936 gen_frame_mem (SImode,
19937 plus_constant (Pmode,
19938 stack_pointer_rtx, 4 * i)));
19939 RTX_FRAME_RELATED_P (tmp) = 1;
19940
19941 if (i % 2 == 0)
19942 {
19943 /* When saved-register index (i) is even, the RTX to be emitted is
19944 yet to be created. Hence create it first. The LDRD pattern we
19945 are generating is :
19946 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19947 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19948 where target registers need not be consecutive. */
19949 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19950 dwarf = NULL_RTX;
19951 }
19952
19953 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19954 added as 0th element and if i is odd, reg_i is added as 1st element
19955 of LDRD pattern shown above. */
19956 XVECEXP (par, 0, (i % 2)) = tmp;
19957 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19958
19959 if ((i % 2) == 1)
19960 {
19961 /* When saved-register index (i) is odd, RTXs for both the registers
19962 to be loaded are generated in above given LDRD pattern, and the
19963 pattern can be emitted now. */
19964 par = emit_insn (par);
19965 REG_NOTES (par) = dwarf;
19966 RTX_FRAME_RELATED_P (par) = 1;
19967 }
19968
19969 i++;
19970 }
19971
19972 /* If the number of registers popped is odd and return_in_pc is false, or the
19973 number of registers is even and return_in_pc is true, the last register is
19974 popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
19975 then use LDR with post increment.  */
19976
19977 /* Increment the stack pointer, based on there being
19978 num_regs 4-byte registers to restore. */
19979 tmp = gen_rtx_SET (VOIDmode,
19980 stack_pointer_rtx,
19981 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19982 RTX_FRAME_RELATED_P (tmp) = 1;
19983 tmp = emit_insn (tmp);
19984 if (!return_in_pc)
19985 {
19986 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19987 stack_pointer_rtx, stack_pointer_rtx);
19988 }
19989
19990 dwarf = NULL_RTX;
19991
19992 if (((num_regs % 2) == 1 && !return_in_pc)
19993 || ((num_regs % 2) == 0 && return_in_pc))
19994 {
19995 /* Scan for the single register to be popped. Skip until the saved
19996 register is found. */
19997 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19998
19999 /* Gen LDR with post increment here. */
20000 tmp1 = gen_rtx_MEM (SImode,
20001 gen_rtx_POST_INC (SImode,
20002 stack_pointer_rtx));
20003 set_mem_alias_set (tmp1, get_frame_alias_set ());
20004
20005 reg = gen_rtx_REG (SImode, j);
20006 tmp = gen_rtx_SET (SImode, reg, tmp1);
20007 RTX_FRAME_RELATED_P (tmp) = 1;
20008 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20009
20010 if (return_in_pc)
20011 {
20012 /* If return_in_pc, j must be PC_REGNUM. */
20013 gcc_assert (j == PC_REGNUM);
20014 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20015 XVECEXP (par, 0, 0) = ret_rtx;
20016 XVECEXP (par, 0, 1) = tmp;
20017 par = emit_jump_insn (par);
20018 }
20019 else
20020 {
20021 par = emit_insn (tmp);
20022 REG_NOTES (par) = dwarf;
20023 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20024 stack_pointer_rtx, stack_pointer_rtx);
20025 }
20026
20027 }
20028 else if ((num_regs % 2) == 1 && return_in_pc)
20029 {
20030 /* There are 2 registers to be popped. So, generate the pattern
20031 pop_multiple_with_stack_update_and_return to pop in PC. */
20032 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20033 }
20034
20035 return;
20036 }
20037
20038 /* LDRD in ARM mode needs consecutive registers as operands. This function
20039 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20040 offset addressing and then generates one separate stack update. This provides
20041 more scheduling freedom, compared to writeback on every load. However,
20042 if the function returns using load into PC directly
20043 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20044 before the last load. TODO: Add a peephole optimization to recognize
20045 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20046 peephole optimization to merge the load at stack-offset zero
20047 with the stack update instruction using load with writeback
20048 in post-index addressing mode. */
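/* For example, popping {r4, r5, r7} produces roughly
   "ldrd r4, r5, [sp]", "ldr r7, [sp, #8]" and finally
   "add sp, sp, #12", i.e. offset addressing throughout plus a single
   SP update.  */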
20049 static void
20050 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20051 {
20052 int j = 0;
20053 int offset = 0;
20054 rtx par = NULL_RTX;
20055 rtx dwarf = NULL_RTX;
20056 rtx tmp, mem;
20057
20058 /* Restore saved registers. */
20059 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20060 j = 0;
20061 while (j <= LAST_ARM_REGNUM)
20062 if (saved_regs_mask & (1 << j))
20063 {
20064 if ((j % 2) == 0
20065 && (saved_regs_mask & (1 << (j + 1)))
20066 && (j + 1) != PC_REGNUM)
20067 {
20068 /* Current register and next register form register pair for which
20069 LDRD can be generated. PC is always the last register popped, and
20070 we handle it separately. */
20071 if (offset > 0)
20072 mem = gen_frame_mem (DImode,
20073 plus_constant (Pmode,
20074 stack_pointer_rtx,
20075 offset));
20076 else
20077 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20078
20079 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20080 tmp = emit_insn (tmp);
20081 RTX_FRAME_RELATED_P (tmp) = 1;
20082
20083 /* Generate dwarf info. */
20084
20085 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20086 gen_rtx_REG (SImode, j),
20087 NULL_RTX);
20088 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20089 gen_rtx_REG (SImode, j + 1),
20090 dwarf);
20091
20092 REG_NOTES (tmp) = dwarf;
20093
20094 offset += 8;
20095 j += 2;
20096 }
20097 else if (j != PC_REGNUM)
20098 {
20099 /* Emit a single word load. */
20100 if (offset > 0)
20101 mem = gen_frame_mem (SImode,
20102 plus_constant (Pmode,
20103 stack_pointer_rtx,
20104 offset));
20105 else
20106 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20107
20108 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20109 tmp = emit_insn (tmp);
20110 RTX_FRAME_RELATED_P (tmp) = 1;
20111
20112 /* Generate dwarf info. */
20113 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20114 gen_rtx_REG (SImode, j),
20115 NULL_RTX);
20116
20117 offset += 4;
20118 j += 1;
20119 }
20120 else /* j == PC_REGNUM */
20121 j++;
20122 }
20123 else
20124 j++;
20125
20126 /* Update the stack. */
20127 if (offset > 0)
20128 {
20129 tmp = gen_rtx_SET (Pmode,
20130 stack_pointer_rtx,
20131 plus_constant (Pmode,
20132 stack_pointer_rtx,
20133 offset));
20134 tmp = emit_insn (tmp);
20135 arm_add_cfa_adjust_cfa_note (tmp, offset,
20136 stack_pointer_rtx, stack_pointer_rtx);
20137 offset = 0;
20138 }
20139
20140 if (saved_regs_mask & (1 << PC_REGNUM))
20141 {
20142 /* Only PC is to be popped. */
20143 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20144 XVECEXP (par, 0, 0) = ret_rtx;
20145 tmp = gen_rtx_SET (SImode,
20146 gen_rtx_REG (SImode, PC_REGNUM),
20147 gen_frame_mem (SImode,
20148 gen_rtx_POST_INC (SImode,
20149 stack_pointer_rtx)));
20150 RTX_FRAME_RELATED_P (tmp) = 1;
20151 XVECEXP (par, 0, 1) = tmp;
20152 par = emit_jump_insn (par);
20153
20154 /* Generate dwarf info. */
20155 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20156 gen_rtx_REG (SImode, PC_REGNUM),
20157 NULL_RTX);
20158 REG_NOTES (par) = dwarf;
20159 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20160 stack_pointer_rtx, stack_pointer_rtx);
20161 }
20162 }
20163
20164 /* Calculate the size of the return value that is passed in registers. */
20165 static unsigned
20166 arm_size_return_regs (void)
20167 {
20168 enum machine_mode mode;
20169
20170 if (crtl->return_rtx != 0)
20171 mode = GET_MODE (crtl->return_rtx);
20172 else
20173 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20174
20175 return GET_MODE_SIZE (mode);
20176 }
20177
20178 /* Return true if the current function needs to save/restore LR. */
20179 static bool
20180 thumb_force_lr_save (void)
20181 {
20182 return !cfun->machine->lr_save_eliminated
20183 && (!leaf_function_p ()
20184 || thumb_far_jump_used_p ()
20185 || df_regs_ever_live_p (LR_REGNUM));
20186 }
20187
20188 /* Return true if CALL is an indirect tail call.  In that case we
20189 cannot tell whether r3 will be available, so callers must assume
20190 that it is not. */
20191 static bool
20192 is_indirect_tailcall_p (rtx call)
20193 {
20194 rtx pat = PATTERN (call);
20195
20196 /* Indirect tail call. */
20197 pat = XVECEXP (pat, 0, 0);
20198 if (GET_CODE (pat) == SET)
20199 pat = SET_SRC (pat);
20200
20201 pat = XEXP (XEXP (pat, 0), 0);
20202 return REG_P (pat);
20203 }
20204
20205 /* Return true if r3 is used by any of the tail call insns in the
20206 current function. */
20207 static bool
20208 any_sibcall_could_use_r3 (void)
20209 {
20210 edge_iterator ei;
20211 edge e;
20212
20213 if (!crtl->tail_call_emit)
20214 return false;
20215 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20216 if (e->flags & EDGE_SIBCALL)
20217 {
20218 rtx call = BB_END (e->src);
20219 if (!CALL_P (call))
20220 call = prev_nonnote_nondebug_insn (call);
20221 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20222 if (find_regno_fusage (call, USE, 3)
20223 || is_indirect_tailcall_p (call))
20224 return true;
20225 }
20226 return false;
20227 }
20228
20229
20230 /* Compute the distance from register FROM to register TO.
20231 These can be the arg pointer (26), the soft frame pointer (25),
20232 the stack pointer (13) or the hard frame pointer (11).
20233 In thumb mode r7 is used as the soft frame pointer, if needed.
20234 Typical stack layout looks like this:
20235
20236     old stack pointer -> |    |
20237                           ----
20238                          |    | \
20239                          |    |   saved arguments for
20240                          |    |   vararg functions
20241                          |    | /
20242                            --
20243 hard FP & arg pointer -> |    | \
20244                          |    |   stack
20245                          |    |   frame
20246                          |    | /
20247                            --
20248                          |    | \
20249                          |    |   call saved
20250                          |    |   registers
20251    soft frame pointer -> |    | /
20252                            --
20253                          |    | \
20254                          |    |   local
20255                          |    |   variables
20256   locals base pointer -> |    | /
20257                            --
20258                          |    | \
20259                          |    |   outgoing
20260                          |    |   arguments
20261 current stack pointer -> |    | /
20262                            --
20263
20264 For a given function some or all of these stack components
20265 may not be needed, giving rise to the possibility of
20266 eliminating some of the registers.
20267
20268 The values returned by this function must reflect the behavior
20269 of arm_expand_prologue() and arm_compute_save_reg_mask().
20270
20271 The sign of the number returned reflects the direction of stack
20272 growth, so the values are positive for all eliminations except
20273 from the soft frame pointer to the hard frame pointer.
20274
20275 SFP may point just inside the local variables block to ensure correct
20276 alignment. */
20277
20278
20279 /* Calculate stack offsets. These are used to calculate register elimination
20280 offsets and in prologue/epilogue code. Also calculates which registers
20281 should be saved. */
20282
20283 static arm_stack_offsets *
20284 arm_get_frame_offsets (void)
20285 {
20286 struct arm_stack_offsets *offsets;
20287 unsigned long func_type;
20288 int leaf;
20289 int saved;
20290 int core_saved;
20291 HOST_WIDE_INT frame_size;
20292 int i;
20293
20294 offsets = &cfun->machine->stack_offsets;
20295
20296 /* We need to know if we are a leaf function. Unfortunately, it
20297 is possible to be called after start_sequence has been called,
20298 which causes get_insns to return the insns for the sequence,
20299 not the function, which will cause leaf_function_p to return
20300 the incorrect result.
20301
20302 To work around this we cache the frame layout: we only need to know about leaf functions once reload has completed, and the
20303 frame size cannot be changed after that time, so we can safely
20304 use the cached value. */
20305
20306 if (reload_completed)
20307 return offsets;
20308
20309 /* Initially this is the size of the local variables. It will be translated
20310 into an offset once we have determined the size of preceding data. */
20311 frame_size = ROUND_UP_WORD (get_frame_size ());
20312
20313 leaf = leaf_function_p ();
20314
20315 /* Space for variadic functions. */
20316 offsets->saved_args = crtl->args.pretend_args_size;
20317
20318 /* In Thumb mode this is incorrect, but never used. */
20319 offsets->frame
20320 = (offsets->saved_args
20321 + arm_compute_static_chain_stack_bytes ()
20322 + (frame_pointer_needed ? 4 : 0));
20323
20324 if (TARGET_32BIT)
20325 {
20326 unsigned int regno;
20327
20328 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20329 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20330 saved = core_saved;
20331
20332 /* We know that SP will be doubleword aligned on entry, and we must
20333 preserve that condition at any subroutine call. We also require the
20334 soft frame pointer to be doubleword aligned. */
20335
20336 if (TARGET_REALLY_IWMMXT)
20337 {
20338 /* Check for the call-saved iWMMXt registers. */
20339 for (regno = FIRST_IWMMXT_REGNUM;
20340 regno <= LAST_IWMMXT_REGNUM;
20341 regno++)
20342 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20343 saved += 8;
20344 }
20345
20346 func_type = arm_current_func_type ();
20347 /* Space for saved VFP registers. */
20348 if (! IS_VOLATILE (func_type)
20349 && TARGET_HARD_FLOAT && TARGET_VFP)
20350 saved += arm_get_vfp_saved_size ();
20351 }
20352 else /* TARGET_THUMB1 */
20353 {
20354 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20355 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20356 saved = core_saved;
20357 if (TARGET_BACKTRACE)
20358 saved += 16;
20359 }
20360
20361 /* Saved registers include the stack frame. */
20362 offsets->saved_regs
20363 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20364 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20365
20366 /* A leaf function does not need any stack alignment if it has nothing
20367 on the stack. */
20368 if (leaf && frame_size == 0
20369 /* However if it calls alloca(), we have a dynamically allocated
20370 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20371 && ! cfun->calls_alloca)
20372 {
20373 offsets->outgoing_args = offsets->soft_frame;
20374 offsets->locals_base = offsets->soft_frame;
20375 return offsets;
20376 }
20377
20378 /* Ensure SFP has the correct alignment. */
20379 if (ARM_DOUBLEWORD_ALIGN
20380 && (offsets->soft_frame & 7))
20381 {
20382 offsets->soft_frame += 4;
20383 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20384 when there is a stack frame as the alignment will be rolled into
20385 the normal stack adjustment. */
20386 if (frame_size + crtl->outgoing_args_size == 0)
20387 {
20388 int reg = -1;
20389
20390 /* If it is safe to use r3, then do so. This sometimes
20391 generates better code on Thumb-2 by avoiding the need to
20392 use 32-bit push/pop instructions. */
20393 if (! any_sibcall_could_use_r3 ()
20394 && arm_size_return_regs () <= 12
20395 && (offsets->saved_regs_mask & (1 << 3)) == 0
20396 && (TARGET_THUMB2
20397 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20398 {
20399 reg = 3;
20400 }
20401 else
20402 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20403 {
20404 /* Avoid fixed registers; they may be changed at
20405 arbitrary times so it's unsafe to restore them
20406 during the epilogue. */
20407 if (!fixed_regs[i]
20408 && (offsets->saved_regs_mask & (1 << i)) == 0)
20409 {
20410 reg = i;
20411 break;
20412 }
20413 }
20414
20415 if (reg != -1)
20416 {
20417 offsets->saved_regs += 4;
20418 offsets->saved_regs_mask |= (1 << reg);
20419 }
20420 }
20421 }
20422
20423 offsets->locals_base = offsets->soft_frame + frame_size;
20424 offsets->outgoing_args = (offsets->locals_base
20425 + crtl->outgoing_args_size);
20426
20427 if (ARM_DOUBLEWORD_ALIGN)
20428 {
20429 /* Ensure SP remains doubleword aligned. */
20430 if (offsets->outgoing_args & 7)
20431 offsets->outgoing_args += 4;
20432 gcc_assert (!(offsets->outgoing_args & 7));
20433 }
20434
20435 return offsets;
20436 }
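/* Illustrative sketch (not part of the port): the bookkeeping above boils
   down to counting set bits in the save mask (four bytes per saved core
   register), rounding the locals up to a word, and padding offsets so the
   soft frame pointer and SP stay 8-byte aligned when ARM_DOUBLEWORD_ALIGN
   is in effect.  The helpers below are hypothetical and only restate that
   arithmetic.  */
#if 0	/* illustration only */
static unsigned int
example_core_save_bytes (unsigned long save_mask)
{
  unsigned int bytes = 0;
  while (save_mask)
    {
      save_mask &= save_mask - 1;	/* clear the lowest set bit */
      bytes += 4;			/* one word per saved core register */
    }
  return bytes;
}

static unsigned int
example_align_up (unsigned int offset, unsigned int align)
{
  /* ALIGN must be a power of two; e.g. align_up (frame_size, 4) rounds the
     locals to a word, align_up (soft_frame, 8) restores doubleword
     alignment.  */
  return (offset + align - 1) & ~(align - 1);
}
#endif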
20437
20438
20439 /* Calculate the relative offsets for the different stack pointers. Positive
20440 offsets are in the direction of stack growth. */
20441
20442 HOST_WIDE_INT
20443 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20444 {
20445 arm_stack_offsets *offsets;
20446
20447 offsets = arm_get_frame_offsets ();
20448
20449 /* OK, now we have enough information to compute the distances.
20450 There must be an entry in these switch tables for each pair
20451 of registers in ELIMINABLE_REGS, even if some of the entries
20452 seem to be redundant or useless. */
20453 switch (from)
20454 {
20455 case ARG_POINTER_REGNUM:
20456 switch (to)
20457 {
20458 case THUMB_HARD_FRAME_POINTER_REGNUM:
20459 return 0;
20460
20461 case FRAME_POINTER_REGNUM:
20462 /* This is the reverse of the soft frame pointer
20463 to hard frame pointer elimination below. */
20464 return offsets->soft_frame - offsets->saved_args;
20465
20466 case ARM_HARD_FRAME_POINTER_REGNUM:
20467 /* This is only non-zero in the case where the static chain register
20468 is stored above the frame. */
20469 return offsets->frame - offsets->saved_args - 4;
20470
20471 case STACK_POINTER_REGNUM:
20472 /* If nothing has been pushed on the stack at all
20473 then this will return -4. This *is* correct! */
20474 return offsets->outgoing_args - (offsets->saved_args + 4);
20475
20476 default:
20477 gcc_unreachable ();
20478 }
20479 gcc_unreachable ();
20480
20481 case FRAME_POINTER_REGNUM:
20482 switch (to)
20483 {
20484 case THUMB_HARD_FRAME_POINTER_REGNUM:
20485 return 0;
20486
20487 case ARM_HARD_FRAME_POINTER_REGNUM:
20488 /* The hard frame pointer points to the top entry in the
20489 stack frame. The soft frame pointer to the bottom entry
20490 in the stack frame. If there is no stack frame at all,
20491 then they are identical. */
20492
20493 return offsets->frame - offsets->soft_frame;
20494
20495 case STACK_POINTER_REGNUM:
20496 return offsets->outgoing_args - offsets->soft_frame;
20497
20498 default:
20499 gcc_unreachable ();
20500 }
20501 gcc_unreachable ();
20502
20503 default:
20504 /* You cannot eliminate from the stack pointer.
20505 In theory you could eliminate from the hard frame
20506 pointer to the stack pointer, but this will never
20507 happen, since if a stack frame is not needed the
20508 hard frame pointer will never be used. */
20509 gcc_unreachable ();
20510 }
20511 }
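/* Illustrative sketch (not part of the port): a worked example of the
   ARG_POINTER -> STACK_POINTER case above, using made-up numbers.  Assume
   saved_args = 0, two saved core registers (saved = 8), no static chain,
   a zero CALLER_INTERWORKING_SLOT_SIZE, 16 bytes of locals and no outgoing
   arguments.  Then saved_regs = 8, soft_frame = 8, locals_base = 24 and
   outgoing_args = 24, so the elimination offset is 24 - (0 + 4) = 20:
   incoming argument 0 lives 20 bytes above the final stack pointer.  */
#if 0	/* illustration only */
static int
example_arg_to_sp_offset (int saved_args, int outgoing_args)
{
  /* Mirrors the ARG_POINTER_REGNUM -> STACK_POINTER_REGNUM case above.  */
  return outgoing_args - (saved_args + 4);
}
#endif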
20512
20513 /* Given FROM and TO register numbers, say whether this elimination is
20514 allowed. Frame pointer elimination is automatically handled.
20515
20516 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20517 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20518 pointer, we must eliminate FRAME_POINTER_REGNUM into
20519 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20520 ARG_POINTER_REGNUM. */
20521
20522 bool
20523 arm_can_eliminate (const int from, const int to)
20524 {
20525 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false
20526 : (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false
20527 : (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false
20528 : (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false
20529 : true);
20530 }
20531
20532 /* Emit RTL to save coprocessor registers on function entry. Returns the
20533 number of bytes pushed. */
20534
20535 static int
20536 arm_save_coproc_regs(void)
20537 {
20538 int saved_size = 0;
20539 unsigned reg;
20540 unsigned start_reg;
20541 rtx insn;
20542
20543 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20544 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20545 {
20546 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20547 insn = gen_rtx_MEM (V2SImode, insn);
20548 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20549 RTX_FRAME_RELATED_P (insn) = 1;
20550 saved_size += 8;
20551 }
20552
20553 if (TARGET_HARD_FLOAT && TARGET_VFP)
20554 {
20555 start_reg = FIRST_VFP_REGNUM;
20556
20557 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20558 {
20559 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20560 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20561 {
20562 if (start_reg != reg)
20563 saved_size += vfp_emit_fstmd (start_reg,
20564 (reg - start_reg) / 2);
20565 start_reg = reg + 2;
20566 }
20567 }
20568 if (start_reg != reg)
20569 saved_size += vfp_emit_fstmd (start_reg,
20570 (reg - start_reg) / 2);
20571 }
20572 return saved_size;
20573 }
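/* Illustrative sketch (not part of the port): the VFP loop above walks the
   S registers two at a time and emits one store-multiple per maximal run
   of live pairs.  The hypothetical helper below shows the same
   run-grouping over a plain array of "pair is live" flags; each run of
   COUNT pairs corresponds to one store of COUNT double registers, i.e.
   COUNT * 8 bytes, matching saved_size above.  */
#if 0	/* illustration only */
static void
example_group_runs (const int live_pair[16],
		    void (*emit_run) (int first, int count))
{
  int start = 0, i;

  for (i = 0; i < 16; i++)
    if (!live_pair[i])
      {
	if (start != i)
	  emit_run (start, i - start);	/* flush the run that just ended */
	start = i + 1;
      }
  if (start != 16)
    emit_run (start, 16 - start);	/* flush a run reaching the end */
}
#endif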
20574
20575
20576 /* Set the Thumb frame pointer from the stack pointer. */
20577
20578 static void
20579 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20580 {
20581 HOST_WIDE_INT amount;
20582 rtx insn, dwarf;
20583
20584 amount = offsets->outgoing_args - offsets->locals_base;
20585 if (amount < 1024)
20586 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20587 stack_pointer_rtx, GEN_INT (amount)));
20588 else
20589 {
20590 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20591 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20592 expects the first two operands to be the same. */
20593 if (TARGET_THUMB2)
20594 {
20595 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20596 stack_pointer_rtx,
20597 hard_frame_pointer_rtx));
20598 }
20599 else
20600 {
20601 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20602 hard_frame_pointer_rtx,
20603 stack_pointer_rtx));
20604 }
20605 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20606 plus_constant (Pmode, stack_pointer_rtx, amount));
20607 RTX_FRAME_RELATED_P (dwarf) = 1;
20608 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20609 }
20610
20611 RTX_FRAME_RELATED_P (insn) = 1;
20612 }
20613
20614 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20615 function. */
20616 void
20617 arm_expand_prologue (void)
20618 {
20619 rtx amount;
20620 rtx insn;
20621 rtx ip_rtx;
20622 unsigned long live_regs_mask;
20623 unsigned long func_type;
20624 int fp_offset = 0;
20625 int saved_pretend_args = 0;
20626 int saved_regs = 0;
20627 unsigned HOST_WIDE_INT args_to_push;
20628 arm_stack_offsets *offsets;
20629
20630 func_type = arm_current_func_type ();
20631
20632 /* Naked functions don't have prologues. */
20633 if (IS_NAKED (func_type))
20634 return;
20635
20636 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
20637 args_to_push = crtl->args.pretend_args_size;
20638
20639 /* Compute which registers we will have to save onto the stack. */
20640 offsets = arm_get_frame_offsets ();
20641 live_regs_mask = offsets->saved_regs_mask;
20642
20643 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20644
20645 if (IS_STACKALIGN (func_type))
20646 {
20647 rtx r0, r1;
20648
20649 /* Handle a word-aligned stack pointer. We generate the following:
20650
20651 mov r0, sp
20652 bic r1, r0, #7
20653 mov sp, r1
20654 <save and restore r0 in normal prologue/epilogue>
20655 mov sp, r0
20656 bx lr
20657
20658 The unwinder doesn't need to know about the stack realignment.
20659 Just tell it we saved SP in r0. */
20660 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20661
20662 r0 = gen_rtx_REG (SImode, 0);
20663 r1 = gen_rtx_REG (SImode, 1);
20664
20665 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20666 RTX_FRAME_RELATED_P (insn) = 1;
20667 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20668
20669 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20670
20671 /* ??? The CFA changes here, which may cause GDB to conclude that it
20672 has entered a different function. That said, the unwind info is
20673 correct, individually, before and after this instruction because
20674 we've described the save of SP, which will override the default
20675 handling of SP as restoring from the CFA. */
20676 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20677 }
20678
20679 /* For APCS frames, if the IP register is clobbered
20680 when creating the frame, save that register in a special
20681 way. */
20682 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20683 {
20684 if (IS_INTERRUPT (func_type))
20685 {
20686 /* Interrupt functions must not corrupt any registers.
20687 Creating a frame pointer however, corrupts the IP
20688 register, so we must push it first. */
20689 emit_multi_reg_push (1 << IP_REGNUM);
20690
20691 /* Do not set RTX_FRAME_RELATED_P on this insn.
20692 The dwarf stack unwinding code only wants to see one
20693 stack decrement per function, and this is not it. If
20694 this instruction is labeled as being part of the frame
20695 creation sequence then dwarf2out_frame_debug_expr will
20696 die when it encounters the assignment of IP to FP
20697 later on, since the use of SP here establishes SP as
20698 the CFA register and not IP.
20699
20700 Anyway this instruction is not really part of the stack
20701 frame creation although it is part of the prologue. */
20702 }
20703 else if (IS_NESTED (func_type))
20704 {
20705 /* The static chain register is the same as the IP register
20706 used as a scratch register during stack frame creation.
20707 To get around this we need to find somewhere to store IP
20708 whilst the frame is being created. We try the following
20709 places in order:
20710
20711 1. The last argument register r3 if it is available.
20712 2. A slot on the stack above the frame if there are no
20713 arguments to push onto the stack.
20714 3. Register r3 again, after pushing the argument registers
20715 onto the stack, if this is a varargs function.
20716 4. The last slot on the stack created for the arguments to
20717 push, if this isn't a varargs function.
20718
20719 Note - we only need to tell the dwarf2 backend about the SP
20720 adjustment in the second variant; the static chain register
20721 doesn't need to be unwound, as it doesn't contain a value
20722 inherited from the caller. */
20723
20724 if (!arm_r3_live_at_start_p ())
20725 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20726 else if (args_to_push == 0)
20727 {
20728 rtx addr, dwarf;
20729
20730 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20731 saved_regs += 4;
20732
20733 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20734 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20735 fp_offset = 4;
20736
20737 /* Just tell the dwarf backend that we adjusted SP. */
20738 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20739 plus_constant (Pmode, stack_pointer_rtx,
20740 -fp_offset));
20741 RTX_FRAME_RELATED_P (insn) = 1;
20742 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20743 }
20744 else
20745 {
20746 /* Store the args on the stack. */
20747 if (cfun->machine->uses_anonymous_args)
20748 {
20749 insn
20750 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf);
20751 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20752 saved_pretend_args = 1;
20753 }
20754 else
20755 {
20756 rtx addr, dwarf;
20757
20758 if (args_to_push == 4)
20759 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20760 else
20761 addr
20762 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20763 plus_constant (Pmode,
20764 stack_pointer_rtx,
20765 -args_to_push));
20766
20767 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20768
20769 /* Just tell the dwarf backend that we adjusted SP. */
20770 dwarf
20771 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20772 plus_constant (Pmode, stack_pointer_rtx,
20773 -args_to_push));
20774 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20775 }
20776
20777 RTX_FRAME_RELATED_P (insn) = 1;
20778 fp_offset = args_to_push;
20779 args_to_push = 0;
20780 }
20781 }
20782
20783 insn = emit_set_insn (ip_rtx,
20784 plus_constant (Pmode, stack_pointer_rtx,
20785 fp_offset));
20786 RTX_FRAME_RELATED_P (insn) = 1;
20787 }
20788
20789 if (args_to_push)
20790 {
20791 /* Push the argument registers, or reserve space for them. */
20792 if (cfun->machine->uses_anonymous_args)
20793 insn = emit_multi_reg_push
20794 ((0xf0 >> (args_to_push / 4)) & 0xf);
20795 else
20796 insn = emit_insn
20797 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20798 GEN_INT (- args_to_push)));
20799 RTX_FRAME_RELATED_P (insn) = 1;
20800 }
20801
20802 /* If this is an interrupt service routine, and the link register
20803 is going to be pushed, and we're not generating an extra
20804 push of IP (needed when a frame is needed and the frame layout is APCS),
20805 subtracting four from LR now will mean that the function return
20806 can be done with a single instruction. */
20807 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20808 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20809 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20810 && TARGET_ARM)
20811 {
20812 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20813
20814 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20815 }
20816
20817 if (live_regs_mask)
20818 {
20819 saved_regs += bit_count (live_regs_mask) * 4;
20820 if (optimize_size && !frame_pointer_needed
20821 && saved_regs == offsets->saved_regs - offsets->saved_args)
20822 {
20823 /* If no coprocessor registers are being pushed and we don't have
20824 to worry about a frame pointer then push extra registers to
20825 create the stack frame. This is done in a way that does not
20826 alter the frame layout, so is independent of the epilogue. */
20827 int n;
20828 int frame;
20829 n = 0;
20830 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20831 n++;
20832 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20833 if (frame && n * 4 >= frame)
20834 {
20835 n = frame / 4;
20836 live_regs_mask |= (1 << n) - 1;
20837 saved_regs += frame;
20838 }
20839 }
20840
20841 if (TARGET_LDRD
20842 && current_tune->prefer_ldrd_strd
20843 && !optimize_function_for_size_p (cfun))
20844 {
20845 if (TARGET_THUMB2)
20846 {
20847 thumb2_emit_strd_push (live_regs_mask);
20848 }
20849 else if (TARGET_ARM
20850 && !TARGET_APCS_FRAME
20851 && !IS_INTERRUPT (func_type))
20852 {
20853 arm_emit_strd_push (live_regs_mask);
20854 }
20855 else
20856 {
20857 insn = emit_multi_reg_push (live_regs_mask);
20858 RTX_FRAME_RELATED_P (insn) = 1;
20859 }
20860 }
20861 else
20862 {
20863 insn = emit_multi_reg_push (live_regs_mask);
20864 RTX_FRAME_RELATED_P (insn) = 1;
20865 }
20866 }
20867
20868 if (! IS_VOLATILE (func_type))
20869 saved_regs += arm_save_coproc_regs ();
20870
20871 if (frame_pointer_needed && TARGET_ARM)
20872 {
20873 /* Create the new frame pointer. */
20874 if (TARGET_APCS_FRAME)
20875 {
20876 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20877 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20878 RTX_FRAME_RELATED_P (insn) = 1;
20879
20880 if (IS_NESTED (func_type))
20881 {
20882 /* Recover the static chain register. */
20883 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20884 insn = gen_rtx_REG (SImode, 3);
20885 else
20886 {
20887 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20888 insn = gen_frame_mem (SImode, insn);
20889 }
20890 emit_set_insn (ip_rtx, insn);
20891 /* Add a USE to stop propagate_one_insn() from barfing. */
20892 emit_insn (gen_force_register_use (ip_rtx));
20893 }
20894 }
20895 else
20896 {
20897 insn = GEN_INT (saved_regs - 4);
20898 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20899 stack_pointer_rtx, insn));
20900 RTX_FRAME_RELATED_P (insn) = 1;
20901 }
20902 }
20903
20904 if (flag_stack_usage_info)
20905 current_function_static_stack_size
20906 = offsets->outgoing_args - offsets->saved_args;
20907
20908 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20909 {
20910 /* This add can produce multiple insns for a large constant, so we
20911 need to get tricky. */
20912 rtx last = get_last_insn ();
20913
20914 amount = GEN_INT (offsets->saved_args + saved_regs
20915 - offsets->outgoing_args);
20916
20917 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20918 amount));
20919 do
20920 {
20921 last = last ? NEXT_INSN (last) : get_insns ();
20922 RTX_FRAME_RELATED_P (last) = 1;
20923 }
20924 while (last != insn);
20925
20926 /* If the frame pointer is needed, emit a special barrier that
20927 will prevent the scheduler from moving stores to the frame
20928 before the stack adjustment. */
20929 if (frame_pointer_needed)
20930 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20931 hard_frame_pointer_rtx));
20932 }
20933
20934
20935 if (frame_pointer_needed && TARGET_THUMB2)
20936 thumb_set_frame_pointer (offsets);
20937
20938 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20939 {
20940 unsigned long mask;
20941
20942 mask = live_regs_mask;
20943 mask &= THUMB2_WORK_REGS;
20944 if (!IS_NESTED (func_type))
20945 mask |= (1 << IP_REGNUM);
20946 arm_load_pic_register (mask);
20947 }
20948
20949 /* If we are profiling, make sure no instructions are scheduled before
20950 the call to mcount. Similarly if the user has requested no
20951 scheduling in the prolog. Similarly if we want non-call exceptions
20952 using the EABI unwinder, to prevent faulting instructions from being
20953 swapped with a stack adjustment. */
20954 if (crtl->profile || !TARGET_SCHED_PROLOG
20955 || (arm_except_unwind_info (&global_options) == UI_TARGET
20956 && cfun->can_throw_non_call_exceptions))
20957 emit_insn (gen_blockage ());
20958
20959 /* If the link register is being kept alive, with the return address in it,
20960 then make sure that it does not get reused by the ce2 pass. */
20961 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20962 cfun->machine->lr_save_eliminated = 1;
20963 }
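/* Illustrative sketch (not part of the port): the -Os trick above replaces
   a small stack-pointer subtraction with pushes of otherwise unused low
   registers.  If the remaining frame is FRAME bytes (a multiple of 4) and
   at least FRAME / 4 low registers are free, the mask update adds exactly
   registers r0 .. r(FRAME/4 - 1).  The helper below is hypothetical and
   only restates that mask arithmetic.  */
#if 0	/* illustration only */
static unsigned long
example_add_frame_registers (unsigned long live_regs_mask, int frame_bytes)
{
  int n = frame_bytes / 4;		/* registers needed to cover the frame */
  return live_regs_mask | ((1ul << n) - 1);	/* set bits for r0 .. r(n-1) */
}
#endif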
20964 \f
20965 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20966 static void
20967 arm_print_condition (FILE *stream)
20968 {
20969 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20970 {
20971 /* Branch conversion is not implemented for Thumb-2. */
20972 if (TARGET_THUMB)
20973 {
20974 output_operand_lossage ("predicated Thumb instruction");
20975 return;
20976 }
20977 if (current_insn_predicate != NULL)
20978 {
20979 output_operand_lossage
20980 ("predicated instruction in conditional sequence");
20981 return;
20982 }
20983
20984 fputs (arm_condition_codes[arm_current_cc], stream);
20985 }
20986 else if (current_insn_predicate)
20987 {
20988 enum arm_cond_code code;
20989
20990 if (TARGET_THUMB1)
20991 {
20992 output_operand_lossage ("predicated Thumb instruction");
20993 return;
20994 }
20995
20996 code = get_arm_condition_code (current_insn_predicate);
20997 fputs (arm_condition_codes[code], stream);
20998 }
20999 }
21000
21001
21002 /* If CODE is 'd', then X is a condition operand and the instruction
21003 should only be executed if the condition is true.
21004 If CODE is 'D', then X is a condition operand and the instruction
21005 should only be executed if the condition is false: however, if the mode
21006 of the comparison is CCFPEmode, then always execute the instruction -- we
21007 do this because in these circumstances !GE does not necessarily imply LT;
21008 in these cases the instruction pattern will take care to make sure that
21009 an instruction containing %d will follow, thereby undoing the effects of
21010 doing this instruction unconditionally.
21011 If CODE is 'N' then X is a floating point operand that must be negated
21012 before output.
21013 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21014 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21015 static void
21016 arm_print_operand (FILE *stream, rtx x, int code)
21017 {
21018 switch (code)
21019 {
21020 case '@':
21021 fputs (ASM_COMMENT_START, stream);
21022 return;
21023
21024 case '_':
21025 fputs (user_label_prefix, stream);
21026 return;
21027
21028 case '|':
21029 fputs (REGISTER_PREFIX, stream);
21030 return;
21031
21032 case '?':
21033 arm_print_condition (stream);
21034 return;
21035
21036 case '(':
21037 /* Nothing in unified syntax, otherwise the current condition code. */
21038 if (!TARGET_UNIFIED_ASM)
21039 arm_print_condition (stream);
21040 break;
21041
21042 case ')':
21043 /* The current condition code in unified syntax, otherwise nothing. */
21044 if (TARGET_UNIFIED_ASM)
21045 arm_print_condition (stream);
21046 break;
21047
21048 case '.':
21049 /* The current condition code for a condition code setting instruction.
21050 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21051 if (TARGET_UNIFIED_ASM)
21052 {
21053 fputc('s', stream);
21054 arm_print_condition (stream);
21055 }
21056 else
21057 {
21058 arm_print_condition (stream);
21059 fputc('s', stream);
21060 }
21061 return;
21062
21063 case '!':
21064 /* If the instruction is conditionally executed then print
21065 the current condition code, otherwise print 's'. */
21066 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21067 if (current_insn_predicate)
21068 arm_print_condition (stream);
21069 else
21070 fputc('s', stream);
21071 break;
21072
21073 /* %# is a "break" sequence. It doesn't output anything, but is used to
21074 separate e.g. operand numbers from following text, if that text consists
21075 of further digits which we don't want to be part of the operand
21076 number. */
21077 case '#':
21078 return;
21079
21080 case 'N':
21081 {
21082 REAL_VALUE_TYPE r;
21083 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21084 r = real_value_negate (&r);
21085 fprintf (stream, "%s", fp_const_from_val (&r));
21086 }
21087 return;
21088
21089 /* An integer or symbol address without a preceding # sign. */
21090 case 'c':
21091 switch (GET_CODE (x))
21092 {
21093 case CONST_INT:
21094 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21095 break;
21096
21097 case SYMBOL_REF:
21098 output_addr_const (stream, x);
21099 break;
21100
21101 case CONST:
21102 if (GET_CODE (XEXP (x, 0)) == PLUS
21103 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21104 {
21105 output_addr_const (stream, x);
21106 break;
21107 }
21108 /* Fall through. */
21109
21110 default:
21111 output_operand_lossage ("Unsupported operand for code '%c'", code);
21112 }
21113 return;
21114
21115 /* An integer that we want to print in HEX. */
21116 case 'x':
21117 switch (GET_CODE (x))
21118 {
21119 case CONST_INT:
21120 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21121 break;
21122
21123 default:
21124 output_operand_lossage ("Unsupported operand for code '%c'", code);
21125 }
21126 return;
21127
21128 case 'B':
21129 if (CONST_INT_P (x))
21130 {
21131 HOST_WIDE_INT val;
21132 val = ARM_SIGN_EXTEND (~INTVAL (x));
21133 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21134 }
21135 else
21136 {
21137 putc ('~', stream);
21138 output_addr_const (stream, x);
21139 }
21140 return;
21141
21142 case 'L':
21143 /* The low 16 bits of an immediate constant. */
21144 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21145 return;
21146
21147 case 'i':
21148 fprintf (stream, "%s", arithmetic_instr (x, 1));
21149 return;
21150
21151 case 'I':
21152 fprintf (stream, "%s", arithmetic_instr (x, 0));
21153 return;
21154
21155 case 'S':
21156 {
21157 HOST_WIDE_INT val;
21158 const char *shift;
21159
21160 shift = shift_op (x, &val);
21161
21162 if (shift)
21163 {
21164 fprintf (stream, ", %s ", shift);
21165 if (val == -1)
21166 arm_print_operand (stream, XEXP (x, 1), 0);
21167 else
21168 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21169 }
21170 }
21171 return;
21172
21173 /* An explanation of the 'Q', 'R' and 'H' register operands:
21174
21175 In a pair of registers containing a DI or DF value the 'Q'
21176 operand returns the register number of the register containing
21177 the least significant part of the value. The 'R' operand returns
21178 the register number of the register containing the most
21179 significant part of the value.
21180
21181 The 'H' operand returns the higher of the two register numbers.
21182 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21183 same as the 'Q' operand, since the most significant part of the
21184 value is held in the lower number register. The reverse is true
21185 on systems where WORDS_BIG_ENDIAN is false.
21186
21187 The purpose of these operands is to distinguish between cases
21188 where the endian-ness of the values is important (for example
21189 when they are added together), and cases where the endian-ness
21190 is irrelevant, but the order of register operations is important.
21191 For example when loading a value from memory into a register
21192 pair, the endian-ness does not matter. Provided that the value
21193 from the lower memory address is put into the lower numbered
21194 register, and the value from the higher address is put into the
21195 higher numbered register, the load will work regardless of whether
21196 the value being loaded is big-wordian or little-wordian. The
21197 order of the two register loads can matter however, if the address
21198 of the memory location is actually held in one of the registers
21199 being overwritten by the load.
21200
21201 The 'Q' and 'R' constraints are also available for 64-bit
21202 constants. */
21203 case 'Q':
21204 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21205 {
21206 rtx part = gen_lowpart (SImode, x);
21207 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21208 return;
21209 }
21210
21211 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21212 {
21213 output_operand_lossage ("invalid operand for code '%c'", code);
21214 return;
21215 }
21216
21217 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21218 return;
21219
21220 case 'R':
21221 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21222 {
21223 enum machine_mode mode = GET_MODE (x);
21224 rtx part;
21225
21226 if (mode == VOIDmode)
21227 mode = DImode;
21228 part = gen_highpart_mode (SImode, mode, x);
21229 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21230 return;
21231 }
21232
21233 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21234 {
21235 output_operand_lossage ("invalid operand for code '%c'", code);
21236 return;
21237 }
21238
21239 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21240 return;
21241
21242 case 'H':
21243 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21244 {
21245 output_operand_lossage ("invalid operand for code '%c'", code);
21246 return;
21247 }
21248
21249 asm_fprintf (stream, "%r", REGNO (x) + 1);
21250 return;
21251
21252 case 'J':
21253 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21254 {
21255 output_operand_lossage ("invalid operand for code '%c'", code);
21256 return;
21257 }
21258
21259 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21260 return;
21261
21262 case 'K':
21263 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21264 {
21265 output_operand_lossage ("invalid operand for code '%c'", code);
21266 return;
21267 }
21268
21269 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21270 return;
21271
21272 case 'm':
21273 asm_fprintf (stream, "%r",
21274 REG_P (XEXP (x, 0))
21275 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21276 return;
21277
21278 case 'M':
21279 asm_fprintf (stream, "{%r-%r}",
21280 REGNO (x),
21281 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21282 return;
21283
21284 /* Like 'M', but writing doubleword vector registers, for use by Neon
21285 insns. */
21286 case 'h':
21287 {
21288 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21289 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21290 if (numregs == 1)
21291 asm_fprintf (stream, "{d%d}", regno);
21292 else
21293 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21294 }
21295 return;
21296
21297 case 'd':
21298 /* CONST_TRUE_RTX means always -- that's the default. */
21299 if (x == const_true_rtx)
21300 return;
21301
21302 if (!COMPARISON_P (x))
21303 {
21304 output_operand_lossage ("invalid operand for code '%c'", code);
21305 return;
21306 }
21307
21308 fputs (arm_condition_codes[get_arm_condition_code (x)],
21309 stream);
21310 return;
21311
21312 case 'D':
21313 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21314 want to do that. */
21315 if (x == const_true_rtx)
21316 {
21317 output_operand_lossage ("instruction never executed");
21318 return;
21319 }
21320 if (!COMPARISON_P (x))
21321 {
21322 output_operand_lossage ("invalid operand for code '%c'", code);
21323 return;
21324 }
21325
21326 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21327 (get_arm_condition_code (x))],
21328 stream);
21329 return;
21330
21331 case 's':
21332 case 'V':
21333 case 'W':
21334 case 'X':
21335 case 'Y':
21336 case 'Z':
21337 /* Former Maverick support, removed after GCC-4.7. */
21338 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21339 return;
21340
21341 case 'U':
21342 if (!REG_P (x)
21343 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21344 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21345 /* Bad value for wCG register number. */
21346 {
21347 output_operand_lossage ("invalid operand for code '%c'", code);
21348 return;
21349 }
21350
21351 else
21352 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21353 return;
21354
21355 /* Print an iWMMXt control register name. */
21356 case 'w':
21357 if (!CONST_INT_P (x)
21358 || INTVAL (x) < 0
21359 || INTVAL (x) >= 16)
21360 /* Bad value for wC register number. */
21361 {
21362 output_operand_lossage ("invalid operand for code '%c'", code);
21363 return;
21364 }
21365
21366 else
21367 {
21368 static const char * wc_reg_names [16] =
21369 {
21370 "wCID", "wCon", "wCSSF", "wCASF",
21371 "wC4", "wC5", "wC6", "wC7",
21372 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21373 "wC12", "wC13", "wC14", "wC15"
21374 };
21375
21376 fputs (wc_reg_names [INTVAL (x)], stream);
21377 }
21378 return;
21379
21380 /* Print the high single-precision register of a VFP double-precision
21381 register. */
21382 case 'p':
21383 {
21384 int mode = GET_MODE (x);
21385 int regno;
21386
21387 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21388 {
21389 output_operand_lossage ("invalid operand for code '%c'", code);
21390 return;
21391 }
21392
21393 regno = REGNO (x);
21394 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21395 {
21396 output_operand_lossage ("invalid operand for code '%c'", code);
21397 return;
21398 }
21399
21400 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21401 }
21402 return;
21403
21404 /* Print a VFP/Neon double precision or quad precision register name. */
21405 case 'P':
21406 case 'q':
21407 {
21408 int mode = GET_MODE (x);
21409 int is_quad = (code == 'q');
21410 int regno;
21411
21412 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21413 {
21414 output_operand_lossage ("invalid operand for code '%c'", code);
21415 return;
21416 }
21417
21418 if (!REG_P (x)
21419 || !IS_VFP_REGNUM (REGNO (x)))
21420 {
21421 output_operand_lossage ("invalid operand for code '%c'", code);
21422 return;
21423 }
21424
21425 regno = REGNO (x);
21426 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21427 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21428 {
21429 output_operand_lossage ("invalid operand for code '%c'", code);
21430 return;
21431 }
21432
21433 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21434 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21435 }
21436 return;
21437
21438 /* These two codes print the low/high doubleword register of a Neon quad
21439 register, respectively. For pair-structure types, can also print
21440 low/high quadword registers. */
21441 case 'e':
21442 case 'f':
21443 {
21444 int mode = GET_MODE (x);
21445 int regno;
21446
21447 if ((GET_MODE_SIZE (mode) != 16
21448 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21449 {
21450 output_operand_lossage ("invalid operand for code '%c'", code);
21451 return;
21452 }
21453
21454 regno = REGNO (x);
21455 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21456 {
21457 output_operand_lossage ("invalid operand for code '%c'", code);
21458 return;
21459 }
21460
21461 if (GET_MODE_SIZE (mode) == 16)
21462 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21463 + (code == 'f' ? 1 : 0));
21464 else
21465 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21466 + (code == 'f' ? 1 : 0));
21467 }
21468 return;
21469
21470 /* Print a VFPv3 floating-point constant, represented as an integer
21471 index. */
21472 case 'G':
21473 {
21474 int index = vfp3_const_double_index (x);
21475 gcc_assert (index != -1);
21476 fprintf (stream, "%d", index);
21477 }
21478 return;
21479
21480 /* Print bits representing opcode features for Neon.
21481
21482 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21483 and polynomials as unsigned.
21484
21485 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21486
21487 Bit 2 is 1 for rounding functions, 0 otherwise. */
21488
21489 /* Identify the type as 's', 'u', 'p' or 'f'. */
21490 case 'T':
21491 {
21492 HOST_WIDE_INT bits = INTVAL (x);
21493 fputc ("uspf"[bits & 3], stream);
21494 }
21495 return;
21496
21497 /* Likewise, but signed and unsigned integers are both 'i'. */
21498 case 'F':
21499 {
21500 HOST_WIDE_INT bits = INTVAL (x);
21501 fputc ("iipf"[bits & 3], stream);
21502 }
21503 return;
21504
21505 /* As for 'T', but emit 'u' instead of 'p'. */
21506 case 't':
21507 {
21508 HOST_WIDE_INT bits = INTVAL (x);
21509 fputc ("usuf"[bits & 3], stream);
21510 }
21511 return;
21512
21513 /* Bit 2: rounding (vs none). */
21514 case 'O':
21515 {
21516 HOST_WIDE_INT bits = INTVAL (x);
21517 fputs ((bits & 4) != 0 ? "r" : "", stream);
21518 }
21519 return;
21520
21521 /* Memory operand for vld1/vst1 instruction. */
21522 case 'A':
21523 {
21524 rtx addr;
21525 bool postinc = FALSE;
21526 unsigned align, memsize, align_bits;
21527
21528 gcc_assert (MEM_P (x));
21529 addr = XEXP (x, 0);
21530 if (GET_CODE (addr) == POST_INC)
21531 {
21532 postinc = 1;
21533 addr = XEXP (addr, 0);
21534 }
21535 asm_fprintf (stream, "[%r", REGNO (addr));
21536
21537 /* We know the alignment of this access, so we can emit a hint in the
21538 instruction (for some alignments) as an aid to the memory subsystem
21539 of the target. */
21540 align = MEM_ALIGN (x) >> 3;
21541 memsize = MEM_SIZE (x);
21542
21543 /* Only certain alignment specifiers are supported by the hardware. */
21544 if (memsize == 32 && (align % 32) == 0)
21545 align_bits = 256;
21546 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21547 align_bits = 128;
21548 else if (memsize >= 8 && (align % 8) == 0)
21549 align_bits = 64;
21550 else
21551 align_bits = 0;
21552
21553 if (align_bits != 0)
21554 asm_fprintf (stream, ":%d", align_bits);
21555
21556 asm_fprintf (stream, "]");
21557
21558 if (postinc)
21559 fputs("!", stream);
21560 }
21561 return;
21562
21563 case 'C':
21564 {
21565 rtx addr;
21566
21567 gcc_assert (MEM_P (x));
21568 addr = XEXP (x, 0);
21569 gcc_assert (REG_P (addr));
21570 asm_fprintf (stream, "[%r]", REGNO (addr));
21571 }
21572 return;
21573
21574 /* Translate an S register number into a D register number and element index. */
21575 case 'y':
21576 {
21577 int mode = GET_MODE (x);
21578 int regno;
21579
21580 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21581 {
21582 output_operand_lossage ("invalid operand for code '%c'", code);
21583 return;
21584 }
21585
21586 regno = REGNO (x);
21587 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21588 {
21589 output_operand_lossage ("invalid operand for code '%c'", code);
21590 return;
21591 }
21592
21593 regno = regno - FIRST_VFP_REGNUM;
21594 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21595 }
21596 return;
21597
21598 case 'v':
21599 gcc_assert (CONST_DOUBLE_P (x));
21600 int result;
21601 result = vfp3_const_double_for_fract_bits (x);
21602 if (result == 0)
21603 result = vfp3_const_double_for_bits (x);
21604 fprintf (stream, "#%d", result);
21605 return;
21606
21607 /* Register specifier for vld1.16/vst1.16. Translate the S register
21608 number into a D register number and element index. */
21609 case 'z':
21610 {
21611 int mode = GET_MODE (x);
21612 int regno;
21613
21614 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21615 {
21616 output_operand_lossage ("invalid operand for code '%c'", code);
21617 return;
21618 }
21619
21620 regno = REGNO (x);
21621 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21622 {
21623 output_operand_lossage ("invalid operand for code '%c'", code);
21624 return;
21625 }
21626
21627 regno = regno - FIRST_VFP_REGNUM;
21628 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21629 }
21630 return;
21631
21632 default:
21633 if (x == 0)
21634 {
21635 output_operand_lossage ("missing operand");
21636 return;
21637 }
21638
21639 switch (GET_CODE (x))
21640 {
21641 case REG:
21642 asm_fprintf (stream, "%r", REGNO (x));
21643 break;
21644
21645 case MEM:
21646 output_memory_reference_mode = GET_MODE (x);
21647 output_address (XEXP (x, 0));
21648 break;
21649
21650 case CONST_DOUBLE:
21651 if (TARGET_NEON)
21652 {
21653 char fpstr[20];
21654 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21655 sizeof (fpstr), 0, 1);
21656 fprintf (stream, "#%s", fpstr);
21657 }
21658 else
21659 fprintf (stream, "#%s", fp_immediate_constant (x));
21660 break;
21661
21662 default:
21663 gcc_assert (GET_CODE (x) != NEG);
21664 fputc ('#', stream);
21665 if (GET_CODE (x) == HIGH)
21666 {
21667 fputs (":lower16:", stream);
21668 x = XEXP (x, 0);
21669 }
21670
21671 output_addr_const (stream, x);
21672 break;
21673 }
21674 }
21675 }
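/* Illustrative sketch (not part of the port): the '%A' case above derives
   the alignment hint printed after the base register from the access size
   and the known alignment, both in bytes.  The hypothetical helper below
   just restates that decision table.  */
#if 0	/* illustration only */
static unsigned int
example_neon_align_hint_bits (unsigned int memsize, unsigned int align)
{
  if (memsize == 32 && (align % 32) == 0)
    return 256;				/* printed as ":256" */
  if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
    return 128;				/* printed as ":128" */
  if (memsize >= 8 && (align % 8) == 0)
    return 64;				/* printed as ":64" */
  return 0;				/* no hint is printed */
}
#endif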
21676 \f
21677 /* Target hook for printing a memory address. */
21678 static void
21679 arm_print_operand_address (FILE *stream, rtx x)
21680 {
21681 if (TARGET_32BIT)
21682 {
21683 int is_minus = GET_CODE (x) == MINUS;
21684
21685 if (REG_P (x))
21686 asm_fprintf (stream, "[%r]", REGNO (x));
21687 else if (GET_CODE (x) == PLUS || is_minus)
21688 {
21689 rtx base = XEXP (x, 0);
21690 rtx index = XEXP (x, 1);
21691 HOST_WIDE_INT offset = 0;
21692 if (!REG_P (base)
21693 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21694 {
21695 /* Ensure that BASE is a register. */
21696 /* (one of them must be). */
21697 /* Also ensure the SP is not used as an index register. */
21698 rtx temp = base;
21699 base = index;
21700 index = temp;
21701 }
21702 switch (GET_CODE (index))
21703 {
21704 case CONST_INT:
21705 offset = INTVAL (index);
21706 if (is_minus)
21707 offset = -offset;
21708 asm_fprintf (stream, "[%r, #%wd]",
21709 REGNO (base), offset);
21710 break;
21711
21712 case REG:
21713 asm_fprintf (stream, "[%r, %s%r]",
21714 REGNO (base), is_minus ? "-" : "",
21715 REGNO (index));
21716 break;
21717
21718 case MULT:
21719 case ASHIFTRT:
21720 case LSHIFTRT:
21721 case ASHIFT:
21722 case ROTATERT:
21723 {
21724 asm_fprintf (stream, "[%r, %s%r",
21725 REGNO (base), is_minus ? "-" : "",
21726 REGNO (XEXP (index, 0)));
21727 arm_print_operand (stream, index, 'S');
21728 fputs ("]", stream);
21729 break;
21730 }
21731
21732 default:
21733 gcc_unreachable ();
21734 }
21735 }
21736 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21737 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21738 {
21739 extern enum machine_mode output_memory_reference_mode;
21740
21741 gcc_assert (REG_P (XEXP (x, 0)));
21742
21743 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21744 asm_fprintf (stream, "[%r, #%s%d]!",
21745 REGNO (XEXP (x, 0)),
21746 GET_CODE (x) == PRE_DEC ? "-" : "",
21747 GET_MODE_SIZE (output_memory_reference_mode));
21748 else
21749 asm_fprintf (stream, "[%r], #%s%d",
21750 REGNO (XEXP (x, 0)),
21751 GET_CODE (x) == POST_DEC ? "-" : "",
21752 GET_MODE_SIZE (output_memory_reference_mode));
21753 }
21754 else if (GET_CODE (x) == PRE_MODIFY)
21755 {
21756 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21757 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21758 asm_fprintf (stream, "#%wd]!",
21759 INTVAL (XEXP (XEXP (x, 1), 1)));
21760 else
21761 asm_fprintf (stream, "%r]!",
21762 REGNO (XEXP (XEXP (x, 1), 1)));
21763 }
21764 else if (GET_CODE (x) == POST_MODIFY)
21765 {
21766 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21767 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21768 asm_fprintf (stream, "#%wd",
21769 INTVAL (XEXP (XEXP (x, 1), 1)));
21770 else
21771 asm_fprintf (stream, "%r",
21772 REGNO (XEXP (XEXP (x, 1), 1)));
21773 }
21774 else output_addr_const (stream, x);
21775 }
21776 else
21777 {
21778 if (REG_P (x))
21779 asm_fprintf (stream, "[%r]", REGNO (x));
21780 else if (GET_CODE (x) == POST_INC)
21781 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21782 else if (GET_CODE (x) == PLUS)
21783 {
21784 gcc_assert (REG_P (XEXP (x, 0)));
21785 if (CONST_INT_P (XEXP (x, 1)))
21786 asm_fprintf (stream, "[%r, #%wd]",
21787 REGNO (XEXP (x, 0)),
21788 INTVAL (XEXP (x, 1)));
21789 else
21790 asm_fprintf (stream, "[%r, %r]",
21791 REGNO (XEXP (x, 0)),
21792 REGNO (XEXP (x, 1)));
21793 }
21794 else
21795 output_addr_const (stream, x);
21796 }
21797 }
21798 \f
21799 /* Target hook for indicating whether a punctuation character for
21800 TARGET_PRINT_OPERAND is valid. */
21801 static bool
21802 arm_print_operand_punct_valid_p (unsigned char code)
21803 {
21804 return (code == '@' || code == '|' || code == '.'
21805 || code == '(' || code == ')' || code == '#'
21806 || (TARGET_32BIT && (code == '?'))
21807 || (TARGET_THUMB2 && (code == '!'))
21808 || (TARGET_THUMB && (code == '_')));
21809 }
21810 \f
21811 /* Target hook for assembling integer objects. The ARM version needs to
21812 handle word-sized values specially. */
21813 static bool
21814 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21815 {
21816 enum machine_mode mode;
21817
21818 if (size == UNITS_PER_WORD && aligned_p)
21819 {
21820 fputs ("\t.word\t", asm_out_file);
21821 output_addr_const (asm_out_file, x);
21822
21823 /* Mark symbols as position independent. We only do this in the
21824 .text segment, not in the .data segment. */
21825 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21826 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21827 {
21828 /* See legitimize_pic_address for an explanation of the
21829 TARGET_VXWORKS_RTP check. */
21830 if (!arm_pic_data_is_text_relative
21831 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21832 fputs ("(GOT)", asm_out_file);
21833 else
21834 fputs ("(GOTOFF)", asm_out_file);
21835 }
21836 fputc ('\n', asm_out_file);
21837 return true;
21838 }
21839
21840 mode = GET_MODE (x);
21841
21842 if (arm_vector_mode_supported_p (mode))
21843 {
21844 int i, units;
21845
21846 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21847
21848 units = CONST_VECTOR_NUNITS (x);
21849 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21850
21851 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21852 for (i = 0; i < units; i++)
21853 {
21854 rtx elt = CONST_VECTOR_ELT (x, i);
21855 assemble_integer
21856 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21857 }
21858 else
21859 for (i = 0; i < units; i++)
21860 {
21861 rtx elt = CONST_VECTOR_ELT (x, i);
21862 REAL_VALUE_TYPE rval;
21863
21864 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21865
21866 assemble_real
21867 (rval, GET_MODE_INNER (mode),
21868 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21869 }
21870
21871 return true;
21872 }
21873
21874 return default_assemble_integer (x, size, aligned_p);
21875 }
21876
21877 static void
21878 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21879 {
21880 section *s;
21881
21882 if (!TARGET_AAPCS_BASED)
21883 {
21884 (is_ctor ?
21885 default_named_section_asm_out_constructor
21886 : default_named_section_asm_out_destructor) (symbol, priority);
21887 return;
21888 }
21889
21890 /* Put these in the .init_array section, using a special relocation. */
21891 if (priority != DEFAULT_INIT_PRIORITY)
21892 {
21893 char buf[18];
21894 sprintf (buf, "%s.%.5u",
21895 is_ctor ? ".init_array" : ".fini_array",
21896 priority);
21897 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21898 }
21899 else if (is_ctor)
21900 s = ctors_section;
21901 else
21902 s = dtors_section;
21903
21904 switch_to_section (s);
21905 assemble_align (POINTER_SIZE);
21906 fputs ("\t.word\t", asm_out_file);
21907 output_addr_const (asm_out_file, symbol);
21908 fputs ("(target1)\n", asm_out_file);
21909 }
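/* Illustrative sketch (not part of the port): the sprintf above builds the
   section name from the kind and a zero-padded five-digit priority, so a
   constructor with priority 123 lands in ".init_array.00123", while
   DEFAULT_INIT_PRIORITY uses the plain .init_array/.fini_array sections.
   The helper below is hypothetical and only restates that formatting.  */
#if 0	/* illustration only */
#include <stdio.h>
static void
example_cdtor_section_name (char buf[18], int is_ctor, unsigned int priority)
{
  snprintf (buf, 18, "%s.%.5u",
	    is_ctor ? ".init_array" : ".fini_array", priority);
}
#endif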
21910
21911 /* Add a function to the list of static constructors. */
21912
21913 static void
21914 arm_elf_asm_constructor (rtx symbol, int priority)
21915 {
21916 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21917 }
21918
21919 /* Add a function to the list of static destructors. */
21920
21921 static void
21922 arm_elf_asm_destructor (rtx symbol, int priority)
21923 {
21924 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21925 }
21926 \f
21927 /* A finite state machine takes care of noticing whether or not instructions
21928 can be conditionally executed, and thus decreases execution time and code
21929 size by deleting branch instructions. The fsm is controlled by
21930 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21931
21932 /* The states of the fsm controlling condition codes are:
21933 0: normal, do nothing special
21934 1: make ASM_OUTPUT_OPCODE not output this instruction
21935 2: make ASM_OUTPUT_OPCODE not output this instruction
21936 3: make instructions conditional
21937 4: make instructions conditional
21938
21939 State transitions (state->state by whom under condition):
21940 0 -> 1 final_prescan_insn if the `target' is a label
21941 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21942 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21943 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21944 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21945 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21946 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21947 (the target insn is arm_target_insn).
21948
21949 If the jump clobbers the conditions then we use states 2 and 4.
21950
21951 A similar thing can be done with conditional return insns.
21952
21953 XXX In case the `target' is an unconditional branch, this conditionalising
21954 of the instructions always reduces code size, but not always execution
21955 time. But then, I want to reduce the code size to somewhere near what
21956 /bin/cc produces. */
21957
21958 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21959 instructions. When a COND_EXEC instruction is seen the subsequent
21960 instructions are scanned so that multiple conditional instructions can be
21961 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21962 specify the length and true/false mask for the IT block. These will be
21963 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21964
21965 /* Returns the index of the ARM condition code string in
21966 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21967 COMPARISON should be an rtx like `(eq (...) (...))'. */
21968
21969 enum arm_cond_code
21970 maybe_get_arm_condition_code (rtx comparison)
21971 {
21972 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
21973 enum arm_cond_code code;
21974 enum rtx_code comp_code = GET_CODE (comparison);
21975
21976 if (GET_MODE_CLASS (mode) != MODE_CC)
21977 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21978 XEXP (comparison, 1));
21979
21980 switch (mode)
21981 {
21982 case CC_DNEmode: code = ARM_NE; goto dominance;
21983 case CC_DEQmode: code = ARM_EQ; goto dominance;
21984 case CC_DGEmode: code = ARM_GE; goto dominance;
21985 case CC_DGTmode: code = ARM_GT; goto dominance;
21986 case CC_DLEmode: code = ARM_LE; goto dominance;
21987 case CC_DLTmode: code = ARM_LT; goto dominance;
21988 case CC_DGEUmode: code = ARM_CS; goto dominance;
21989 case CC_DGTUmode: code = ARM_HI; goto dominance;
21990 case CC_DLEUmode: code = ARM_LS; goto dominance;
21991 case CC_DLTUmode: code = ARM_CC;
21992
21993 dominance:
21994 if (comp_code == EQ)
21995 return ARM_INVERSE_CONDITION_CODE (code);
21996 if (comp_code == NE)
21997 return code;
21998 return ARM_NV;
21999
22000 case CC_NOOVmode:
22001 switch (comp_code)
22002 {
22003 case NE: return ARM_NE;
22004 case EQ: return ARM_EQ;
22005 case GE: return ARM_PL;
22006 case LT: return ARM_MI;
22007 default: return ARM_NV;
22008 }
22009
22010 case CC_Zmode:
22011 switch (comp_code)
22012 {
22013 case NE: return ARM_NE;
22014 case EQ: return ARM_EQ;
22015 default: return ARM_NV;
22016 }
22017
22018 case CC_Nmode:
22019 switch (comp_code)
22020 {
22021 case NE: return ARM_MI;
22022 case EQ: return ARM_PL;
22023 default: return ARM_NV;
22024 }
22025
22026 case CCFPEmode:
22027 case CCFPmode:
22028 /* We can handle all cases except UNEQ and LTGT. */
22029 switch (comp_code)
22030 {
22031 case GE: return ARM_GE;
22032 case GT: return ARM_GT;
22033 case LE: return ARM_LS;
22034 case LT: return ARM_MI;
22035 case NE: return ARM_NE;
22036 case EQ: return ARM_EQ;
22037 case ORDERED: return ARM_VC;
22038 case UNORDERED: return ARM_VS;
22039 case UNLT: return ARM_LT;
22040 case UNLE: return ARM_LE;
22041 case UNGT: return ARM_HI;
22042 case UNGE: return ARM_PL;
22043 /* UNEQ and LTGT do not have a representation. */
22044 case UNEQ: /* Fall through. */
22045 case LTGT: /* Fall through. */
22046 default: return ARM_NV;
22047 }
22048
22049 case CC_SWPmode:
22050 switch (comp_code)
22051 {
22052 case NE: return ARM_NE;
22053 case EQ: return ARM_EQ;
22054 case GE: return ARM_LE;
22055 case GT: return ARM_LT;
22056 case LE: return ARM_GE;
22057 case LT: return ARM_GT;
22058 case GEU: return ARM_LS;
22059 case GTU: return ARM_CC;
22060 case LEU: return ARM_CS;
22061 case LTU: return ARM_HI;
22062 default: return ARM_NV;
22063 }
22064
22065 case CC_Cmode:
22066 switch (comp_code)
22067 {
22068 case LTU: return ARM_CS;
22069 case GEU: return ARM_CC;
22070 default: return ARM_NV;
22071 }
22072
22073 case CC_CZmode:
22074 switch (comp_code)
22075 {
22076 case NE: return ARM_NE;
22077 case EQ: return ARM_EQ;
22078 case GEU: return ARM_CS;
22079 case GTU: return ARM_HI;
22080 case LEU: return ARM_LS;
22081 case LTU: return ARM_CC;
22082 default: return ARM_NV;
22083 }
22084
22085 case CC_NCVmode:
22086 switch (comp_code)
22087 {
22088 case GE: return ARM_GE;
22089 case LT: return ARM_LT;
22090 case GEU: return ARM_CS;
22091 case LTU: return ARM_CC;
22092 default: return ARM_NV;
22093 }
22094
22095 case CCmode:
22096 switch (comp_code)
22097 {
22098 case NE: return ARM_NE;
22099 case EQ: return ARM_EQ;
22100 case GE: return ARM_GE;
22101 case GT: return ARM_GT;
22102 case LE: return ARM_LE;
22103 case LT: return ARM_LT;
22104 case GEU: return ARM_CS;
22105 case GTU: return ARM_HI;
22106 case LEU: return ARM_LS;
22107 case LTU: return ARM_CC;
22108 default: return ARM_NV;
22109 }
22110
22111 default: gcc_unreachable ();
22112 }
22113 }
22114
22115 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22116 static enum arm_cond_code
22117 get_arm_condition_code (rtx comparison)
22118 {
22119 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22120 gcc_assert (code != ARM_NV);
22121 return code;
22122 }
22123
22124 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22125 instructions. */
22126 void
22127 thumb2_final_prescan_insn (rtx insn)
22128 {
22129 rtx first_insn = insn;
22130 rtx body = PATTERN (insn);
22131 rtx predicate;
22132 enum arm_cond_code code;
22133 int n;
22134 int mask;
22135 int max;
22136
22137 /* Maximum number of conditionally executed instructions in a block
22138 is the minimum of the two max values: the maximum allowed in an IT block
22139 and the maximum that is beneficial according to the cost model and tune. */
22140 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
22141 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
22142
22143 /* Remove the previous insn from the count of insns to be output. */
22144 if (arm_condexec_count)
22145 arm_condexec_count--;
22146
22147 /* Nothing to do if we are already inside a conditional block. */
22148 if (arm_condexec_count)
22149 return;
22150
22151 if (GET_CODE (body) != COND_EXEC)
22152 return;
22153
22154 /* Conditional jumps are implemented directly. */
22155 if (JUMP_P (insn))
22156 return;
22157
22158 predicate = COND_EXEC_TEST (body);
22159 arm_current_cc = get_arm_condition_code (predicate);
22160
22161 n = get_attr_ce_count (insn);
22162 arm_condexec_count = 1;
22163 arm_condexec_mask = (1 << n) - 1;
22164 arm_condexec_masklen = n;
22165 /* See if subsequent instructions can be combined into the same block. */
22166 for (;;)
22167 {
22168 insn = next_nonnote_insn (insn);
22169
22170 /* Jumping into the middle of an IT block is illegal, so a label or
22171 barrier terminates the block. */
22172 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22173 break;
22174
22175 body = PATTERN (insn);
22176 /* USE and CLOBBER aren't really insns, so just skip them. */
22177 if (GET_CODE (body) == USE
22178 || GET_CODE (body) == CLOBBER)
22179 continue;
22180
22181 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22182 if (GET_CODE (body) != COND_EXEC)
22183 break;
22184 /* Number of conditionally executed insns this pattern expands to. */
22185 n = get_attr_ce_count (insn);
22186 if (arm_condexec_masklen + n > max)
22187 break;
22188
22189 predicate = COND_EXEC_TEST (body);
22190 code = get_arm_condition_code (predicate);
22191 mask = (1 << n) - 1;
22192 if (arm_current_cc == code)
22193 arm_condexec_mask |= (mask << arm_condexec_masklen);
22194 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22195 break;
22196
22197 arm_condexec_count++;
22198 arm_condexec_masklen += n;
22199
22200 /* A jump must be the last instruction in a conditional block. */
22201 if (JUMP_P (insn))
22202 break;
22203 }
22204 /* Restore recog_data (getting the attributes of other insns can
22205 destroy this array, but final.c assumes that it remains intact
22206 across this call). */
22207 extract_constrain_insn_cached (first_insn);
22208 }
22209
22210 void
22211 arm_final_prescan_insn (rtx insn)
22212 {
22213 /* BODY will hold the body of INSN. */
22214 rtx body = PATTERN (insn);
22215
22216 /* This will be 1 if trying to repeat the trick, and things need to be
22217 reversed if it appears to fail. */
22218 int reverse = 0;
22219
22220 /* If we start with a return insn, we only succeed if we find another one. */
22221 int seeking_return = 0;
22222 enum rtx_code return_code = UNKNOWN;
22223
22224 /* START_INSN will hold the insn from where we start looking. This is the
22225 first insn after the following code_label if REVERSE is true. */
22226 rtx start_insn = insn;
22227
22228 /* If in state 4, check if the target branch is reached, in order to
22229 change back to state 0. */
22230 if (arm_ccfsm_state == 4)
22231 {
22232 if (insn == arm_target_insn)
22233 {
22234 arm_target_insn = NULL;
22235 arm_ccfsm_state = 0;
22236 }
22237 return;
22238 }
22239
22240 /* If in state 3, it is possible to repeat the trick, if this insn is an
22241 unconditional branch to a label, and immediately following this branch
22242 is the previous target label which is only used once, and the label this
22243 branch jumps to is not too far off. */
22244 if (arm_ccfsm_state == 3)
22245 {
22246 if (simplejump_p (insn))
22247 {
22248 start_insn = next_nonnote_insn (start_insn);
22249 if (BARRIER_P (start_insn))
22250 {
22251 /* XXX Isn't this always a barrier? */
22252 start_insn = next_nonnote_insn (start_insn);
22253 }
22254 if (LABEL_P (start_insn)
22255 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22256 && LABEL_NUSES (start_insn) == 1)
22257 reverse = TRUE;
22258 else
22259 return;
22260 }
22261 else if (ANY_RETURN_P (body))
22262 {
22263 start_insn = next_nonnote_insn (start_insn);
22264 if (BARRIER_P (start_insn))
22265 start_insn = next_nonnote_insn (start_insn);
22266 if (LABEL_P (start_insn)
22267 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22268 && LABEL_NUSES (start_insn) == 1)
22269 {
22270 reverse = TRUE;
22271 seeking_return = 1;
22272 return_code = GET_CODE (body);
22273 }
22274 else
22275 return;
22276 }
22277 else
22278 return;
22279 }
22280
22281 gcc_assert (!arm_ccfsm_state || reverse);
22282 if (!JUMP_P (insn))
22283 return;
22284
22285 /* This jump might be paralleled with a clobber of the condition codes;
22286 the jump should always come first. */
22287 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22288 body = XVECEXP (body, 0, 0);
22289
22290 if (reverse
22291 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22292 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22293 {
22294 int insns_skipped;
22295 int fail = FALSE, succeed = FALSE;
22296 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22297 int then_not_else = TRUE;
22298 rtx this_insn = start_insn, label = 0;
22299
22300 /* Register the insn jumped to. */
22301 if (reverse)
22302 {
22303 if (!seeking_return)
22304 label = XEXP (SET_SRC (body), 0);
22305 }
22306 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22307 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22308 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22309 {
22310 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22311 then_not_else = FALSE;
22312 }
22313 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22314 {
22315 seeking_return = 1;
22316 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22317 }
22318 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22319 {
22320 seeking_return = 1;
22321 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22322 then_not_else = FALSE;
22323 }
22324 else
22325 gcc_unreachable ();
22326
22327 /* See how many insns this branch skips, and what kind of insns. If all
22328 insns are okay, and the label or unconditional branch to the same
22329 label is not too far away, succeed. */
22330 for (insns_skipped = 0;
22331 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22332 {
22333 rtx scanbody;
22334
22335 this_insn = next_nonnote_insn (this_insn);
22336 if (!this_insn)
22337 break;
22338
22339 switch (GET_CODE (this_insn))
22340 {
22341 case CODE_LABEL:
22342 /* Succeed if it is the target label, otherwise fail since
22343 control falls in from somewhere else. */
22344 if (this_insn == label)
22345 {
22346 arm_ccfsm_state = 1;
22347 succeed = TRUE;
22348 }
22349 else
22350 fail = TRUE;
22351 break;
22352
22353 case BARRIER:
22354 /* Succeed if the following insn is the target label.
22355 Otherwise fail.
22356 If return insns are used then the last insn in a function
22357 will be a barrier. */
22358 this_insn = next_nonnote_insn (this_insn);
22359 if (this_insn && this_insn == label)
22360 {
22361 arm_ccfsm_state = 1;
22362 succeed = TRUE;
22363 }
22364 else
22365 fail = TRUE;
22366 break;
22367
22368 case CALL_INSN:
22369 /* The AAPCS says that conditional calls should not be
22370 used since they make interworking inefficient (the
22371 linker can't transform BL<cond> into BLX). That's
22372 only a problem if the machine has BLX. */
22373 if (arm_arch5)
22374 {
22375 fail = TRUE;
22376 break;
22377 }
22378
22379 /* Succeed if the following insn is the target label, or
22380 if the following two insns are a barrier and the
22381 target label. */
22382 this_insn = next_nonnote_insn (this_insn);
22383 if (this_insn && BARRIER_P (this_insn))
22384 this_insn = next_nonnote_insn (this_insn);
22385
22386 if (this_insn && this_insn == label
22387 && insns_skipped < max_insns_skipped)
22388 {
22389 arm_ccfsm_state = 1;
22390 succeed = TRUE;
22391 }
22392 else
22393 fail = TRUE;
22394 break;
22395
22396 case JUMP_INSN:
22397 /* If this is an unconditional branch to the same label, succeed.
22398 If it is to another label, do nothing. If it is conditional,
22399 fail. */
22400 /* XXX Probably, the tests for SET and the PC are
22401 unnecessary. */
22402
22403 scanbody = PATTERN (this_insn);
22404 if (GET_CODE (scanbody) == SET
22405 && GET_CODE (SET_DEST (scanbody)) == PC)
22406 {
22407 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22408 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22409 {
22410 arm_ccfsm_state = 2;
22411 succeed = TRUE;
22412 }
22413 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22414 fail = TRUE;
22415 }
22416 /* Fail if a conditional return is undesirable (e.g. on a
22417 StrongARM), but still allow this if optimizing for size. */
22418 else if (GET_CODE (scanbody) == return_code
22419 && !use_return_insn (TRUE, NULL)
22420 && !optimize_size)
22421 fail = TRUE;
22422 else if (GET_CODE (scanbody) == return_code)
22423 {
22424 arm_ccfsm_state = 2;
22425 succeed = TRUE;
22426 }
22427 else if (GET_CODE (scanbody) == PARALLEL)
22428 {
22429 switch (get_attr_conds (this_insn))
22430 {
22431 case CONDS_NOCOND:
22432 break;
22433 default:
22434 fail = TRUE;
22435 break;
22436 }
22437 }
22438 else
22439 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22440
22441 break;
22442
22443 case INSN:
22444 /* Instructions using or affecting the condition codes make it
22445 fail. */
22446 scanbody = PATTERN (this_insn);
22447 if (!(GET_CODE (scanbody) == SET
22448 || GET_CODE (scanbody) == PARALLEL)
22449 || get_attr_conds (this_insn) != CONDS_NOCOND)
22450 fail = TRUE;
22451 break;
22452
22453 default:
22454 break;
22455 }
22456 }
22457 if (succeed)
22458 {
22459 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22460 arm_target_label = CODE_LABEL_NUMBER (label);
22461 else
22462 {
22463 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22464
22465 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22466 {
22467 this_insn = next_nonnote_insn (this_insn);
22468 gcc_assert (!this_insn
22469 || (!BARRIER_P (this_insn)
22470 && !LABEL_P (this_insn)));
22471 }
22472 if (!this_insn)
22473 {
22474 /* Oh, dear! We ran off the end; give up. */
22475 extract_constrain_insn_cached (insn);
22476 arm_ccfsm_state = 0;
22477 arm_target_insn = NULL;
22478 return;
22479 }
22480 arm_target_insn = this_insn;
22481 }
22482
22483 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22484 what it was. */
22485 if (!reverse)
22486 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22487
22488 if (reverse || then_not_else)
22489 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22490 }
22491
22492 /* Restore recog_data (getting the attributes of other insns can
22493 destroy this array, but final.c assumes that it remains intact
22494 across this call). */
22495 extract_constrain_insn_cached (insn);
22496 }
22497 }
22498
22499 /* Output IT instructions. */
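/* A purely illustrative sketch of the encoding consumed below (derived from
   the mask built up in thumb2_final_prescan_insn): with arm_current_cc ==
   ARM_EQ, arm_condexec_masklen == 3 and arm_condexec_mask == 0b101, bit n set
   means insn n of the block uses the base condition, so buff becomes "tet"
   and we emit "itet eq"; the following three insns then execute under EQ, NE
   and EQ respectively.  */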
22500 void
22501 thumb2_asm_output_opcode (FILE * stream)
22502 {
22503 char buff[5];
22504 int n;
22505
22506 if (arm_condexec_mask)
22507 {
22508 for (n = 0; n < arm_condexec_masklen; n++)
22509 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22510 buff[n] = 0;
22511 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22512 arm_condition_codes[arm_current_cc]);
22513 arm_condexec_mask = 0;
22514 }
22515 }
22516
22517 /* Returns true if REGNO is a valid register
22518 for holding a quantity of type MODE. */
22519 int
22520 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22521 {
22522 if (GET_MODE_CLASS (mode) == MODE_CC)
22523 return (regno == CC_REGNUM
22524 || (TARGET_HARD_FLOAT && TARGET_VFP
22525 && regno == VFPCC_REGNUM));
22526
22527 if (TARGET_THUMB1)
22528 /* For the Thumb we only allow values bigger than SImode in
22529 registers 0 - 6, so that there is always a second low
22530 register available to hold the upper part of the value.
22531 We probably ought to ensure that the register is the
22532 start of an even numbered register pair. */
22533 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22534
22535 if (TARGET_HARD_FLOAT && TARGET_VFP
22536 && IS_VFP_REGNUM (regno))
22537 {
22538 if (mode == SFmode || mode == SImode)
22539 return VFP_REGNO_OK_FOR_SINGLE (regno);
22540
22541 if (mode == DFmode)
22542 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22543
22544 /* VFP registers can hold HFmode values, but there is no point in
22545 putting them there unless we have hardware conversion insns. */
22546 if (mode == HFmode)
22547 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22548
22549 if (TARGET_NEON)
22550 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22551 || (VALID_NEON_QREG_MODE (mode)
22552 && NEON_REGNO_OK_FOR_QUAD (regno))
22553 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22554 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22555 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22556 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22557 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22558
22559 return FALSE;
22560 }
22561
22562 if (TARGET_REALLY_IWMMXT)
22563 {
22564 if (IS_IWMMXT_GR_REGNUM (regno))
22565 return mode == SImode;
22566
22567 if (IS_IWMMXT_REGNUM (regno))
22568 return VALID_IWMMXT_REG_MODE (mode);
22569 }
22570
22571 /* We allow almost any value to be stored in the general registers.
22572 Restrict doubleword quantities to even register pairs so that we can
22573 use ldrd. Do not allow very large Neon structure opaque modes in
22574 general registers; they would use too many. */
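/* For example, when TARGET_LDRD is set a DImode value may live in r0/r1 or
   r2/r3, but not in a pair starting at an odd register such as r1.  */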
22575 if (regno <= LAST_ARM_REGNUM)
22576 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22577 && ARM_NUM_REGS (mode) <= 4;
22578
22579 if (regno == FRAME_POINTER_REGNUM
22580 || regno == ARG_POINTER_REGNUM)
22581 /* We only allow integers in the fake hard registers. */
22582 return GET_MODE_CLASS (mode) == MODE_INT;
22583
22584 return FALSE;
22585 }
22586
22587 /* Implement MODES_TIEABLE_P. */
22588
22589 bool
22590 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22591 {
22592 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22593 return true;
22594
22595 /* We specifically want to allow elements of "structure" modes to
22596 be tieable to the structure. This more general condition allows
22597 other rarer situations too. */
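/* For instance, a V4SImode vector can be tied to an OImode value holding a
   structure of such vectors, and (more rarely) V2SImode and V2SFmode values
   may share a register even though their mode classes differ.  */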
22598 if (TARGET_NEON
22599 && (VALID_NEON_DREG_MODE (mode1)
22600 || VALID_NEON_QREG_MODE (mode1)
22601 || VALID_NEON_STRUCT_MODE (mode1))
22602 && (VALID_NEON_DREG_MODE (mode2)
22603 || VALID_NEON_QREG_MODE (mode2)
22604 || VALID_NEON_STRUCT_MODE (mode2)))
22605 return true;
22606
22607 return false;
22608 }
22609
22610 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
22611 not used in ARM mode. */
22612
22613 enum reg_class
22614 arm_regno_class (int regno)
22615 {
22616 if (TARGET_THUMB1)
22617 {
22618 if (regno == STACK_POINTER_REGNUM)
22619 return STACK_REG;
22620 if (regno == CC_REGNUM)
22621 return CC_REG;
22622 if (regno < 8)
22623 return LO_REGS;
22624 return HI_REGS;
22625 }
22626
22627 if (TARGET_THUMB2 && regno < 8)
22628 return LO_REGS;
22629
22630 if ( regno <= LAST_ARM_REGNUM
22631 || regno == FRAME_POINTER_REGNUM
22632 || regno == ARG_POINTER_REGNUM)
22633 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22634
22635 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22636 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22637
22638 if (IS_VFP_REGNUM (regno))
22639 {
22640 if (regno <= D7_VFP_REGNUM)
22641 return VFP_D0_D7_REGS;
22642 else if (regno <= LAST_LO_VFP_REGNUM)
22643 return VFP_LO_REGS;
22644 else
22645 return VFP_HI_REGS;
22646 }
22647
22648 if (IS_IWMMXT_REGNUM (regno))
22649 return IWMMXT_REGS;
22650
22651 if (IS_IWMMXT_GR_REGNUM (regno))
22652 return IWMMXT_GR_REGS;
22653
22654 return NO_REGS;
22655 }
22656
22657 /* Handle a special case when computing the offset
22658 of an argument from the frame pointer. */
22659 int
22660 arm_debugger_arg_offset (int value, rtx addr)
22661 {
22662 rtx insn;
22663
22664 /* We are only interested if dbxout_parms() failed to compute the offset. */
22665 if (value != 0)
22666 return 0;
22667
22668 /* We can only cope with the case where the address is held in a register. */
22669 if (!REG_P (addr))
22670 return 0;
22671
22672 /* If we are using the frame pointer to point at the argument, then
22673 an offset of 0 is correct. */
22674 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22675 return 0;
22676
22677 /* If we are using the stack pointer to point at the
22678 argument, then an offset of 0 is correct. */
22679 /* ??? Check this is consistent with thumb2 frame layout. */
22680 if ((TARGET_THUMB || !frame_pointer_needed)
22681 && REGNO (addr) == SP_REGNUM)
22682 return 0;
22683
22684 /* Oh dear. The argument is pointed to by a register rather
22685 than being held in a register, or being stored at a known
22686 offset from the frame pointer. Since GDB only understands
22687 those two kinds of argument we must translate the address
22688 held in the register into an offset from the frame pointer.
22689 We do this by searching through the insns for the function
22690 looking to see where this register gets its value. If the
22691 register is initialized from the frame pointer plus an offset
22692 then we are in luck and we can continue, otherwise we give up.
22693
22694 This code is exercised by producing debugging information
22695 for a function with arguments like this:
22696
22697 double func (double a, double b, int c, double d) {return d;}
22698
22699 Without this code the stab for parameter 'd' will be set to
22700 an offset of 0 from the frame pointer, rather than 8. */
22701
22702 /* The if() statement says:
22703
22704 If the insn is a normal instruction
22705 and if the insn is setting the value in a register
22706 and if the register being set is the register holding the address of the argument
22707 and if the address is computed by an addition
22708 that involves adding to a register
22709 which is the frame pointer
22710 a constant integer
22711
22712 then... */
22713
22714 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22715 {
22716 if ( NONJUMP_INSN_P (insn)
22717 && GET_CODE (PATTERN (insn)) == SET
22718 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22719 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22720 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22721 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22722 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22723 )
22724 {
22725 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22726
22727 break;
22728 }
22729 }
22730
22731 if (value == 0)
22732 {
22733 debug_rtx (addr);
22734 warning (0, "unable to compute real location of stacked parameter");
22735 value = 8; /* XXX magic hack */
22736 }
22737
22738 return value;
22739 }
22740 \f
22741 typedef enum {
22742 T_V8QI,
22743 T_V4HI,
22744 T_V4HF,
22745 T_V2SI,
22746 T_V2SF,
22747 T_DI,
22748 T_V16QI,
22749 T_V8HI,
22750 T_V4SI,
22751 T_V4SF,
22752 T_V2DI,
22753 T_TI,
22754 T_EI,
22755 T_OI,
22756 T_MAX /* Size of enum. Keep last. */
22757 } neon_builtin_type_mode;
22758
22759 #define TYPE_MODE_BIT(X) (1 << (X))
22760
22761 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22762 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22763 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22764 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22765 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22766 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22767
22768 #define v8qi_UP T_V8QI
22769 #define v4hi_UP T_V4HI
22770 #define v4hf_UP T_V4HF
22771 #define v2si_UP T_V2SI
22772 #define v2sf_UP T_V2SF
22773 #define di_UP T_DI
22774 #define v16qi_UP T_V16QI
22775 #define v8hi_UP T_V8HI
22776 #define v4si_UP T_V4SI
22777 #define v4sf_UP T_V4SF
22778 #define v2di_UP T_V2DI
22779 #define ti_UP T_TI
22780 #define ei_UP T_EI
22781 #define oi_UP T_OI
22782
22783 #define UP(X) X##_UP
22784
22785 typedef enum {
22786 NEON_BINOP,
22787 NEON_TERNOP,
22788 NEON_UNOP,
22789 NEON_GETLANE,
22790 NEON_SETLANE,
22791 NEON_CREATE,
22792 NEON_RINT,
22793 NEON_DUP,
22794 NEON_DUPLANE,
22795 NEON_COMBINE,
22796 NEON_SPLIT,
22797 NEON_LANEMUL,
22798 NEON_LANEMULL,
22799 NEON_LANEMULH,
22800 NEON_LANEMAC,
22801 NEON_SCALARMUL,
22802 NEON_SCALARMULL,
22803 NEON_SCALARMULH,
22804 NEON_SCALARMAC,
22805 NEON_CONVERT,
22806 NEON_FLOAT_WIDEN,
22807 NEON_FLOAT_NARROW,
22808 NEON_FIXCONV,
22809 NEON_SELECT,
22810 NEON_RESULTPAIR,
22811 NEON_REINTERP,
22812 NEON_VTBL,
22813 NEON_VTBX,
22814 NEON_LOAD1,
22815 NEON_LOAD1LANE,
22816 NEON_STORE1,
22817 NEON_STORE1LANE,
22818 NEON_LOADSTRUCT,
22819 NEON_LOADSTRUCTLANE,
22820 NEON_STORESTRUCT,
22821 NEON_STORESTRUCTLANE,
22822 NEON_LOGICBINOP,
22823 NEON_SHIFTINSERT,
22824 NEON_SHIFTIMM,
22825 NEON_SHIFTACC
22826 } neon_itype;
22827
22828 typedef struct {
22829 const char *name;
22830 const neon_itype itype;
22831 const neon_builtin_type_mode mode;
22832 const enum insn_code code;
22833 unsigned int fcode;
22834 } neon_builtin_datum;
22835
22836 #define CF(N,X) CODE_FOR_neon_##N##X
22837
22838 #define VAR1(T, N, A) \
22839 {#N, NEON_##T, UP (A), CF (N, A), 0}
22840 #define VAR2(T, N, A, B) \
22841 VAR1 (T, N, A), \
22842 {#N, NEON_##T, UP (B), CF (N, B), 0}
22843 #define VAR3(T, N, A, B, C) \
22844 VAR2 (T, N, A, B), \
22845 {#N, NEON_##T, UP (C), CF (N, C), 0}
22846 #define VAR4(T, N, A, B, C, D) \
22847 VAR3 (T, N, A, B, C), \
22848 {#N, NEON_##T, UP (D), CF (N, D), 0}
22849 #define VAR5(T, N, A, B, C, D, E) \
22850 VAR4 (T, N, A, B, C, D), \
22851 {#N, NEON_##T, UP (E), CF (N, E), 0}
22852 #define VAR6(T, N, A, B, C, D, E, F) \
22853 VAR5 (T, N, A, B, C, D, E), \
22854 {#N, NEON_##T, UP (F), CF (N, F), 0}
22855 #define VAR7(T, N, A, B, C, D, E, F, G) \
22856 VAR6 (T, N, A, B, C, D, E, F), \
22857 {#N, NEON_##T, UP (G), CF (N, G), 0}
22858 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22859 VAR7 (T, N, A, B, C, D, E, F, G), \
22860 {#N, NEON_##T, UP (H), CF (N, H), 0}
22861 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22862 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22863 {#N, NEON_##T, UP (I), CF (N, I), 0}
22864 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22865 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22866 {#N, NEON_##T, UP (J), CF (N, J), 0}
22867
22868 /* The NEON builtin data can be found in arm_neon_builtins.def.
22869 The mode entries in the following table correspond to the "key" type of the
22870 instruction variant, i.e. equivalent to that which would be specified after
22871 the assembler mnemonic, which usually refers to the last vector operand.
22872 (Signed, unsigned and polynomial types are not differentiated, though;
22873 they are all mapped onto the same mode for a given element size.) The modes
22874 listed per instruction should be the same as those defined for that
22875 instruction's pattern in neon.md. */
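/* As a purely illustrative (hypothetical) example of how the VAR macros above
   expand, an entry such as
     VAR2 (BINOP, vadd, v8qi, v16qi)
   in arm_neon_builtins.def would become the two table entries
     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}.  */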
22876
22877 static neon_builtin_datum neon_builtin_data[] =
22878 {
22879 #include "arm_neon_builtins.def"
22880 };
22881
22882 #undef CF
22883 #undef VAR1
22884 #undef VAR2
22885 #undef VAR3
22886 #undef VAR4
22887 #undef VAR5
22888 #undef VAR6
22889 #undef VAR7
22890 #undef VAR8
22891 #undef VAR9
22892 #undef VAR10
22893
22894 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22895 #define VAR1(T, N, A) \
22896 CF (N, A)
22897 #define VAR2(T, N, A, B) \
22898 VAR1 (T, N, A), \
22899 CF (N, B)
22900 #define VAR3(T, N, A, B, C) \
22901 VAR2 (T, N, A, B), \
22902 CF (N, C)
22903 #define VAR4(T, N, A, B, C, D) \
22904 VAR3 (T, N, A, B, C), \
22905 CF (N, D)
22906 #define VAR5(T, N, A, B, C, D, E) \
22907 VAR4 (T, N, A, B, C, D), \
22908 CF (N, E)
22909 #define VAR6(T, N, A, B, C, D, E, F) \
22910 VAR5 (T, N, A, B, C, D, E), \
22911 CF (N, F)
22912 #define VAR7(T, N, A, B, C, D, E, F, G) \
22913 VAR6 (T, N, A, B, C, D, E, F), \
22914 CF (N, G)
22915 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22916 VAR7 (T, N, A, B, C, D, E, F, G), \
22917 CF (N, H)
22918 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22919 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22920 CF (N, I)
22921 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22922 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22923 CF (N, J)
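/* With the redefinitions above, the same .def entries now expand to
   enumerator names instead; the hypothetical VAR2 (BINOP, vadd, v8qi, v16qi)
   used as an example earlier would yield ARM_BUILTIN_NEON_vaddv8qi and
   ARM_BUILTIN_NEON_vaddv16qi.  */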
22924 enum arm_builtins
22925 {
22926 ARM_BUILTIN_GETWCGR0,
22927 ARM_BUILTIN_GETWCGR1,
22928 ARM_BUILTIN_GETWCGR2,
22929 ARM_BUILTIN_GETWCGR3,
22930
22931 ARM_BUILTIN_SETWCGR0,
22932 ARM_BUILTIN_SETWCGR1,
22933 ARM_BUILTIN_SETWCGR2,
22934 ARM_BUILTIN_SETWCGR3,
22935
22936 ARM_BUILTIN_WZERO,
22937
22938 ARM_BUILTIN_WAVG2BR,
22939 ARM_BUILTIN_WAVG2HR,
22940 ARM_BUILTIN_WAVG2B,
22941 ARM_BUILTIN_WAVG2H,
22942
22943 ARM_BUILTIN_WACCB,
22944 ARM_BUILTIN_WACCH,
22945 ARM_BUILTIN_WACCW,
22946
22947 ARM_BUILTIN_WMACS,
22948 ARM_BUILTIN_WMACSZ,
22949 ARM_BUILTIN_WMACU,
22950 ARM_BUILTIN_WMACUZ,
22951
22952 ARM_BUILTIN_WSADB,
22953 ARM_BUILTIN_WSADBZ,
22954 ARM_BUILTIN_WSADH,
22955 ARM_BUILTIN_WSADHZ,
22956
22957 ARM_BUILTIN_WALIGNI,
22958 ARM_BUILTIN_WALIGNR0,
22959 ARM_BUILTIN_WALIGNR1,
22960 ARM_BUILTIN_WALIGNR2,
22961 ARM_BUILTIN_WALIGNR3,
22962
22963 ARM_BUILTIN_TMIA,
22964 ARM_BUILTIN_TMIAPH,
22965 ARM_BUILTIN_TMIABB,
22966 ARM_BUILTIN_TMIABT,
22967 ARM_BUILTIN_TMIATB,
22968 ARM_BUILTIN_TMIATT,
22969
22970 ARM_BUILTIN_TMOVMSKB,
22971 ARM_BUILTIN_TMOVMSKH,
22972 ARM_BUILTIN_TMOVMSKW,
22973
22974 ARM_BUILTIN_TBCSTB,
22975 ARM_BUILTIN_TBCSTH,
22976 ARM_BUILTIN_TBCSTW,
22977
22978 ARM_BUILTIN_WMADDS,
22979 ARM_BUILTIN_WMADDU,
22980
22981 ARM_BUILTIN_WPACKHSS,
22982 ARM_BUILTIN_WPACKWSS,
22983 ARM_BUILTIN_WPACKDSS,
22984 ARM_BUILTIN_WPACKHUS,
22985 ARM_BUILTIN_WPACKWUS,
22986 ARM_BUILTIN_WPACKDUS,
22987
22988 ARM_BUILTIN_WADDB,
22989 ARM_BUILTIN_WADDH,
22990 ARM_BUILTIN_WADDW,
22991 ARM_BUILTIN_WADDSSB,
22992 ARM_BUILTIN_WADDSSH,
22993 ARM_BUILTIN_WADDSSW,
22994 ARM_BUILTIN_WADDUSB,
22995 ARM_BUILTIN_WADDUSH,
22996 ARM_BUILTIN_WADDUSW,
22997 ARM_BUILTIN_WSUBB,
22998 ARM_BUILTIN_WSUBH,
22999 ARM_BUILTIN_WSUBW,
23000 ARM_BUILTIN_WSUBSSB,
23001 ARM_BUILTIN_WSUBSSH,
23002 ARM_BUILTIN_WSUBSSW,
23003 ARM_BUILTIN_WSUBUSB,
23004 ARM_BUILTIN_WSUBUSH,
23005 ARM_BUILTIN_WSUBUSW,
23006
23007 ARM_BUILTIN_WAND,
23008 ARM_BUILTIN_WANDN,
23009 ARM_BUILTIN_WOR,
23010 ARM_BUILTIN_WXOR,
23011
23012 ARM_BUILTIN_WCMPEQB,
23013 ARM_BUILTIN_WCMPEQH,
23014 ARM_BUILTIN_WCMPEQW,
23015 ARM_BUILTIN_WCMPGTUB,
23016 ARM_BUILTIN_WCMPGTUH,
23017 ARM_BUILTIN_WCMPGTUW,
23018 ARM_BUILTIN_WCMPGTSB,
23019 ARM_BUILTIN_WCMPGTSH,
23020 ARM_BUILTIN_WCMPGTSW,
23021
23022 ARM_BUILTIN_TEXTRMSB,
23023 ARM_BUILTIN_TEXTRMSH,
23024 ARM_BUILTIN_TEXTRMSW,
23025 ARM_BUILTIN_TEXTRMUB,
23026 ARM_BUILTIN_TEXTRMUH,
23027 ARM_BUILTIN_TEXTRMUW,
23028 ARM_BUILTIN_TINSRB,
23029 ARM_BUILTIN_TINSRH,
23030 ARM_BUILTIN_TINSRW,
23031
23032 ARM_BUILTIN_WMAXSW,
23033 ARM_BUILTIN_WMAXSH,
23034 ARM_BUILTIN_WMAXSB,
23035 ARM_BUILTIN_WMAXUW,
23036 ARM_BUILTIN_WMAXUH,
23037 ARM_BUILTIN_WMAXUB,
23038 ARM_BUILTIN_WMINSW,
23039 ARM_BUILTIN_WMINSH,
23040 ARM_BUILTIN_WMINSB,
23041 ARM_BUILTIN_WMINUW,
23042 ARM_BUILTIN_WMINUH,
23043 ARM_BUILTIN_WMINUB,
23044
23045 ARM_BUILTIN_WMULUM,
23046 ARM_BUILTIN_WMULSM,
23047 ARM_BUILTIN_WMULUL,
23048
23049 ARM_BUILTIN_PSADBH,
23050 ARM_BUILTIN_WSHUFH,
23051
23052 ARM_BUILTIN_WSLLH,
23053 ARM_BUILTIN_WSLLW,
23054 ARM_BUILTIN_WSLLD,
23055 ARM_BUILTIN_WSRAH,
23056 ARM_BUILTIN_WSRAW,
23057 ARM_BUILTIN_WSRAD,
23058 ARM_BUILTIN_WSRLH,
23059 ARM_BUILTIN_WSRLW,
23060 ARM_BUILTIN_WSRLD,
23061 ARM_BUILTIN_WRORH,
23062 ARM_BUILTIN_WRORW,
23063 ARM_BUILTIN_WRORD,
23064 ARM_BUILTIN_WSLLHI,
23065 ARM_BUILTIN_WSLLWI,
23066 ARM_BUILTIN_WSLLDI,
23067 ARM_BUILTIN_WSRAHI,
23068 ARM_BUILTIN_WSRAWI,
23069 ARM_BUILTIN_WSRADI,
23070 ARM_BUILTIN_WSRLHI,
23071 ARM_BUILTIN_WSRLWI,
23072 ARM_BUILTIN_WSRLDI,
23073 ARM_BUILTIN_WRORHI,
23074 ARM_BUILTIN_WRORWI,
23075 ARM_BUILTIN_WRORDI,
23076
23077 ARM_BUILTIN_WUNPCKIHB,
23078 ARM_BUILTIN_WUNPCKIHH,
23079 ARM_BUILTIN_WUNPCKIHW,
23080 ARM_BUILTIN_WUNPCKILB,
23081 ARM_BUILTIN_WUNPCKILH,
23082 ARM_BUILTIN_WUNPCKILW,
23083
23084 ARM_BUILTIN_WUNPCKEHSB,
23085 ARM_BUILTIN_WUNPCKEHSH,
23086 ARM_BUILTIN_WUNPCKEHSW,
23087 ARM_BUILTIN_WUNPCKEHUB,
23088 ARM_BUILTIN_WUNPCKEHUH,
23089 ARM_BUILTIN_WUNPCKEHUW,
23090 ARM_BUILTIN_WUNPCKELSB,
23091 ARM_BUILTIN_WUNPCKELSH,
23092 ARM_BUILTIN_WUNPCKELSW,
23093 ARM_BUILTIN_WUNPCKELUB,
23094 ARM_BUILTIN_WUNPCKELUH,
23095 ARM_BUILTIN_WUNPCKELUW,
23096
23097 ARM_BUILTIN_WABSB,
23098 ARM_BUILTIN_WABSH,
23099 ARM_BUILTIN_WABSW,
23100
23101 ARM_BUILTIN_WADDSUBHX,
23102 ARM_BUILTIN_WSUBADDHX,
23103
23104 ARM_BUILTIN_WABSDIFFB,
23105 ARM_BUILTIN_WABSDIFFH,
23106 ARM_BUILTIN_WABSDIFFW,
23107
23108 ARM_BUILTIN_WADDCH,
23109 ARM_BUILTIN_WADDCW,
23110
23111 ARM_BUILTIN_WAVG4,
23112 ARM_BUILTIN_WAVG4R,
23113
23114 ARM_BUILTIN_WMADDSX,
23115 ARM_BUILTIN_WMADDUX,
23116
23117 ARM_BUILTIN_WMADDSN,
23118 ARM_BUILTIN_WMADDUN,
23119
23120 ARM_BUILTIN_WMULWSM,
23121 ARM_BUILTIN_WMULWUM,
23122
23123 ARM_BUILTIN_WMULWSMR,
23124 ARM_BUILTIN_WMULWUMR,
23125
23126 ARM_BUILTIN_WMULWL,
23127
23128 ARM_BUILTIN_WMULSMR,
23129 ARM_BUILTIN_WMULUMR,
23130
23131 ARM_BUILTIN_WQMULM,
23132 ARM_BUILTIN_WQMULMR,
23133
23134 ARM_BUILTIN_WQMULWM,
23135 ARM_BUILTIN_WQMULWMR,
23136
23137 ARM_BUILTIN_WADDBHUSM,
23138 ARM_BUILTIN_WADDBHUSL,
23139
23140 ARM_BUILTIN_WQMIABB,
23141 ARM_BUILTIN_WQMIABT,
23142 ARM_BUILTIN_WQMIATB,
23143 ARM_BUILTIN_WQMIATT,
23144
23145 ARM_BUILTIN_WQMIABBN,
23146 ARM_BUILTIN_WQMIABTN,
23147 ARM_BUILTIN_WQMIATBN,
23148 ARM_BUILTIN_WQMIATTN,
23149
23150 ARM_BUILTIN_WMIABB,
23151 ARM_BUILTIN_WMIABT,
23152 ARM_BUILTIN_WMIATB,
23153 ARM_BUILTIN_WMIATT,
23154
23155 ARM_BUILTIN_WMIABBN,
23156 ARM_BUILTIN_WMIABTN,
23157 ARM_BUILTIN_WMIATBN,
23158 ARM_BUILTIN_WMIATTN,
23159
23160 ARM_BUILTIN_WMIAWBB,
23161 ARM_BUILTIN_WMIAWBT,
23162 ARM_BUILTIN_WMIAWTB,
23163 ARM_BUILTIN_WMIAWTT,
23164
23165 ARM_BUILTIN_WMIAWBBN,
23166 ARM_BUILTIN_WMIAWBTN,
23167 ARM_BUILTIN_WMIAWTBN,
23168 ARM_BUILTIN_WMIAWTTN,
23169
23170 ARM_BUILTIN_WMERGE,
23171
23172 ARM_BUILTIN_CRC32B,
23173 ARM_BUILTIN_CRC32H,
23174 ARM_BUILTIN_CRC32W,
23175 ARM_BUILTIN_CRC32CB,
23176 ARM_BUILTIN_CRC32CH,
23177 ARM_BUILTIN_CRC32CW,
23178
23179 #undef CRYPTO1
23180 #undef CRYPTO2
23181 #undef CRYPTO3
23182
23183 #define CRYPTO1(L, U, M1, M2) \
23184 ARM_BUILTIN_CRYPTO_##U,
23185 #define CRYPTO2(L, U, M1, M2, M3) \
23186 ARM_BUILTIN_CRYPTO_##U,
23187 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23188 ARM_BUILTIN_CRYPTO_##U,
23189
23190 #include "crypto.def"
23191
23192 #undef CRYPTO1
23193 #undef CRYPTO2
23194 #undef CRYPTO3
23195
23196 #include "arm_neon_builtins.def"
23197
23198 ,ARM_BUILTIN_MAX
23199 };
23200
23201 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
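/* The NEON builtins thus occupy the last ARRAY_SIZE (neon_builtin_data)
   function codes, so a NEON builtin's fcode is ARM_BUILTIN_NEON_BASE plus its
   index in neon_builtin_data (see the registration loop in
   arm_init_neon_builtins below).  */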
23202
23203 #undef CF
23204 #undef VAR1
23205 #undef VAR2
23206 #undef VAR3
23207 #undef VAR4
23208 #undef VAR5
23209 #undef VAR6
23210 #undef VAR7
23211 #undef VAR8
23212 #undef VAR9
23213 #undef VAR10
23214
23215 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23216
23217 #define NUM_DREG_TYPES 5
23218 #define NUM_QREG_TYPES 6
23219
23220 static void
23221 arm_init_neon_builtins (void)
23222 {
23223 unsigned int i, fcode;
23224 tree decl;
23225
23226 tree neon_intQI_type_node;
23227 tree neon_intHI_type_node;
23228 tree neon_floatHF_type_node;
23229 tree neon_polyQI_type_node;
23230 tree neon_polyHI_type_node;
23231 tree neon_intSI_type_node;
23232 tree neon_intDI_type_node;
23233 tree neon_intUTI_type_node;
23234 tree neon_float_type_node;
23235
23236 tree intQI_pointer_node;
23237 tree intHI_pointer_node;
23238 tree intSI_pointer_node;
23239 tree intDI_pointer_node;
23240 tree float_pointer_node;
23241
23242 tree const_intQI_node;
23243 tree const_intHI_node;
23244 tree const_intSI_node;
23245 tree const_intDI_node;
23246 tree const_float_node;
23247
23248 tree const_intQI_pointer_node;
23249 tree const_intHI_pointer_node;
23250 tree const_intSI_pointer_node;
23251 tree const_intDI_pointer_node;
23252 tree const_float_pointer_node;
23253
23254 tree V8QI_type_node;
23255 tree V4HI_type_node;
23256 tree V4HF_type_node;
23257 tree V2SI_type_node;
23258 tree V2SF_type_node;
23259 tree V16QI_type_node;
23260 tree V8HI_type_node;
23261 tree V4SI_type_node;
23262 tree V4SF_type_node;
23263 tree V2DI_type_node;
23264
23265 tree intUQI_type_node;
23266 tree intUHI_type_node;
23267 tree intUSI_type_node;
23268 tree intUDI_type_node;
23269
23270 tree intEI_type_node;
23271 tree intOI_type_node;
23272 tree intCI_type_node;
23273 tree intXI_type_node;
23274
23275 tree V8QI_pointer_node;
23276 tree V4HI_pointer_node;
23277 tree V2SI_pointer_node;
23278 tree V2SF_pointer_node;
23279 tree V16QI_pointer_node;
23280 tree V8HI_pointer_node;
23281 tree V4SI_pointer_node;
23282 tree V4SF_pointer_node;
23283 tree V2DI_pointer_node;
23284
23285 tree void_ftype_pv8qi_v8qi_v8qi;
23286 tree void_ftype_pv4hi_v4hi_v4hi;
23287 tree void_ftype_pv2si_v2si_v2si;
23288 tree void_ftype_pv2sf_v2sf_v2sf;
23289 tree void_ftype_pdi_di_di;
23290 tree void_ftype_pv16qi_v16qi_v16qi;
23291 tree void_ftype_pv8hi_v8hi_v8hi;
23292 tree void_ftype_pv4si_v4si_v4si;
23293 tree void_ftype_pv4sf_v4sf_v4sf;
23294 tree void_ftype_pv2di_v2di_v2di;
23295
23296 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23297 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23298 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23299
23300 /* Create distinguished type nodes for NEON vector element types,
23301 and pointers to values of such types, so we can detect them later. */
23302 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23303 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23304 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23305 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23306 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23307 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23308 neon_float_type_node = make_node (REAL_TYPE);
23309 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23310 layout_type (neon_float_type_node);
23311 neon_floatHF_type_node = make_node (REAL_TYPE);
23312 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23313 layout_type (neon_floatHF_type_node);
23314
23315 /* Define typedefs which exactly correspond to the modes we are basing vector
23316 types on. If you change these names you'll need to change
23317 the table used by arm_mangle_type too. */
23318 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23319 "__builtin_neon_qi");
23320 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23321 "__builtin_neon_hi");
23322 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23323 "__builtin_neon_hf");
23324 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23325 "__builtin_neon_si");
23326 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23327 "__builtin_neon_sf");
23328 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23329 "__builtin_neon_di");
23330 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23331 "__builtin_neon_poly8");
23332 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23333 "__builtin_neon_poly16");
23334
23335 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23336 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23337 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23338 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23339 float_pointer_node = build_pointer_type (neon_float_type_node);
23340
23341 /* Next create constant-qualified versions of the above types. */
23342 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23343 TYPE_QUAL_CONST);
23344 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23345 TYPE_QUAL_CONST);
23346 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23347 TYPE_QUAL_CONST);
23348 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23349 TYPE_QUAL_CONST);
23350 const_float_node = build_qualified_type (neon_float_type_node,
23351 TYPE_QUAL_CONST);
23352
23353 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23354 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23355 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23356 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23357 const_float_pointer_node = build_pointer_type (const_float_node);
23358
23359 /* Now create vector types based on our NEON element types. */
23360 /* 64-bit vectors. */
23361 V8QI_type_node =
23362 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23363 V4HI_type_node =
23364 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23365 V4HF_type_node =
23366 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23367 V2SI_type_node =
23368 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23369 V2SF_type_node =
23370 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23371 /* 128-bit vectors. */
23372 V16QI_type_node =
23373 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23374 V8HI_type_node =
23375 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23376 V4SI_type_node =
23377 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23378 V4SF_type_node =
23379 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23380 V2DI_type_node =
23381 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23382
23383 /* Unsigned integer types for various mode sizes. */
23384 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23385 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23386 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23387 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23388 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23389
23390
23391 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23392 "__builtin_neon_uqi");
23393 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23394 "__builtin_neon_uhi");
23395 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23396 "__builtin_neon_usi");
23397 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23398 "__builtin_neon_udi");
23399 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23400 "__builtin_neon_poly64");
23401 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23402 "__builtin_neon_poly128");
23403
23404 /* Opaque integer types for structures of vectors. */
23405 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23406 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23407 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23408 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23409
23410 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23411 "__builtin_neon_ti");
23412 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23413 "__builtin_neon_ei");
23414 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23415 "__builtin_neon_oi");
23416 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23417 "__builtin_neon_ci");
23418 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23419 "__builtin_neon_xi");
23420
23421 /* Pointers to vector types. */
23422 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23423 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23424 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23425 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23426 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23427 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23428 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23429 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23430 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23431
23432 /* Operations which return results as pairs. */
23433 void_ftype_pv8qi_v8qi_v8qi =
23434 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23435 V8QI_type_node, NULL);
23436 void_ftype_pv4hi_v4hi_v4hi =
23437 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23438 V4HI_type_node, NULL);
23439 void_ftype_pv2si_v2si_v2si =
23440 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23441 V2SI_type_node, NULL);
23442 void_ftype_pv2sf_v2sf_v2sf =
23443 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23444 V2SF_type_node, NULL);
23445 void_ftype_pdi_di_di =
23446 build_function_type_list (void_type_node, intDI_pointer_node,
23447 neon_intDI_type_node, neon_intDI_type_node, NULL);
23448 void_ftype_pv16qi_v16qi_v16qi =
23449 build_function_type_list (void_type_node, V16QI_pointer_node,
23450 V16QI_type_node, V16QI_type_node, NULL);
23451 void_ftype_pv8hi_v8hi_v8hi =
23452 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23453 V8HI_type_node, NULL);
23454 void_ftype_pv4si_v4si_v4si =
23455 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23456 V4SI_type_node, NULL);
23457 void_ftype_pv4sf_v4sf_v4sf =
23458 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23459 V4SF_type_node, NULL);
23460 void_ftype_pv2di_v2di_v2di =
23461 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23462 V2DI_type_node, NULL);
23463
23464 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23465 {
23466 tree V4USI_type_node =
23467 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23468
23469 tree V16UQI_type_node =
23470 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23471
23472 tree v16uqi_ftype_v16uqi
23473 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23474
23475 tree v16uqi_ftype_v16uqi_v16uqi
23476 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23477 V16UQI_type_node, NULL_TREE);
23478
23479 tree v4usi_ftype_v4usi
23480 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23481
23482 tree v4usi_ftype_v4usi_v4usi
23483 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23484 V4USI_type_node, NULL_TREE);
23485
23486 tree v4usi_ftype_v4usi_v4usi_v4usi
23487 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23488 V4USI_type_node, V4USI_type_node, NULL_TREE);
23489
23490 tree uti_ftype_udi_udi
23491 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23492 intUDI_type_node, NULL_TREE);
23493
23494 #undef CRYPTO1
23495 #undef CRYPTO2
23496 #undef CRYPTO3
23497 #undef C
23498 #undef N
23499 #undef CF
23500 #undef FT1
23501 #undef FT2
23502 #undef FT3
23503
23504 #define C(U) \
23505 ARM_BUILTIN_CRYPTO_##U
23506 #define N(L) \
23507 "__builtin_arm_crypto_"#L
23508 #define FT1(R, A) \
23509 R##_ftype_##A
23510 #define FT2(R, A1, A2) \
23511 R##_ftype_##A1##_##A2
23512 #define FT3(R, A1, A2, A3) \
23513 R##_ftype_##A1##_##A2##_##A3
23514 #define CRYPTO1(L, U, R, A) \
23515 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23516 C (U), BUILT_IN_MD, \
23517 NULL, NULL_TREE);
23518 #define CRYPTO2(L, U, R, A1, A2) \
23519 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23520 C (U), BUILT_IN_MD, \
23521 NULL, NULL_TREE);
23522
23523 #define CRYPTO3(L, U, R, A1, A2, A3) \
23524 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23525 C (U), BUILT_IN_MD, \
23526 NULL, NULL_TREE);
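/* As a purely illustrative (hypothetical) example of the expansion, an entry
   such as
     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)
   in crypto.def would register "__builtin_arm_crypto_aesd" with type
   v16uqi_ftype_v16uqi_v16uqi under function code ARM_BUILTIN_CRYPTO_AESD.  */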
23527 #include "crypto.def"
23528
23529 #undef CRYPTO1
23530 #undef CRYPTO2
23531 #undef CRYPTO3
23532 #undef C
23533 #undef N
23534 #undef FT1
23535 #undef FT2
23536 #undef FT3
23537 }
23538 dreg_types[0] = V8QI_type_node;
23539 dreg_types[1] = V4HI_type_node;
23540 dreg_types[2] = V2SI_type_node;
23541 dreg_types[3] = V2SF_type_node;
23542 dreg_types[4] = neon_intDI_type_node;
23543
23544 qreg_types[0] = V16QI_type_node;
23545 qreg_types[1] = V8HI_type_node;
23546 qreg_types[2] = V4SI_type_node;
23547 qreg_types[3] = V4SF_type_node;
23548 qreg_types[4] = V2DI_type_node;
23549 qreg_types[5] = neon_intUTI_type_node;
23550
23551 for (i = 0; i < NUM_QREG_TYPES; i++)
23552 {
23553 int j;
23554 for (j = 0; j < NUM_QREG_TYPES; j++)
23555 {
23556 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23557 reinterp_ftype_dreg[i][j]
23558 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23559
23560 reinterp_ftype_qreg[i][j]
23561 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23562 }
23563 }
23564
23565 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23566 i < ARRAY_SIZE (neon_builtin_data);
23567 i++, fcode++)
23568 {
23569 neon_builtin_datum *d = &neon_builtin_data[i];
23570
23571 const char* const modenames[] = {
23572 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23573 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23574 "ti", "ei", "oi"
23575 };
23576 char namebuf[60];
23577 tree ftype = NULL;
23578 int is_load = 0, is_store = 0;
23579
23580 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23581
23582 d->fcode = fcode;
23583
23584 switch (d->itype)
23585 {
23586 case NEON_LOAD1:
23587 case NEON_LOAD1LANE:
23588 case NEON_LOADSTRUCT:
23589 case NEON_LOADSTRUCTLANE:
23590 is_load = 1;
23591 /* Fall through. */
23592 case NEON_STORE1:
23593 case NEON_STORE1LANE:
23594 case NEON_STORESTRUCT:
23595 case NEON_STORESTRUCTLANE:
23596 if (!is_load)
23597 is_store = 1;
23598 /* Fall through. */
23599 case NEON_UNOP:
23600 case NEON_RINT:
23601 case NEON_BINOP:
23602 case NEON_LOGICBINOP:
23603 case NEON_SHIFTINSERT:
23604 case NEON_TERNOP:
23605 case NEON_GETLANE:
23606 case NEON_SETLANE:
23607 case NEON_CREATE:
23608 case NEON_DUP:
23609 case NEON_DUPLANE:
23610 case NEON_SHIFTIMM:
23611 case NEON_SHIFTACC:
23612 case NEON_COMBINE:
23613 case NEON_SPLIT:
23614 case NEON_CONVERT:
23615 case NEON_FIXCONV:
23616 case NEON_LANEMUL:
23617 case NEON_LANEMULL:
23618 case NEON_LANEMULH:
23619 case NEON_LANEMAC:
23620 case NEON_SCALARMUL:
23621 case NEON_SCALARMULL:
23622 case NEON_SCALARMULH:
23623 case NEON_SCALARMAC:
23624 case NEON_SELECT:
23625 case NEON_VTBL:
23626 case NEON_VTBX:
23627 {
23628 int k;
23629 tree return_type = void_type_node, args = void_list_node;
23630
23631 /* Build a function type directly from the insn_data for
23632 this builtin. The build_function_type() function takes
23633 care of removing duplicates for us. */
23634 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23635 {
23636 tree eltype;
23637
23638 if (is_load && k == 1)
23639 {
23640 /* Neon load patterns always have the memory
23641 operand in the operand 1 position. */
23642 gcc_assert (insn_data[d->code].operand[k].predicate
23643 == neon_struct_operand);
23644
23645 switch (d->mode)
23646 {
23647 case T_V8QI:
23648 case T_V16QI:
23649 eltype = const_intQI_pointer_node;
23650 break;
23651
23652 case T_V4HI:
23653 case T_V8HI:
23654 eltype = const_intHI_pointer_node;
23655 break;
23656
23657 case T_V2SI:
23658 case T_V4SI:
23659 eltype = const_intSI_pointer_node;
23660 break;
23661
23662 case T_V2SF:
23663 case T_V4SF:
23664 eltype = const_float_pointer_node;
23665 break;
23666
23667 case T_DI:
23668 case T_V2DI:
23669 eltype = const_intDI_pointer_node;
23670 break;
23671
23672 default: gcc_unreachable ();
23673 }
23674 }
23675 else if (is_store && k == 0)
23676 {
23677 /* Similarly, Neon store patterns use operand 0 as
23678 the memory location to store to. */
23679 gcc_assert (insn_data[d->code].operand[k].predicate
23680 == neon_struct_operand);
23681
23682 switch (d->mode)
23683 {
23684 case T_V8QI:
23685 case T_V16QI:
23686 eltype = intQI_pointer_node;
23687 break;
23688
23689 case T_V4HI:
23690 case T_V8HI:
23691 eltype = intHI_pointer_node;
23692 break;
23693
23694 case T_V2SI:
23695 case T_V4SI:
23696 eltype = intSI_pointer_node;
23697 break;
23698
23699 case T_V2SF:
23700 case T_V4SF:
23701 eltype = float_pointer_node;
23702 break;
23703
23704 case T_DI:
23705 case T_V2DI:
23706 eltype = intDI_pointer_node;
23707 break;
23708
23709 default: gcc_unreachable ();
23710 }
23711 }
23712 else
23713 {
23714 switch (insn_data[d->code].operand[k].mode)
23715 {
23716 case VOIDmode: eltype = void_type_node; break;
23717 /* Scalars. */
23718 case QImode: eltype = neon_intQI_type_node; break;
23719 case HImode: eltype = neon_intHI_type_node; break;
23720 case SImode: eltype = neon_intSI_type_node; break;
23721 case SFmode: eltype = neon_float_type_node; break;
23722 case DImode: eltype = neon_intDI_type_node; break;
23723 case TImode: eltype = intTI_type_node; break;
23724 case EImode: eltype = intEI_type_node; break;
23725 case OImode: eltype = intOI_type_node; break;
23726 case CImode: eltype = intCI_type_node; break;
23727 case XImode: eltype = intXI_type_node; break;
23728 /* 64-bit vectors. */
23729 case V8QImode: eltype = V8QI_type_node; break;
23730 case V4HImode: eltype = V4HI_type_node; break;
23731 case V2SImode: eltype = V2SI_type_node; break;
23732 case V2SFmode: eltype = V2SF_type_node; break;
23733 /* 128-bit vectors. */
23734 case V16QImode: eltype = V16QI_type_node; break;
23735 case V8HImode: eltype = V8HI_type_node; break;
23736 case V4SImode: eltype = V4SI_type_node; break;
23737 case V4SFmode: eltype = V4SF_type_node; break;
23738 case V2DImode: eltype = V2DI_type_node; break;
23739 default: gcc_unreachable ();
23740 }
23741 }
23742
23743 if (k == 0 && !is_store)
23744 return_type = eltype;
23745 else
23746 args = tree_cons (NULL_TREE, eltype, args);
23747 }
23748
23749 ftype = build_function_type (return_type, args);
23750 }
23751 break;
23752
23753 case NEON_RESULTPAIR:
23754 {
23755 switch (insn_data[d->code].operand[1].mode)
23756 {
23757 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23758 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23759 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23760 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23761 case DImode: ftype = void_ftype_pdi_di_di; break;
23762 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23763 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23764 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23765 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23766 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23767 default: gcc_unreachable ();
23768 }
23769 }
23770 break;
23771
23772 case NEON_REINTERP:
23773 {
23774 /* We iterate over NUM_DREG_TYPES doubleword types,
23775 then NUM_QREG_TYPES quadword types.
23776 V4HF is not a type used in reinterpret, so we translate
23777 d->mode to the correct index in reinterp_ftype_dreg. */
23778 bool qreg_p
23779 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23780 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23781 % NUM_QREG_TYPES;
23782 switch (insn_data[d->code].operand[0].mode)
23783 {
23784 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23785 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23786 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23787 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23788 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23789 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23790 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23791 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23792 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23793 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23794 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23795 default: gcc_unreachable ();
23796 }
23797 }
23798 break;
23799 case NEON_FLOAT_WIDEN:
23800 {
23801 tree eltype = NULL_TREE;
23802 tree return_type = NULL_TREE;
23803
23804 switch (insn_data[d->code].operand[1].mode)
23805 {
23806 case V4HFmode:
23807 eltype = V4HF_type_node;
23808 return_type = V4SF_type_node;
23809 break;
23810 default: gcc_unreachable ();
23811 }
23812 ftype = build_function_type_list (return_type, eltype, NULL);
23813 break;
23814 }
23815 case NEON_FLOAT_NARROW:
23816 {
23817 tree eltype = NULL_TREE;
23818 tree return_type = NULL_TREE;
23819
23820 switch (insn_data[d->code].operand[1].mode)
23821 {
23822 case V4SFmode:
23823 eltype = V4SF_type_node;
23824 return_type = V4HF_type_node;
23825 break;
23826 default: gcc_unreachable ();
23827 }
23828 ftype = build_function_type_list (return_type, eltype, NULL);
23829 break;
23830 }
23831 default:
23832 gcc_unreachable ();
23833 }
23834
23835 gcc_assert (ftype != NULL);
23836
23837 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23838
23839 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23840 NULL_TREE);
23841 arm_builtin_decls[fcode] = decl;
23842 }
23843 }
23844
23845 #undef NUM_DREG_TYPES
23846 #undef NUM_QREG_TYPES
23847
23848 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23849 do \
23850 { \
23851 if ((MASK) & insn_flags) \
23852 { \
23853 tree bdecl; \
23854 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23855 BUILT_IN_MD, NULL, NULL_TREE); \
23856 arm_builtin_decls[CODE] = bdecl; \
23857 } \
23858 } \
23859 while (0)
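/* Hypothetical illustration: a call such as
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb", ftype, ARM_BUILTIN_WADDB);
   registers the builtin and records its decl in arm_builtin_decls, but only
   when FL_IWMMXT is present in insn_flags.  */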
23860
23861 struct builtin_description
23862 {
23863 const unsigned int mask;
23864 const enum insn_code icode;
23865 const char * const name;
23866 const enum arm_builtins code;
23867 const enum rtx_code comparison;
23868 const unsigned int flag;
23869 };
23870
23871 static const struct builtin_description bdesc_2arg[] =
23872 {
23873 #define IWMMXT_BUILTIN(code, string, builtin) \
23874 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23875 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23876
23877 #define IWMMXT2_BUILTIN(code, string, builtin) \
23878 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23879 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23880
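/* For example, the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */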
23881 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23882 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23883 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23884 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23885 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23886 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23887 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23888 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23889 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23890 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23891 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23892 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23893 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23894 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23895 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23896 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23897 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23898 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23899 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23900 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23901 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23902 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23903 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23904 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23905 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23906 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23907 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23908 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23909 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23910 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23911 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23912 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23913 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23914 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23915 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23916 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23917 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23918 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23919 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23920 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23921 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23922 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23923 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23924 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23925 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23926 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23927 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23928 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23929 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23930 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23931 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23932 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23933 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23934 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23935 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23936 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23937 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23938 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23939 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23940 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23941 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23942 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23943 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23944 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23945 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23946 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23947 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23948 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23949 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23950 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23951 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23952 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23953 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23954 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23955 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23956 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23957 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23958 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23959
23960 #define IWMMXT_BUILTIN2(code, builtin) \
23961 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23962
23963 #define IWMMXT2_BUILTIN2(code, builtin) \
23964 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23965
23966 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
23967 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
23968 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
23969 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
23970 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
23971 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
23972 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
23973 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
23974 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
23975 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
23976
23977 #define CRC32_BUILTIN(L, U) \
23978 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
23979 UNKNOWN, 0},
23980 CRC32_BUILTIN (crc32b, CRC32B)
23981 CRC32_BUILTIN (crc32h, CRC32H)
23982 CRC32_BUILTIN (crc32w, CRC32W)
23983 CRC32_BUILTIN (crc32cb, CRC32CB)
23984 CRC32_BUILTIN (crc32ch, CRC32CH)
23985 CRC32_BUILTIN (crc32cw, CRC32CW)
23986 #undef CRC32_BUILTIN
23987
23988
23989 #define CRYPTO_BUILTIN(L, U) \
23990 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
23991 UNKNOWN, 0},
23992 #undef CRYPTO1
23993 #undef CRYPTO2
23994 #undef CRYPTO3
23995 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
23996 #define CRYPTO1(L, U, R, A)
23997 #define CRYPTO3(L, U, R, A1, A2, A3)
23998 #include "crypto.def"
23999 #undef CRYPTO1
24000 #undef CRYPTO2
24001 #undef CRYPTO3
24002
24003 };
24004
24005 static const struct builtin_description bdesc_1arg[] =
24006 {
24007 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24008 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24009 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24010 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24011 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24012 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24013 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24014 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24015 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24016 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24017 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24018 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24019 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24020 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24021 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24022 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24023 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24024 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24025 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24026 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24027 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24028 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24029 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24030 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24031
24032 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24033 #define CRYPTO2(L, U, R, A1, A2)
24034 #define CRYPTO3(L, U, R, A1, A2, A3)
24035 #include "crypto.def"
24036 #undef CRYPTO1
24037 #undef CRYPTO2
24038 #undef CRYPTO3
24039 };
24040
24041 static const struct builtin_description bdesc_3arg[] =
24042 {
24043 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24044 #define CRYPTO1(L, U, R, A)
24045 #define CRYPTO2(L, U, R, A1, A2)
24046 #include "crypto.def"
24047 #undef CRYPTO1
24048 #undef CRYPTO2
24049 #undef CRYPTO3
24050 };
24051 #undef CRYPTO_BUILTIN
24052
24053 /* Set up all the iWMMXt builtins. This is not called if
24054 TARGET_IWMMXT is zero. */
24055
24056 static void
24057 arm_init_iwmmxt_builtins (void)
24058 {
24059 const struct builtin_description * d;
24060 size_t i;
24061
24062 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24063 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24064 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24065
24066 tree v8qi_ftype_v8qi_v8qi_int
24067 = build_function_type_list (V8QI_type_node,
24068 V8QI_type_node, V8QI_type_node,
24069 integer_type_node, NULL_TREE);
24070 tree v4hi_ftype_v4hi_int
24071 = build_function_type_list (V4HI_type_node,
24072 V4HI_type_node, integer_type_node, NULL_TREE);
24073 tree v2si_ftype_v2si_int
24074 = build_function_type_list (V2SI_type_node,
24075 V2SI_type_node, integer_type_node, NULL_TREE);
24076 tree v2si_ftype_di_di
24077 = build_function_type_list (V2SI_type_node,
24078 long_long_integer_type_node,
24079 long_long_integer_type_node,
24080 NULL_TREE);
24081 tree di_ftype_di_int
24082 = build_function_type_list (long_long_integer_type_node,
24083 long_long_integer_type_node,
24084 integer_type_node, NULL_TREE);
24085 tree di_ftype_di_int_int
24086 = build_function_type_list (long_long_integer_type_node,
24087 long_long_integer_type_node,
24088 integer_type_node,
24089 integer_type_node, NULL_TREE);
24090 tree int_ftype_v8qi
24091 = build_function_type_list (integer_type_node,
24092 V8QI_type_node, NULL_TREE);
24093 tree int_ftype_v4hi
24094 = build_function_type_list (integer_type_node,
24095 V4HI_type_node, NULL_TREE);
24096 tree int_ftype_v2si
24097 = build_function_type_list (integer_type_node,
24098 V2SI_type_node, NULL_TREE);
24099 tree int_ftype_v8qi_int
24100 = build_function_type_list (integer_type_node,
24101 V8QI_type_node, integer_type_node, NULL_TREE);
24102 tree int_ftype_v4hi_int
24103 = build_function_type_list (integer_type_node,
24104 V4HI_type_node, integer_type_node, NULL_TREE);
24105 tree int_ftype_v2si_int
24106 = build_function_type_list (integer_type_node,
24107 V2SI_type_node, integer_type_node, NULL_TREE);
24108 tree v8qi_ftype_v8qi_int_int
24109 = build_function_type_list (V8QI_type_node,
24110 V8QI_type_node, integer_type_node,
24111 integer_type_node, NULL_TREE);
24112 tree v4hi_ftype_v4hi_int_int
24113 = build_function_type_list (V4HI_type_node,
24114 V4HI_type_node, integer_type_node,
24115 integer_type_node, NULL_TREE);
24116 tree v2si_ftype_v2si_int_int
24117 = build_function_type_list (V2SI_type_node,
24118 V2SI_type_node, integer_type_node,
24119 integer_type_node, NULL_TREE);
24120 /* Miscellaneous. */
24121 tree v8qi_ftype_v4hi_v4hi
24122 = build_function_type_list (V8QI_type_node,
24123 V4HI_type_node, V4HI_type_node, NULL_TREE);
24124 tree v4hi_ftype_v2si_v2si
24125 = build_function_type_list (V4HI_type_node,
24126 V2SI_type_node, V2SI_type_node, NULL_TREE);
24127 tree v8qi_ftype_v4hi_v8qi
24128 = build_function_type_list (V8QI_type_node,
24129 V4HI_type_node, V8QI_type_node, NULL_TREE);
24130 tree v2si_ftype_v4hi_v4hi
24131 = build_function_type_list (V2SI_type_node,
24132 V4HI_type_node, V4HI_type_node, NULL_TREE);
24133 tree v2si_ftype_v8qi_v8qi
24134 = build_function_type_list (V2SI_type_node,
24135 V8QI_type_node, V8QI_type_node, NULL_TREE);
24136 tree v4hi_ftype_v4hi_di
24137 = build_function_type_list (V4HI_type_node,
24138 V4HI_type_node, long_long_integer_type_node,
24139 NULL_TREE);
24140 tree v2si_ftype_v2si_di
24141 = build_function_type_list (V2SI_type_node,
24142 V2SI_type_node, long_long_integer_type_node,
24143 NULL_TREE);
24144 tree di_ftype_void
24145 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24146 tree int_ftype_void
24147 = build_function_type_list (integer_type_node, NULL_TREE);
24148 tree di_ftype_v8qi
24149 = build_function_type_list (long_long_integer_type_node,
24150 V8QI_type_node, NULL_TREE);
24151 tree di_ftype_v4hi
24152 = build_function_type_list (long_long_integer_type_node,
24153 V4HI_type_node, NULL_TREE);
24154 tree di_ftype_v2si
24155 = build_function_type_list (long_long_integer_type_node,
24156 V2SI_type_node, NULL_TREE);
24157 tree v2si_ftype_v4hi
24158 = build_function_type_list (V2SI_type_node,
24159 V4HI_type_node, NULL_TREE);
24160 tree v4hi_ftype_v8qi
24161 = build_function_type_list (V4HI_type_node,
24162 V8QI_type_node, NULL_TREE);
24163 tree v8qi_ftype_v8qi
24164 = build_function_type_list (V8QI_type_node,
24165 V8QI_type_node, NULL_TREE);
24166 tree v4hi_ftype_v4hi
24167 = build_function_type_list (V4HI_type_node,
24168 V4HI_type_node, NULL_TREE);
24169 tree v2si_ftype_v2si
24170 = build_function_type_list (V2SI_type_node,
24171 V2SI_type_node, NULL_TREE);
24172
24173 tree di_ftype_di_v4hi_v4hi
24174 = build_function_type_list (long_long_unsigned_type_node,
24175 long_long_unsigned_type_node,
24176 V4HI_type_node, V4HI_type_node,
24177 NULL_TREE);
24178
24179 tree di_ftype_v4hi_v4hi
24180 = build_function_type_list (long_long_unsigned_type_node,
24181 V4HI_type_node, V4HI_type_node,
24182 NULL_TREE);
24183
24184 tree v2si_ftype_v2si_v4hi_v4hi
24185 = build_function_type_list (V2SI_type_node,
24186 V2SI_type_node, V4HI_type_node,
24187 V4HI_type_node, NULL_TREE);
24188
24189 tree v2si_ftype_v2si_v8qi_v8qi
24190 = build_function_type_list (V2SI_type_node,
24191 V2SI_type_node, V8QI_type_node,
24192 V8QI_type_node, NULL_TREE);
24193
24194 tree di_ftype_di_v2si_v2si
24195 = build_function_type_list (long_long_unsigned_type_node,
24196 long_long_unsigned_type_node,
24197 V2SI_type_node, V2SI_type_node,
24198 NULL_TREE);
24199
24200 tree di_ftype_di_di_int
24201 = build_function_type_list (long_long_unsigned_type_node,
24202 long_long_unsigned_type_node,
24203 long_long_unsigned_type_node,
24204 integer_type_node, NULL_TREE);
24205
24206 tree void_ftype_int
24207 = build_function_type_list (void_type_node,
24208 integer_type_node, NULL_TREE);
24209
24210 tree v8qi_ftype_char
24211 = build_function_type_list (V8QI_type_node,
24212 signed_char_type_node, NULL_TREE);
24213
24214 tree v4hi_ftype_short
24215 = build_function_type_list (V4HI_type_node,
24216 short_integer_type_node, NULL_TREE);
24217
24218 tree v2si_ftype_int
24219 = build_function_type_list (V2SI_type_node,
24220 integer_type_node, NULL_TREE);
24221
24222 /* Normal vector binops. */
24223 tree v8qi_ftype_v8qi_v8qi
24224 = build_function_type_list (V8QI_type_node,
24225 V8QI_type_node, V8QI_type_node, NULL_TREE);
24226 tree v4hi_ftype_v4hi_v4hi
24227 = build_function_type_list (V4HI_type_node,
24228 V4HI_type_node, V4HI_type_node, NULL_TREE);
24229 tree v2si_ftype_v2si_v2si
24230 = build_function_type_list (V2SI_type_node,
24231 V2SI_type_node, V2SI_type_node, NULL_TREE);
24232 tree di_ftype_di_di
24233 = build_function_type_list (long_long_unsigned_type_node,
24234 long_long_unsigned_type_node,
24235 long_long_unsigned_type_node,
24236 NULL_TREE);
24237
24238 /* Add all builtins that are more or less simple operations on two
24239 operands. */
24240 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24241 {
24242 /* Use one of the operands; the target can have a different mode for
24243 mask-generating compares. */
24244 enum machine_mode mode;
24245 tree type;
24246
24247 if (d->name == 0)
24248 continue;
24249
24250 mode = insn_data[d->icode].operand[1].mode;
24251
24252 switch (mode)
24253 {
24254 case V8QImode:
24255 type = v8qi_ftype_v8qi_v8qi;
24256 break;
24257 case V4HImode:
24258 type = v4hi_ftype_v4hi_v4hi;
24259 break;
24260 case V2SImode:
24261 type = v2si_ftype_v2si_v2si;
24262 break;
24263 case DImode:
24264 type = di_ftype_di_di;
24265 break;
24266
24267 default:
24268 gcc_unreachable ();
24269 }
24270
24271 def_mbuiltin (d->mask, d->name, type, d->code);
24272 }
24273
24274 /* Add the remaining MMX insns with somewhat more complicated types. */
24275 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24276 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24277 ARM_BUILTIN_ ## CODE)
24278
24279 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24280 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24281 ARM_BUILTIN_ ## CODE)
24282
24283 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24284 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24285 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24286 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24287 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24288 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24289 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24290 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24291 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24292
24293 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24294 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24295 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24296 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24297 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24298 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24299
24300 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24301 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24302 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24303 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24304 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24305 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24306
24307 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24308 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24309 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24310 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24311 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24312 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24313
24314 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24315 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24316 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24317 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24318 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24319 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24320
24321 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24322
24323 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24324 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24325 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24326 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24327 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24328 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24329 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24330 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24331 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24332 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24333
24334 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24335 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24336 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24337 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24338 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24339 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24340 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24341 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24342 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24343
24344 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24345 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24346 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24347
24348 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24349 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24350 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24351
24352 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24353 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24354
24355 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24356 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24357 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24358 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24359 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24360 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24361
24362 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24363 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24364 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24365 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24366 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24367 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24368 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24369 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24370 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24371 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24372 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24373 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24374
24375 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24376 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24377 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24378 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24379
24380 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24381 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24382 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24383 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24384 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24385 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24386 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24387
24388 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24389 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24390 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24391
24392 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24393 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24394 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24395 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24396
24397 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24398 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24399 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24400 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24401
24402 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24403 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24404 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24405 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24406
24407 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24408 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24409 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24410 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24411
24412 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24413 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24414 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24415 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24416
24417 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24418 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24419 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24420 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24421
24422 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24423
24424 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24425 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24426 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24427
24428 #undef iwmmx_mbuiltin
24429 #undef iwmmx2_mbuiltin
24430 }
24431
24432 static void
24433 arm_init_fp16_builtins (void)
24434 {
24435 tree fp16_type = make_node (REAL_TYPE);
24436 TYPE_PRECISION (fp16_type) = 16;
24437 layout_type (fp16_type);
24438 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24439 }
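
/* Illustrative only (not part of the hook above): with -mfp16-format=ieee
   or -mfp16-format=alternative the registered __fp16 type can be used as
   a storage-only half-precision type, e.g.

     __fp16 h = 2.5;
     float  f = h * 2.0f;

   where the arithmetic is carried out in float; see arm_promoted_type and
   arm_convert_to_type below.  */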
24440
24441 static void
24442 arm_init_crc32_builtins ()
24443 {
24444 tree si_ftype_si_qi
24445 = build_function_type_list (unsigned_intSI_type_node,
24446 unsigned_intSI_type_node,
24447 unsigned_intQI_type_node, NULL_TREE);
24448 tree si_ftype_si_hi
24449 = build_function_type_list (unsigned_intSI_type_node,
24450 unsigned_intSI_type_node,
24451 unsigned_intHI_type_node, NULL_TREE);
24452 tree si_ftype_si_si
24453 = build_function_type_list (unsigned_intSI_type_node,
24454 unsigned_intSI_type_node,
24455 unsigned_intSI_type_node, NULL_TREE);
24456
24457 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24458 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24459 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24460 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24461 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24462 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24463 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24464 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24465 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24466 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24467 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24468 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24469 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24470 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24471 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24472 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24473 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24474 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24475 }
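
/* Illustrative only: assuming a target selected with CRC support (for
   example -march=armv8-a+crc), the builtins registered above can be used
   directly, e.g.

     unsigned int
     crc32_bytes (unsigned int acc, const unsigned char *p, unsigned int n)
     {
       while (n--)
         acc = __builtin_arm_crc32b (acc, *p++);
       return acc;
     }

   The ACLE intrinsics in arm_acle.h are thin wrappers around these.  */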
24476
24477 static void
24478 arm_init_builtins (void)
24479 {
24480 if (TARGET_REALLY_IWMMXT)
24481 arm_init_iwmmxt_builtins ();
24482
24483 if (TARGET_NEON)
24484 arm_init_neon_builtins ();
24485
24486 if (arm_fp16_format)
24487 arm_init_fp16_builtins ();
24488
24489 if (TARGET_CRC32)
24490 arm_init_crc32_builtins ();
24491 }
24492
24493 /* Return the ARM builtin for CODE. */
24494
24495 static tree
24496 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24497 {
24498 if (code >= ARM_BUILTIN_MAX)
24499 return error_mark_node;
24500
24501 return arm_builtin_decls[code];
24502 }
24503
24504 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24505
24506 static const char *
24507 arm_invalid_parameter_type (const_tree t)
24508 {
24509 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24510 return N_("function parameters cannot have __fp16 type");
24511 return NULL;
24512 }
24513
24514 /* Implement TARGET_INVALID_RETURN_TYPE. */
24515
24516 static const char *
24517 arm_invalid_return_type (const_tree t)
24518 {
24519 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24520 return N_("functions cannot return __fp16 type");
24521 return NULL;
24522 }
24523
24524 /* Implement TARGET_PROMOTED_TYPE. */
24525
24526 static tree
24527 arm_promoted_type (const_tree t)
24528 {
24529 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24530 return float_type_node;
24531 return NULL_TREE;
24532 }
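
/* Illustrative only: the effect of the promotion above is that, for

     __fp16 a, b;

   the expression a + b is evaluated as (float) a + (float) b, and the
   result is truncated back to __fp16 only if it is stored in an __fp16
   object.  */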
24533
24534 /* Implement TARGET_CONVERT_TO_TYPE.
24535 Specifically, this hook implements the peculiarity of the ARM
24536 half-precision floating-point C semantics that requires conversions
24537 between __fp16 and double to go through an intermediate conversion to float. */
24538
24539 static tree
24540 arm_convert_to_type (tree type, tree expr)
24541 {
24542 tree fromtype = TREE_TYPE (expr);
24543 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24544 return NULL_TREE;
24545 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24546 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24547 return convert (type, convert (float_type_node, expr));
24548 return NULL_TREE;
24549 }
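
/* Illustrative only: given the hook above, with

     __fp16 h;
     double d = h;

   the conversion is performed as (double) (float) h, and the reverse
   direction as (__fp16) (float) d, i.e. conversions between __fp16 and
   double always go through an intermediate float value.  */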
24550
24551 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24552 This simply adds HFmode as a supported mode; even though we don't
24553 implement arithmetic on this type directly, it's supported by
24554 optabs conversions, much the way the double-word arithmetic is
24555 special-cased in the default hook. */
24556
24557 static bool
24558 arm_scalar_mode_supported_p (enum machine_mode mode)
24559 {
24560 if (mode == HFmode)
24561 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24562 else if (ALL_FIXED_POINT_MODE_P (mode))
24563 return true;
24564 else
24565 return default_scalar_mode_supported_p (mode);
24566 }
24567
24568 /* Errors in the source file can cause expand_expr to return const0_rtx
24569 where we expect a vector. To avoid crashing, use one of the vector
24570 clear instructions. */
24571
24572 static rtx
24573 safe_vector_operand (rtx x, enum machine_mode mode)
24574 {
24575 if (x != const0_rtx)
24576 return x;
24577 x = gen_reg_rtx (mode);
24578
24579 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24580 : gen_rtx_SUBREG (DImode, x, 0)));
24581 return x;
24582 }
24583
24584 /* Function to expand ternary builtins. */
24585 static rtx
24586 arm_expand_ternop_builtin (enum insn_code icode,
24587 tree exp, rtx target)
24588 {
24589 rtx pat;
24590 tree arg0 = CALL_EXPR_ARG (exp, 0);
24591 tree arg1 = CALL_EXPR_ARG (exp, 1);
24592 tree arg2 = CALL_EXPR_ARG (exp, 2);
24593
24594 rtx op0 = expand_normal (arg0);
24595 rtx op1 = expand_normal (arg1);
24596 rtx op2 = expand_normal (arg2);
24597 rtx op3 = NULL_RTX;
24598
24599 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24600 lane operand depending on endianness. */
24601 bool builtin_sha1cpm_p = false;
24602
24603 if (insn_data[icode].n_operands == 5)
24604 {
24605 gcc_assert (icode == CODE_FOR_crypto_sha1c
24606 || icode == CODE_FOR_crypto_sha1p
24607 || icode == CODE_FOR_crypto_sha1m);
24608 builtin_sha1cpm_p = true;
24609 }
24610 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24611 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24612 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24613 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24614
24615
24616 if (VECTOR_MODE_P (mode0))
24617 op0 = safe_vector_operand (op0, mode0);
24618 if (VECTOR_MODE_P (mode1))
24619 op1 = safe_vector_operand (op1, mode1);
24620 if (VECTOR_MODE_P (mode2))
24621 op2 = safe_vector_operand (op2, mode2);
24622
24623 if (! target
24624 || GET_MODE (target) != tmode
24625 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24626 target = gen_reg_rtx (tmode);
24627
24628 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24629 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24630 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24631
24632 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24633 op0 = copy_to_mode_reg (mode0, op0);
24634 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24635 op1 = copy_to_mode_reg (mode1, op1);
24636 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24637 op2 = copy_to_mode_reg (mode2, op2);
24638 if (builtin_sha1cpm_p)
24639 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24640
24641 if (builtin_sha1cpm_p)
24642 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24643 else
24644 pat = GEN_FCN (icode) (target, op0, op1, op2);
24645 if (! pat)
24646 return 0;
24647 emit_insn (pat);
24648 return target;
24649 }
24650
24651 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24652
24653 static rtx
24654 arm_expand_binop_builtin (enum insn_code icode,
24655 tree exp, rtx target)
24656 {
24657 rtx pat;
24658 tree arg0 = CALL_EXPR_ARG (exp, 0);
24659 tree arg1 = CALL_EXPR_ARG (exp, 1);
24660 rtx op0 = expand_normal (arg0);
24661 rtx op1 = expand_normal (arg1);
24662 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24663 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24664 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24665
24666 if (VECTOR_MODE_P (mode0))
24667 op0 = safe_vector_operand (op0, mode0);
24668 if (VECTOR_MODE_P (mode1))
24669 op1 = safe_vector_operand (op1, mode1);
24670
24671 if (! target
24672 || GET_MODE (target) != tmode
24673 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24674 target = gen_reg_rtx (tmode);
24675
24676 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24677 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24678
24679 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24680 op0 = copy_to_mode_reg (mode0, op0);
24681 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24682 op1 = copy_to_mode_reg (mode1, op1);
24683
24684 pat = GEN_FCN (icode) (target, op0, op1);
24685 if (! pat)
24686 return 0;
24687 emit_insn (pat);
24688 return target;
24689 }
24690
24691 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24692
24693 static rtx
24694 arm_expand_unop_builtin (enum insn_code icode,
24695 tree exp, rtx target, int do_load)
24696 {
24697 rtx pat;
24698 tree arg0 = CALL_EXPR_ARG (exp, 0);
24699 rtx op0 = expand_normal (arg0);
24700 rtx op1 = NULL_RTX;
24701 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24702 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24703 bool builtin_sha1h_p = false;
24704
24705 if (insn_data[icode].n_operands == 3)
24706 {
24707 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24708 builtin_sha1h_p = true;
24709 }
24710
24711 if (! target
24712 || GET_MODE (target) != tmode
24713 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24714 target = gen_reg_rtx (tmode);
24715 if (do_load)
24716 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24717 else
24718 {
24719 if (VECTOR_MODE_P (mode0))
24720 op0 = safe_vector_operand (op0, mode0);
24721
24722 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24723 op0 = copy_to_mode_reg (mode0, op0);
24724 }
24725 if (builtin_sha1h_p)
24726 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24727
24728 if (builtin_sha1h_p)
24729 pat = GEN_FCN (icode) (target, op0, op1);
24730 else
24731 pat = GEN_FCN (icode) (target, op0);
24732 if (! pat)
24733 return 0;
24734 emit_insn (pat);
24735 return target;
24736 }
24737
24738 typedef enum {
24739 NEON_ARG_COPY_TO_REG,
24740 NEON_ARG_CONSTANT,
24741 NEON_ARG_MEMORY,
24742 NEON_ARG_STOP
24743 } builtin_arg;
24744
24745 #define NEON_MAX_BUILTIN_ARGS 5
24746
24747 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24748 and return an expression for the accessed memory.
24749
24750 The intrinsic function operates on a block of registers that has
24751 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24752 function references the memory at EXP of type TYPE and in mode
24753 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24754 available. */
24755
24756 static tree
24757 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24758 enum machine_mode reg_mode,
24759 neon_builtin_type_mode type_mode)
24760 {
24761 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24762 tree elem_type, upper_bound, array_type;
24763
24764 /* Work out the size of the register block in bytes. */
24765 reg_size = GET_MODE_SIZE (reg_mode);
24766
24767 /* Work out the size of each vector in bytes. */
24768 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24769 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24770
24771 /* Work out how many vectors there are. */
24772 gcc_assert (reg_size % vector_size == 0);
24773 nvectors = reg_size / vector_size;
24774
24775 /* Work out the type of each element. */
24776 gcc_assert (POINTER_TYPE_P (type));
24777 elem_type = TREE_TYPE (type);
24778
24779 /* Work out how many elements are being loaded or stored.
24780 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24781 and memory elements; anything else implies a lane load or store. */
24782 if (mem_mode == reg_mode)
24783 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24784 else
24785 nelems = nvectors;
24786
24787 /* Create a type that describes the full access. */
24788 upper_bound = build_int_cst (size_type_node, nelems - 1);
24789 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24790
24791 /* Dereference EXP using that type. */
24792 return fold_build2 (MEM_REF, array_type, exp,
24793 build_int_cst (build_pointer_type (array_type), 0));
24794 }
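
/* Worked example (illustrative only): for a full-width quad-word load in
   the style of vld1q_f32, REG_MODE is V4SFmode (16 bytes), TYPE_MODE is a
   quad-register mode so VECTOR_SIZE is 16 and NVECTORS is 1, and EXP has
   type "const float32_t *" so ELEM_TYPE is float.  MEM_MODE equals
   REG_MODE, giving NELEMS = 16 * 1 / 4 = 4, and the access is described
   as a MEM_REF of type float[4] at *EXP.  */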
24795
24796 /* Expand a Neon builtin. */
24797 static rtx
24798 arm_expand_neon_args (rtx target, int icode, int have_retval,
24799 neon_builtin_type_mode type_mode,
24800 tree exp, int fcode, ...)
24801 {
24802 va_list ap;
24803 rtx pat;
24804 tree arg[NEON_MAX_BUILTIN_ARGS];
24805 rtx op[NEON_MAX_BUILTIN_ARGS];
24806 tree arg_type;
24807 tree formals;
24808 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24809 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24810 enum machine_mode other_mode;
24811 int argc = 0;
24812 int opno;
24813
24814 if (have_retval
24815 && (!target
24816 || GET_MODE (target) != tmode
24817 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24818 target = gen_reg_rtx (tmode);
24819
24820 va_start (ap, fcode);
24821
24822 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24823
24824 for (;;)
24825 {
24826 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24827
24828 if (thisarg == NEON_ARG_STOP)
24829 break;
24830 else
24831 {
24832 opno = argc + have_retval;
24833 mode[argc] = insn_data[icode].operand[opno].mode;
24834 arg[argc] = CALL_EXPR_ARG (exp, argc);
24835 arg_type = TREE_VALUE (formals);
24836 if (thisarg == NEON_ARG_MEMORY)
24837 {
24838 other_mode = insn_data[icode].operand[1 - opno].mode;
24839 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24840 mode[argc], other_mode,
24841 type_mode);
24842 }
24843
24844 op[argc] = expand_normal (arg[argc]);
24845
24846 switch (thisarg)
24847 {
24848 case NEON_ARG_COPY_TO_REG:
24849 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24850 if (!(*insn_data[icode].operand[opno].predicate)
24851 (op[argc], mode[argc]))
24852 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24853 break;
24854
24855 case NEON_ARG_CONSTANT:
24856 /* FIXME: This error message is somewhat unhelpful. */
24857 if (!(*insn_data[icode].operand[opno].predicate)
24858 (op[argc], mode[argc]))
24859 error ("argument must be a constant");
24860 break;
24861
24862 case NEON_ARG_MEMORY:
24863 gcc_assert (MEM_P (op[argc]));
24864 PUT_MODE (op[argc], mode[argc]);
24865 /* ??? arm_neon.h uses the same built-in functions for signed
24866 and unsigned accesses, casting where necessary. This isn't
24867 alias safe. */
24868 set_mem_alias_set (op[argc], 0);
24869 if (!(*insn_data[icode].operand[opno].predicate)
24870 (op[argc], mode[argc]))
24871 op[argc] = (replace_equiv_address
24872 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24873 break;
24874
24875 case NEON_ARG_STOP:
24876 gcc_unreachable ();
24877 }
24878
24879 argc++;
24880 formals = TREE_CHAIN (formals);
24881 }
24882 }
24883
24884 va_end (ap);
24885
24886 if (have_retval)
24887 switch (argc)
24888 {
24889 case 1:
24890 pat = GEN_FCN (icode) (target, op[0]);
24891 break;
24892
24893 case 2:
24894 pat = GEN_FCN (icode) (target, op[0], op[1]);
24895 break;
24896
24897 case 3:
24898 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24899 break;
24900
24901 case 4:
24902 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24903 break;
24904
24905 case 5:
24906 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24907 break;
24908
24909 default:
24910 gcc_unreachable ();
24911 }
24912 else
24913 switch (argc)
24914 {
24915 case 1:
24916 pat = GEN_FCN (icode) (op[0]);
24917 break;
24918
24919 case 2:
24920 pat = GEN_FCN (icode) (op[0], op[1]);
24921 break;
24922
24923 case 3:
24924 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24925 break;
24926
24927 case 4:
24928 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24929 break;
24930
24931 case 5:
24932 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24933 break;
24934
24935 default:
24936 gcc_unreachable ();
24937 }
24938
24939 if (!pat)
24940 return 0;
24941
24942 emit_insn (pat);
24943
24944 return target;
24945 }
24946
24947 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24948 constants defined per-instruction or per instruction-variant. Instead, the
24949 required info is looked up in the table neon_builtin_data. */
24950 static rtx
24951 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24952 {
24953 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24954 neon_itype itype = d->itype;
24955 enum insn_code icode = d->code;
24956 neon_builtin_type_mode type_mode = d->mode;
24957
24958 switch (itype)
24959 {
24960 case NEON_UNOP:
24961 case NEON_CONVERT:
24962 case NEON_DUPLANE:
24963 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24964 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24965
24966 case NEON_BINOP:
24967 case NEON_SETLANE:
24968 case NEON_SCALARMUL:
24969 case NEON_SCALARMULL:
24970 case NEON_SCALARMULH:
24971 case NEON_SHIFTINSERT:
24972 case NEON_LOGICBINOP:
24973 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24974 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24975 NEON_ARG_STOP);
24976
24977 case NEON_TERNOP:
24978 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24979 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24980 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24981
24982 case NEON_GETLANE:
24983 case NEON_FIXCONV:
24984 case NEON_SHIFTIMM:
24985 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24986 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
24987 NEON_ARG_STOP);
24988
24989 case NEON_CREATE:
24990 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24991 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24992
24993 case NEON_DUP:
24994 case NEON_RINT:
24995 case NEON_SPLIT:
24996 case NEON_FLOAT_WIDEN:
24997 case NEON_FLOAT_NARROW:
24998 case NEON_REINTERP:
24999 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25000 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25001
25002 case NEON_COMBINE:
25003 case NEON_VTBL:
25004 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25005 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25006
25007 case NEON_RESULTPAIR:
25008 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25009 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25010 NEON_ARG_STOP);
25011
25012 case NEON_LANEMUL:
25013 case NEON_LANEMULL:
25014 case NEON_LANEMULH:
25015 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25016 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25017 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25018
25019 case NEON_LANEMAC:
25020 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25021 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25022 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25023
25024 case NEON_SHIFTACC:
25025 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25026 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25027 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25028
25029 case NEON_SCALARMAC:
25030 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25031 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25032 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25033
25034 case NEON_SELECT:
25035 case NEON_VTBX:
25036 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25037 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25038 NEON_ARG_STOP);
25039
25040 case NEON_LOAD1:
25041 case NEON_LOADSTRUCT:
25042 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25043 NEON_ARG_MEMORY, NEON_ARG_STOP);
25044
25045 case NEON_LOAD1LANE:
25046 case NEON_LOADSTRUCTLANE:
25047 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25048 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25049 NEON_ARG_STOP);
25050
25051 case NEON_STORE1:
25052 case NEON_STORESTRUCT:
25053 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25054 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25055
25056 case NEON_STORE1LANE:
25057 case NEON_STORESTRUCTLANE:
25058 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25059 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25060 NEON_ARG_STOP);
25061 }
25062
25063 gcc_unreachable ();
25064 }
25065
25066 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25067 void
25068 neon_reinterpret (rtx dest, rtx src)
25069 {
25070 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25071 }
25072
25073 /* Emit code to place a Neon pair result in memory locations (with equal
25074 registers). */
25075 void
25076 neon_emit_pair_result_insn (enum machine_mode mode,
25077 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25078 rtx op1, rtx op2)
25079 {
25080 rtx mem = gen_rtx_MEM (mode, destaddr);
25081 rtx tmp1 = gen_reg_rtx (mode);
25082 rtx tmp2 = gen_reg_rtx (mode);
25083
25084 emit_insn (intfn (tmp1, op1, op2, tmp2));
25085
25086 emit_move_insn (mem, tmp1);
25087 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25088 emit_move_insn (mem, tmp2);
25089 }
25090
25091 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25092 not to early-clobber SRC registers in the process.
25093
25094 We assume that the operands described by SRC and DEST represent a
25095 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25096 number of components into which the copy has been decomposed. */
25097 void
25098 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25099 {
25100 unsigned int i;
25101
25102 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25103 || REGNO (operands[0]) < REGNO (operands[1]))
25104 {
25105 for (i = 0; i < count; i++)
25106 {
25107 operands[2 * i] = dest[i];
25108 operands[2 * i + 1] = src[i];
25109 }
25110 }
25111 else
25112 {
25113 for (i = 0; i < count; i++)
25114 {
25115 operands[2 * i] = dest[count - i - 1];
25116 operands[2 * i + 1] = src[count - i - 1];
25117 }
25118 }
25119 }
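
/* For example (illustrative only), decomposing a copy of {d0, d1} into
   {d1, d2}: the destination overlaps the source and has the higher
   register number, so the loop above emits the component moves in reverse
   order (d2 <- d1 first, then d1 <- d0), ensuring d1 is read before it is
   overwritten.  */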
25120
25121 /* Split operands into moves from op[1] + op[2] into op[0]. */
25122
25123 void
25124 neon_split_vcombine (rtx operands[3])
25125 {
25126 unsigned int dest = REGNO (operands[0]);
25127 unsigned int src1 = REGNO (operands[1]);
25128 unsigned int src2 = REGNO (operands[2]);
25129 enum machine_mode halfmode = GET_MODE (operands[1]);
25130 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25131 rtx destlo, desthi;
25132
25133 if (src1 == dest && src2 == dest + halfregs)
25134 {
25135 /* No-op move. Can't split to nothing; emit something. */
25136 emit_note (NOTE_INSN_DELETED);
25137 return;
25138 }
25139
25140 /* Preserve register attributes for variable tracking. */
25141 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25142 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25143 GET_MODE_SIZE (halfmode));
25144
25145 /* Special case of reversed high/low parts. Use VSWP. */
25146 if (src2 == dest && src1 == dest + halfregs)
25147 {
25148 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25149 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25150 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25151 return;
25152 }
25153
25154 if (!reg_overlap_mentioned_p (operands[2], destlo))
25155 {
25156 /* Try to avoid unnecessary moves if part of the result
25157 is in the right place already. */
25158 if (src1 != dest)
25159 emit_move_insn (destlo, operands[1]);
25160 if (src2 != dest + halfregs)
25161 emit_move_insn (desthi, operands[2]);
25162 }
25163 else
25164 {
25165 if (src2 != dest + halfregs)
25166 emit_move_insn (desthi, operands[2]);
25167 if (src1 != dest)
25168 emit_move_insn (destlo, operands[1]);
25169 }
25170 }
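
/* For example (illustrative only), combining d1 and d4 into q0 (whose
   halves are d0 and d1): operands[2] (d4) does not overlap destlo (d0),
   so the low half is written first (d0 <- d1) and then the high half
   (d1 <- d4), reading d1 before it is clobbered.  If instead the sources
   were d1 and d0, the reversed-halves special case above emits a single
   VSWP.  */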
25171
25172 /* Expand an expression EXP that calls a built-in function,
25173 with result going to TARGET if that's convenient
25174 (and in mode MODE if that's convenient).
25175 SUBTARGET may be used as the target for computing one of EXP's operands.
25176 IGNORE is nonzero if the value is to be ignored. */
25177
25178 static rtx
25179 arm_expand_builtin (tree exp,
25180 rtx target,
25181 rtx subtarget ATTRIBUTE_UNUSED,
25182 enum machine_mode mode ATTRIBUTE_UNUSED,
25183 int ignore ATTRIBUTE_UNUSED)
25184 {
25185 const struct builtin_description * d;
25186 enum insn_code icode;
25187 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25188 tree arg0;
25189 tree arg1;
25190 tree arg2;
25191 rtx op0;
25192 rtx op1;
25193 rtx op2;
25194 rtx pat;
25195 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25196 size_t i;
25197 enum machine_mode tmode;
25198 enum machine_mode mode0;
25199 enum machine_mode mode1;
25200 enum machine_mode mode2;
25201 int opint;
25202 int selector;
25203 int mask;
25204 int imm;
25205
25206 if (fcode >= ARM_BUILTIN_NEON_BASE)
25207 return arm_expand_neon_builtin (fcode, exp, target);
25208
25209 switch (fcode)
25210 {
25211 case ARM_BUILTIN_TEXTRMSB:
25212 case ARM_BUILTIN_TEXTRMUB:
25213 case ARM_BUILTIN_TEXTRMSH:
25214 case ARM_BUILTIN_TEXTRMUH:
25215 case ARM_BUILTIN_TEXTRMSW:
25216 case ARM_BUILTIN_TEXTRMUW:
25217 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25218 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25219 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25220 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25221 : CODE_FOR_iwmmxt_textrmw);
25222
25223 arg0 = CALL_EXPR_ARG (exp, 0);
25224 arg1 = CALL_EXPR_ARG (exp, 1);
25225 op0 = expand_normal (arg0);
25226 op1 = expand_normal (arg1);
25227 tmode = insn_data[icode].operand[0].mode;
25228 mode0 = insn_data[icode].operand[1].mode;
25229 mode1 = insn_data[icode].operand[2].mode;
25230
25231 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25232 op0 = copy_to_mode_reg (mode0, op0);
25233 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25234 {
25235 /* @@@ better error message */
25236 error ("selector must be an immediate");
25237 return gen_reg_rtx (tmode);
25238 }
25239
25240 opint = INTVAL (op1);
25241 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25242 {
25243 if (opint > 7 || opint < 0)
25244 error ("the range of selector should be in 0 to 7");
25245 }
25246 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25247 {
25248 if (opint > 3 || opint < 0)
25249 error ("the range of selector should be in 0 to 3");
25250 }
25251 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25252 {
25253 if (opint > 1 || opint < 0)
25254 error ("the range of selector should be in 0 to 1");
25255 }
25256
25257 if (target == 0
25258 || GET_MODE (target) != tmode
25259 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25260 target = gen_reg_rtx (tmode);
25261 pat = GEN_FCN (icode) (target, op0, op1);
25262 if (! pat)
25263 return 0;
25264 emit_insn (pat);
25265 return target;
25266
25267 case ARM_BUILTIN_WALIGNI:
25268 /* If op2 is immediate, call waligni, else call walignr. */
25269 arg0 = CALL_EXPR_ARG (exp, 0);
25270 arg1 = CALL_EXPR_ARG (exp, 1);
25271 arg2 = CALL_EXPR_ARG (exp, 2);
25272 op0 = expand_normal (arg0);
25273 op1 = expand_normal (arg1);
25274 op2 = expand_normal (arg2);
25275 if (CONST_INT_P (op2))
25276 {
25277 icode = CODE_FOR_iwmmxt_waligni;
25278 tmode = insn_data[icode].operand[0].mode;
25279 mode0 = insn_data[icode].operand[1].mode;
25280 mode1 = insn_data[icode].operand[2].mode;
25281 mode2 = insn_data[icode].operand[3].mode;
25282 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25283 op0 = copy_to_mode_reg (mode0, op0);
25284 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25285 op1 = copy_to_mode_reg (mode1, op1);
25286 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25287 selector = INTVAL (op2);
25288 if (selector > 7 || selector < 0)
25289 error ("the range of selector should be in 0 to 7");
25290 }
25291 else
25292 {
25293 icode = CODE_FOR_iwmmxt_walignr;
25294 tmode = insn_data[icode].operand[0].mode;
25295 mode0 = insn_data[icode].operand[1].mode;
25296 mode1 = insn_data[icode].operand[2].mode;
25297 mode2 = insn_data[icode].operand[3].mode;
25298 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25299 op0 = copy_to_mode_reg (mode0, op0);
25300 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25301 op1 = copy_to_mode_reg (mode1, op1);
25302 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25303 op2 = copy_to_mode_reg (mode2, op2);
25304 }
25305 if (target == 0
25306 || GET_MODE (target) != tmode
25307 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25308 target = gen_reg_rtx (tmode);
25309 pat = GEN_FCN (icode) (target, op0, op1, op2);
25310 if (!pat)
25311 return 0;
25312 emit_insn (pat);
25313 return target;
25314
25315 case ARM_BUILTIN_TINSRB:
25316 case ARM_BUILTIN_TINSRH:
25317 case ARM_BUILTIN_TINSRW:
25318 case ARM_BUILTIN_WMERGE:
25319 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25320 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25321 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25322 : CODE_FOR_iwmmxt_tinsrw);
25323 arg0 = CALL_EXPR_ARG (exp, 0);
25324 arg1 = CALL_EXPR_ARG (exp, 1);
25325 arg2 = CALL_EXPR_ARG (exp, 2);
25326 op0 = expand_normal (arg0);
25327 op1 = expand_normal (arg1);
25328 op2 = expand_normal (arg2);
25329 tmode = insn_data[icode].operand[0].mode;
25330 mode0 = insn_data[icode].operand[1].mode;
25331 mode1 = insn_data[icode].operand[2].mode;
25332 mode2 = insn_data[icode].operand[3].mode;
25333
25334 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25335 op0 = copy_to_mode_reg (mode0, op0);
25336 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25337 op1 = copy_to_mode_reg (mode1, op1);
25338 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25339 {
25340 error ("selector must be an immediate");
25341 return const0_rtx;
25342 }
25343 if (icode == CODE_FOR_iwmmxt_wmerge)
25344 {
25345 selector = INTVAL (op2);
25346 if (selector > 7 || selector < 0)
25347 error ("the range of selector should be in 0 to 7");
25348 }
25349 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25350 || (icode == CODE_FOR_iwmmxt_tinsrh)
25351 || (icode == CODE_FOR_iwmmxt_tinsrw))
25352 {
25353 mask = 0x01;
25354 selector = INTVAL (op2);
25355 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25356 error ("the range of selector should be in 0 to 7");
25357 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25358 error ("the range of selector should be in 0 to 3");
25359 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25360 error ("the range of selector should be in 0 to 1");
25361 mask <<= selector;
25362 op2 = GEN_INT (mask);
25363 }
25364 if (target == 0
25365 || GET_MODE (target) != tmode
25366 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25367 target = gen_reg_rtx (tmode);
25368 pat = GEN_FCN (icode) (target, op0, op1, op2);
25369 if (! pat)
25370 return 0;
25371 emit_insn (pat);
25372 return target;
25373
25374 case ARM_BUILTIN_SETWCGR0:
25375 case ARM_BUILTIN_SETWCGR1:
25376 case ARM_BUILTIN_SETWCGR2:
25377 case ARM_BUILTIN_SETWCGR3:
25378 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25379 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25380 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25381 : CODE_FOR_iwmmxt_setwcgr3);
25382 arg0 = CALL_EXPR_ARG (exp, 0);
25383 op0 = expand_normal (arg0);
25384 mode0 = insn_data[icode].operand[0].mode;
25385 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25386 op0 = copy_to_mode_reg (mode0, op0);
25387 pat = GEN_FCN (icode) (op0);
25388 if (!pat)
25389 return 0;
25390 emit_insn (pat);
25391 return 0;
25392
25393 case ARM_BUILTIN_GETWCGR0:
25394 case ARM_BUILTIN_GETWCGR1:
25395 case ARM_BUILTIN_GETWCGR2:
25396 case ARM_BUILTIN_GETWCGR3:
25397 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25398 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25399 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25400 : CODE_FOR_iwmmxt_getwcgr3);
25401 tmode = insn_data[icode].operand[0].mode;
25402 if (target == 0
25403 || GET_MODE (target) != tmode
25404 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25405 target = gen_reg_rtx (tmode);
25406 pat = GEN_FCN (icode) (target);
25407 if (!pat)
25408 return 0;
25409 emit_insn (pat);
25410 return target;
25411
25412 case ARM_BUILTIN_WSHUFH:
25413 icode = CODE_FOR_iwmmxt_wshufh;
25414 arg0 = CALL_EXPR_ARG (exp, 0);
25415 arg1 = CALL_EXPR_ARG (exp, 1);
25416 op0 = expand_normal (arg0);
25417 op1 = expand_normal (arg1);
25418 tmode = insn_data[icode].operand[0].mode;
25419 mode1 = insn_data[icode].operand[1].mode;
25420 mode2 = insn_data[icode].operand[2].mode;
25421
25422 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25423 op0 = copy_to_mode_reg (mode1, op0);
25424 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25425 {
25426 error ("mask must be an immediate");
25427 return const0_rtx;
25428 }
25429 selector = INTVAL (op1);
25430 if (selector < 0 || selector > 255)
25431 error ("the range of mask should be in 0 to 255");
25432 if (target == 0
25433 || GET_MODE (target) != tmode
25434 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25435 target = gen_reg_rtx (tmode);
25436 pat = GEN_FCN (icode) (target, op0, op1);
25437 if (! pat)
25438 return 0;
25439 emit_insn (pat);
25440 return target;
25441
25442 case ARM_BUILTIN_WMADDS:
25443 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25444 case ARM_BUILTIN_WMADDSX:
25445 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25446 case ARM_BUILTIN_WMADDSN:
25447 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25448 case ARM_BUILTIN_WMADDU:
25449 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25450 case ARM_BUILTIN_WMADDUX:
25451 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25452 case ARM_BUILTIN_WMADDUN:
25453 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25454 case ARM_BUILTIN_WSADBZ:
25455 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25456 case ARM_BUILTIN_WSADHZ:
25457 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25458
25459 /* Several three-argument builtins. */
25460 case ARM_BUILTIN_WMACS:
25461 case ARM_BUILTIN_WMACU:
25462 case ARM_BUILTIN_TMIA:
25463 case ARM_BUILTIN_TMIAPH:
25464 case ARM_BUILTIN_TMIATT:
25465 case ARM_BUILTIN_TMIATB:
25466 case ARM_BUILTIN_TMIABT:
25467 case ARM_BUILTIN_TMIABB:
25468 case ARM_BUILTIN_WQMIABB:
25469 case ARM_BUILTIN_WQMIABT:
25470 case ARM_BUILTIN_WQMIATB:
25471 case ARM_BUILTIN_WQMIATT:
25472 case ARM_BUILTIN_WQMIABBN:
25473 case ARM_BUILTIN_WQMIABTN:
25474 case ARM_BUILTIN_WQMIATBN:
25475 case ARM_BUILTIN_WQMIATTN:
25476 case ARM_BUILTIN_WMIABB:
25477 case ARM_BUILTIN_WMIABT:
25478 case ARM_BUILTIN_WMIATB:
25479 case ARM_BUILTIN_WMIATT:
25480 case ARM_BUILTIN_WMIABBN:
25481 case ARM_BUILTIN_WMIABTN:
25482 case ARM_BUILTIN_WMIATBN:
25483 case ARM_BUILTIN_WMIATTN:
25484 case ARM_BUILTIN_WMIAWBB:
25485 case ARM_BUILTIN_WMIAWBT:
25486 case ARM_BUILTIN_WMIAWTB:
25487 case ARM_BUILTIN_WMIAWTT:
25488 case ARM_BUILTIN_WMIAWBBN:
25489 case ARM_BUILTIN_WMIAWBTN:
25490 case ARM_BUILTIN_WMIAWTBN:
25491 case ARM_BUILTIN_WMIAWTTN:
25492 case ARM_BUILTIN_WSADB:
25493 case ARM_BUILTIN_WSADH:
25494 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25495 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25496 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25497 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25498 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25499 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25500 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25501 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25502 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25503 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25504 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25505 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25506 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25507 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25508 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25509 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25510 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25511 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25512 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25513 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25514 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25515 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25516 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25517 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25518 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25519 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25520 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25521 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25522 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25523 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25524 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25525 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25526 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25527 : CODE_FOR_iwmmxt_wsadh);
25528 arg0 = CALL_EXPR_ARG (exp, 0);
25529 arg1 = CALL_EXPR_ARG (exp, 1);
25530 arg2 = CALL_EXPR_ARG (exp, 2);
25531 op0 = expand_normal (arg0);
25532 op1 = expand_normal (arg1);
25533 op2 = expand_normal (arg2);
25534 tmode = insn_data[icode].operand[0].mode;
25535 mode0 = insn_data[icode].operand[1].mode;
25536 mode1 = insn_data[icode].operand[2].mode;
25537 mode2 = insn_data[icode].operand[3].mode;
25538
25539 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25540 op0 = copy_to_mode_reg (mode0, op0);
25541 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25542 op1 = copy_to_mode_reg (mode1, op1);
25543 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25544 op2 = copy_to_mode_reg (mode2, op2);
25545 if (target == 0
25546 || GET_MODE (target) != tmode
25547 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25548 target = gen_reg_rtx (tmode);
25549 pat = GEN_FCN (icode) (target, op0, op1, op2);
25550 if (! pat)
25551 return 0;
25552 emit_insn (pat);
25553 return target;
25554
25555 case ARM_BUILTIN_WZERO:
25556 target = gen_reg_rtx (DImode);
25557 emit_insn (gen_iwmmxt_clrdi (target));
25558 return target;
25559
25560 case ARM_BUILTIN_WSRLHI:
25561 case ARM_BUILTIN_WSRLWI:
25562 case ARM_BUILTIN_WSRLDI:
25563 case ARM_BUILTIN_WSLLHI:
25564 case ARM_BUILTIN_WSLLWI:
25565 case ARM_BUILTIN_WSLLDI:
25566 case ARM_BUILTIN_WSRAHI:
25567 case ARM_BUILTIN_WSRAWI:
25568 case ARM_BUILTIN_WSRADI:
25569 case ARM_BUILTIN_WRORHI:
25570 case ARM_BUILTIN_WRORWI:
25571 case ARM_BUILTIN_WRORDI:
25572 case ARM_BUILTIN_WSRLH:
25573 case ARM_BUILTIN_WSRLW:
25574 case ARM_BUILTIN_WSRLD:
25575 case ARM_BUILTIN_WSLLH:
25576 case ARM_BUILTIN_WSLLW:
25577 case ARM_BUILTIN_WSLLD:
25578 case ARM_BUILTIN_WSRAH:
25579 case ARM_BUILTIN_WSRAW:
25580 case ARM_BUILTIN_WSRAD:
25581 case ARM_BUILTIN_WRORH:
25582 case ARM_BUILTIN_WRORW:
25583 case ARM_BUILTIN_WRORD:
25584 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25585 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25586 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25587 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25588 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25589 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25590 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25591 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25592 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25593 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25594 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25595 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25596 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25597 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25598 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25599 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25600 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25601 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25602 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25603 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25604 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25605 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25606 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25607 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25608 : CODE_FOR_nothing);
25609 arg1 = CALL_EXPR_ARG (exp, 1);
25610 op1 = expand_normal (arg1);
25611 if (GET_MODE (op1) == VOIDmode)
25612 {
25613 imm = INTVAL (op1);
25614 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25615 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25616 && (imm < 0 || imm > 32))
25617 {
25618 if (fcode == ARM_BUILTIN_WRORHI)
25619 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
25620 else if (fcode == ARM_BUILTIN_WRORWI)
25621 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
25622 else if (fcode == ARM_BUILTIN_WRORH)
25623 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
25624 else
25625 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
25626 }
25627 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25628 && (imm < 0 || imm > 64))
25629 {
25630 if (fcode == ARM_BUILTIN_WRORDI)
25631 error ("the count should be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
25632 else
25633 error ("the count should be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
25634 }
25635 else if (imm < 0)
25636 {
25637 if (fcode == ARM_BUILTIN_WSRLHI)
25638 error ("the count should be no less than 0; please check the intrinsic _mm_srli_pi16 in your code");
25639 else if (fcode == ARM_BUILTIN_WSRLWI)
25640 error ("the count should be no less than 0; please check the intrinsic _mm_srli_pi32 in your code");
25641 else if (fcode == ARM_BUILTIN_WSRLDI)
25642 error ("the count should be no less than 0; please check the intrinsic _mm_srli_si64 in your code");
25643 else if (fcode == ARM_BUILTIN_WSLLHI)
25644 error ("the count should be no less than 0; please check the intrinsic _mm_slli_pi16 in your code");
25645 else if (fcode == ARM_BUILTIN_WSLLWI)
25646 error ("the count should be no less than 0; please check the intrinsic _mm_slli_pi32 in your code");
25647 else if (fcode == ARM_BUILTIN_WSLLDI)
25648 error ("the count should be no less than 0; please check the intrinsic _mm_slli_si64 in your code");
25649 else if (fcode == ARM_BUILTIN_WSRAHI)
25650 error ("the count should be no less than 0; please check the intrinsic _mm_srai_pi16 in your code");
25651 else if (fcode == ARM_BUILTIN_WSRAWI)
25652 error ("the count should be no less than 0; please check the intrinsic _mm_srai_pi32 in your code");
25653 else if (fcode == ARM_BUILTIN_WSRADI)
25654 error ("the count should be no less than 0; please check the intrinsic _mm_srai_si64 in your code");
25655 else if (fcode == ARM_BUILTIN_WSRLH)
25656 error ("the count should be no less than 0; please check the intrinsic _mm_srl_pi16 in your code");
25657 else if (fcode == ARM_BUILTIN_WSRLW)
25658 error ("the count should be no less than 0; please check the intrinsic _mm_srl_pi32 in your code");
25659 else if (fcode == ARM_BUILTIN_WSRLD)
25660 error ("the count should be no less than 0; please check the intrinsic _mm_srl_si64 in your code");
25661 else if (fcode == ARM_BUILTIN_WSLLH)
25662 error ("the count should be no less than 0; please check the intrinsic _mm_sll_pi16 in your code");
25663 else if (fcode == ARM_BUILTIN_WSLLW)
25664 error ("the count should be no less than 0; please check the intrinsic _mm_sll_pi32 in your code");
25665 else if (fcode == ARM_BUILTIN_WSLLD)
25666 error ("the count should be no less than 0; please check the intrinsic _mm_sll_si64 in your code");
25667 else if (fcode == ARM_BUILTIN_WSRAH)
25668 error ("the count should be no less than 0; please check the intrinsic _mm_sra_pi16 in your code");
25669 else if (fcode == ARM_BUILTIN_WSRAW)
25670 error ("the count should be no less than 0; please check the intrinsic _mm_sra_pi32 in your code");
25671 else
25672 error ("the count should be no less than 0; please check the intrinsic _mm_sra_si64 in your code");
25673 }
25674 }
25675 return arm_expand_binop_builtin (icode, exp, target);
25676
25677 default:
25678 break;
25679 }
25680
25681 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25682 if (d->code == (const enum arm_builtins) fcode)
25683 return arm_expand_binop_builtin (d->icode, exp, target);
25684
25685 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25686 if (d->code == (const enum arm_builtins) fcode)
25687 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25688
25689 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25690 if (d->code == (const enum arm_builtins) fcode)
25691 return arm_expand_ternop_builtin (d->icode, exp, target);
25692
25693 /* @@@ Should really do something sensible here. */
25694 return NULL_RTX;
25695 }
25696 \f
25697 /* Return the number (counting from 0) of
25698 the least significant set bit in MASK. */
25699
25700 inline static int
25701 number_of_first_bit_set (unsigned mask)
25702 {
25703 return ctz_hwi (mask);
25704 }
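/* For example, number_of_first_bit_set (0x28) is 3: 0x28 is 0b101000 and
   its least significant set bit is bit 3 (r3 in a register mask).  */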
25705
25706 /* Like emit_multi_reg_push, but allowing for a different set of
25707 registers to be described as saved. MASK is the set of registers
25708 to be saved; REAL_REGS is the set of registers to be described as
25709 saved. If REAL_REGS is 0, only describe the stack adjustment. */
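/* For example, when the Thumb-1 prologue stashes high registers in free
   low registers, it may call this with MASK == 0x06 (r1, r2) and
   REAL_REGS == 0x300 (r8, r9): the emitted insn is "push {r1, r2}", but
   the unwind information describes the two slots as saving r8 and r9.  */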
25710
25711 static rtx
25712 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25713 {
25714 unsigned long regno;
25715 rtx par[10], tmp, reg, insn;
25716 int i, j;
25717
25718 /* Build the parallel of the registers actually being stored. */
25719 for (i = 0; mask; ++i, mask &= mask - 1)
25720 {
25721 regno = ctz_hwi (mask);
25722 reg = gen_rtx_REG (SImode, regno);
25723
25724 if (i == 0)
25725 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25726 else
25727 tmp = gen_rtx_USE (VOIDmode, reg);
25728
25729 par[i] = tmp;
25730 }
25731
25732 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25733 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25734 tmp = gen_frame_mem (BLKmode, tmp);
25735 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25736 par[0] = tmp;
25737
25738 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25739 insn = emit_insn (tmp);
25740
25741 /* Always build the stack adjustment note for unwind info. */
25742 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25743 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25744 par[0] = tmp;
25745
25746 /* Build the parallel of the registers recorded as saved for unwind. */
25747 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25748 {
25749 regno = ctz_hwi (real_regs);
25750 reg = gen_rtx_REG (SImode, regno);
25751
25752 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25753 tmp = gen_frame_mem (SImode, tmp);
25754 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25755 RTX_FRAME_RELATED_P (tmp) = 1;
25756 par[j + 1] = tmp;
25757 }
25758
25759 if (j == 0)
25760 tmp = par[0];
25761 else
25762 {
25763 RTX_FRAME_RELATED_P (par[0]) = 1;
25764 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25765 }
25766
25767 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25768
25769 return insn;
25770 }
25771
25772 /* Emit code to push or pop registers to or from the stack. F is the
25773 assembly file. MASK is the registers to pop. */
25774 static void
25775 thumb_pop (FILE *f, unsigned long mask)
25776 {
25777 int regno;
25778 int lo_mask = mask & 0xFF;
25779 int pushed_words = 0;
25780
25781 gcc_assert (mask);
25782
25783 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25784 {
25785 /* Special case. Do not generate a POP PC statement here, do it in
25786 thumb_exit ().  */
25787 thumb_exit (f, -1);
25788 return;
25789 }
25790
25791 fprintf (f, "\tpop\t{");
25792
25793 /* Look at the low registers first. */
25794 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25795 {
25796 if (lo_mask & 1)
25797 {
25798 asm_fprintf (f, "%r", regno);
25799
25800 if ((lo_mask & ~1) != 0)
25801 fprintf (f, ", ");
25802
25803 pushed_words++;
25804 }
25805 }
25806
25807 if (mask & (1 << PC_REGNUM))
25808 {
25809 /* Catch popping the PC. */
25810 if (TARGET_INTERWORK || TARGET_BACKTRACE
25811 || crtl->calls_eh_return)
25812 {
25813 /* The PC is never popped directly; instead
25814 it is popped into r3 and then BX is used.  */
25815 fprintf (f, "}\n");
25816
25817 thumb_exit (f, -1);
25818
25819 return;
25820 }
25821 else
25822 {
25823 if (mask & 0xFF)
25824 fprintf (f, ", ");
25825
25826 asm_fprintf (f, "%r", PC_REGNUM);
25827 }
25828 }
25829
25830 fprintf (f, "}\n");
25831 }
25832
25833 /* Generate code to return from a thumb function.
25834 If 'reg_containing_return_addr' is -1, then the return address is
25835 actually on the stack, at the stack pointer. */
25836 static void
25837 thumb_exit (FILE *f, int reg_containing_return_addr)
25838 {
25839 unsigned regs_available_for_popping;
25840 unsigned regs_to_pop;
25841 int pops_needed;
25842 unsigned available;
25843 unsigned required;
25844 int mode;
25845 int size;
25846 int restore_a4 = FALSE;
25847
25848 /* Compute the registers we need to pop. */
25849 regs_to_pop = 0;
25850 pops_needed = 0;
25851
25852 if (reg_containing_return_addr == -1)
25853 {
25854 regs_to_pop |= 1 << LR_REGNUM;
25855 ++pops_needed;
25856 }
25857
25858 if (TARGET_BACKTRACE)
25859 {
25860 /* Restore the (ARM) frame pointer and stack pointer. */
25861 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25862 pops_needed += 2;
25863 }
25864
25865 /* If there is nothing to pop then just emit the BX instruction and
25866 return. */
25867 if (pops_needed == 0)
25868 {
25869 if (crtl->calls_eh_return)
25870 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25871
25872 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25873 return;
25874 }
25875 /* Otherwise, if we are not supporting interworking, have not created
25876 a backtrace structure and the function was not entered in ARM mode,
25877 then just pop the return address straight into the PC.  */
25878 else if (!TARGET_INTERWORK
25879 && !TARGET_BACKTRACE
25880 && !is_called_in_ARM_mode (current_function_decl)
25881 && !crtl->calls_eh_return)
25882 {
25883 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25884 return;
25885 }
25886
25887 /* Find out how many of the (return) argument registers we can corrupt. */
25888 regs_available_for_popping = 0;
25889
25890 /* If returning via __builtin_eh_return, the bottom three registers
25891 all contain information needed for the return. */
25892 if (crtl->calls_eh_return)
25893 size = 12;
25894 else
25895 {
25896 /* Deduce the registers used from the function's return
25897 value. This is more reliable than examining
25898 df_regs_ever_live_p () because that will be set if the register is
25899 ever used in the function, not just if the register is used
25900 to hold a return value.  */
25901
25902 if (crtl->return_rtx != 0)
25903 mode = GET_MODE (crtl->return_rtx);
25904 else
25905 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25906
25907 size = GET_MODE_SIZE (mode);
25908
25909 if (size == 0)
25910 {
25911 /* In a void function we can use any argument register.
25912 In a function that returns a structure on the stack
25913 we can use the second and third argument registers. */
25914 if (mode == VOIDmode)
25915 regs_available_for_popping =
25916 (1 << ARG_REGISTER (1))
25917 | (1 << ARG_REGISTER (2))
25918 | (1 << ARG_REGISTER (3));
25919 else
25920 regs_available_for_popping =
25921 (1 << ARG_REGISTER (2))
25922 | (1 << ARG_REGISTER (3));
25923 }
25924 else if (size <= 4)
25925 regs_available_for_popping =
25926 (1 << ARG_REGISTER (2))
25927 | (1 << ARG_REGISTER (3));
25928 else if (size <= 8)
25929 regs_available_for_popping =
25930 (1 << ARG_REGISTER (3));
25931 }
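/* For example, a function returning a 32-bit value leaves two argument
   registers free for popping, a 64-bit return leaves only one, and any
   larger return value (or a __builtin_eh_return exit) leaves none.  */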
25932
25933 /* Match registers to be popped with registers into which we pop them. */
25934 for (available = regs_available_for_popping,
25935 required = regs_to_pop;
25936 required != 0 && available != 0;
25937 available &= ~(available & - available),
25938 required &= ~(required & - required))
25939 -- pops_needed;
25940
25941 /* If we have any popping registers left over, remove them. */
25942 if (available > 0)
25943 regs_available_for_popping &= ~available;
25944
25945 /* Otherwise if we need another popping register we can use
25946 the fourth argument register. */
25947 else if (pops_needed)
25948 {
25949 /* If we have not found any free argument registers and
25950 reg a4 contains the return address, we must move it. */
25951 if (regs_available_for_popping == 0
25952 && reg_containing_return_addr == LAST_ARG_REGNUM)
25953 {
25954 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25955 reg_containing_return_addr = LR_REGNUM;
25956 }
25957 else if (size > 12)
25958 {
25959 /* Register a4 is being used to hold part of the return value,
25960 but we have dire need of a free, low register. */
25961 restore_a4 = TRUE;
25962
25963 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25964 }
25965
25966 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25967 {
25968 /* The fourth argument register is available. */
25969 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25970
25971 --pops_needed;
25972 }
25973 }
25974
25975 /* Pop as many registers as we can. */
25976 thumb_pop (f, regs_available_for_popping);
25977
25978 /* Process the registers we popped. */
25979 if (reg_containing_return_addr == -1)
25980 {
25981 /* The return address was popped into the lowest numbered register. */
25982 regs_to_pop &= ~(1 << LR_REGNUM);
25983
25984 reg_containing_return_addr =
25985 number_of_first_bit_set (regs_available_for_popping);
25986
25987 /* Remove this register from the mask of available registers, so that
25988 the return address will not be corrupted by further pops.  */
25989 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25990 }
25991
25992 /* If we popped other registers then handle them here. */
25993 if (regs_available_for_popping)
25994 {
25995 int frame_pointer;
25996
25997 /* Work out which register currently contains the frame pointer. */
25998 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25999
26000 /* Move it into the correct place. */
26001 asm_fprintf (f, "\tmov\t%r, %r\n",
26002 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26003
26004 /* (Temporarily) remove it from the mask of popped registers. */
26005 regs_available_for_popping &= ~(1 << frame_pointer);
26006 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26007
26008 if (regs_available_for_popping)
26009 {
26010 int stack_pointer;
26011
26012 /* We popped the stack pointer as well;
26013 find the register that contains it.  */
26014 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26015
26016 /* Move it into the stack register. */
26017 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26018
26019 /* At this point we have popped all necessary registers, so
26020 do not worry about restoring regs_available_for_popping
26021 to its correct value:
26022
26023 assert (pops_needed == 0)
26024 assert (regs_available_for_popping == (1 << frame_pointer))
26025 assert (regs_to_pop == (1 << STACK_POINTER)) */
26026 }
26027 else
26028 {
26029 /* Since we have just moved the popped value into the frame
26030 pointer, the popping register is available for reuse, and
26031 we know that we still have the stack pointer left to pop.  */
26032 regs_available_for_popping |= (1 << frame_pointer);
26033 }
26034 }
26035
26036 /* If we still have registers left on the stack, but we no longer have
26037 any registers into which we can pop them, then we must move the return
26038 address into the link register and make available the register that
26039 contained it. */
26040 if (regs_available_for_popping == 0 && pops_needed > 0)
26041 {
26042 regs_available_for_popping |= 1 << reg_containing_return_addr;
26043
26044 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26045 reg_containing_return_addr);
26046
26047 reg_containing_return_addr = LR_REGNUM;
26048 }
26049
26050 /* If we have registers left on the stack then pop some more.
26051 We know that at most we will want to pop FP and SP. */
26052 if (pops_needed > 0)
26053 {
26054 int popped_into;
26055 int move_to;
26056
26057 thumb_pop (f, regs_available_for_popping);
26058
26059 /* We have popped either FP or SP.
26060 Move whichever one it is into the correct register. */
26061 popped_into = number_of_first_bit_set (regs_available_for_popping);
26062 move_to = number_of_first_bit_set (regs_to_pop);
26063
26064 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26065
26066 regs_to_pop &= ~(1 << move_to);
26067
26068 --pops_needed;
26069 }
26070
26071 /* If we still have not popped everything then we must have only
26072 had one register available to us and we are now popping the SP. */
26073 if (pops_needed > 0)
26074 {
26075 int popped_into;
26076
26077 thumb_pop (f, regs_available_for_popping);
26078
26079 popped_into = number_of_first_bit_set (regs_available_for_popping);
26080
26081 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26082 /*
26083 assert (regs_to_pop == (1 << STACK_POINTER))
26084 assert (pops_needed == 1)
26085 */
26086 }
26087
26088 /* If necessary restore the a4 register. */
26089 if (restore_a4)
26090 {
26091 if (reg_containing_return_addr != LR_REGNUM)
26092 {
26093 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26094 reg_containing_return_addr = LR_REGNUM;
26095 }
26096
26097 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26098 }
26099
26100 if (crtl->calls_eh_return)
26101 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26102
26103 /* Return to caller. */
26104 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26105 }
26106 \f
26107 /* Scan INSN just before assembler is output for it.
26108 For Thumb-1, we track the status of the condition codes; this
26109 information is used in the cbranchsi4_insn pattern. */
26110 void
26111 thumb1_final_prescan_insn (rtx insn)
26112 {
26113 if (flag_print_asm_name)
26114 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26115 INSN_ADDRESSES (INSN_UID (insn)));
26116 /* Don't overwrite the previous setter when we get to a cbranch. */
26117 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26118 {
26119 enum attr_conds conds;
26120
26121 if (cfun->machine->thumb1_cc_insn)
26122 {
26123 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26124 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26125 CC_STATUS_INIT;
26126 }
26127 conds = get_attr_conds (insn);
26128 if (conds == CONDS_SET)
26129 {
26130 rtx set = single_set (insn);
26131 cfun->machine->thumb1_cc_insn = insn;
26132 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26133 cfun->machine->thumb1_cc_op1 = const0_rtx;
26134 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26135 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26136 {
26137 rtx src1 = XEXP (SET_SRC (set), 1);
26138 if (src1 == const0_rtx)
26139 cfun->machine->thumb1_cc_mode = CCmode;
26140 }
26141 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26142 {
26143 /* Record the src register operand instead of dest because
26144 the cprop_hardreg pass propagates src.  */
26145 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26146 }
26147 }
26148 else if (conds != CONDS_NOCOND)
26149 cfun->machine->thumb1_cc_insn = NULL_RTX;
26150 }
26151
26152 /* Check if an unexpected far jump is used.  */
26153 if (cfun->machine->lr_save_eliminated
26154 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26155 internal_error ("unexpected thumb1 far jump");
26156 }
26157
26158 int
26159 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26160 {
26161 unsigned HOST_WIDE_INT mask = 0xff;
26162 int i;
26163
26164 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26165 if (val == 0) /* XXX */
26166 return 0;
26167
26168 for (i = 0; i < 25; i++)
26169 if ((val & (mask << i)) == val)
26170 return 1;
26171
26172 return 0;
26173 }
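/* For example, 0x00ff0000 is accepted (0xff shifted left by 16), whereas
   0x00010001 is rejected because its set bits do not all fit within a
   single contiguous 8-bit window.  */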
26174
26175 /* Returns nonzero if the current function contains,
26176 or might contain, a far jump.  */
26177 static int
26178 thumb_far_jump_used_p (void)
26179 {
26180 rtx insn;
26181 bool far_jump = false;
26182 unsigned int func_size = 0;
26183
26184 /* This test is only important for leaf functions. */
26185 /* assert (!leaf_function_p ()); */
26186
26187 /* If we have already decided that far jumps may be used,
26188 do not bother checking again, and always return true even if
26189 it turns out that they are not being used. Once we have made
26190 the decision that far jumps are present (and that hence the link
26191 register will be pushed onto the stack) we cannot go back on it. */
26192 if (cfun->machine->far_jump_used)
26193 return 1;
26194
26195 /* If this function is not being called from the prologue/epilogue
26196 generation code then it must be being called from the
26197 INITIAL_ELIMINATION_OFFSET macro. */
26198 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26199 {
26200 /* In this case we know that we are being asked about the elimination
26201 of the arg pointer register. If that register is not being used,
26202 then there are no arguments on the stack, and we do not have to
26203 worry that a far jump might force the prologue to push the link
26204 register, changing the stack offsets. In this case we can just
26205 return false, since the presence of far jumps in the function will
26206 not affect stack offsets.
26207
26208 If the arg pointer is live (or if it was live, but has now been
26209 eliminated and so set to dead) then we do have to test to see if
26210 the function might contain a far jump. This test can lead to some
26211 false negatives, since before reload is completed, the length of
26212 branch instructions is not known, so gcc defaults to returning their
26213 longest length, which in turn sets the far jump attribute to true.
26214
26215 A false negative will not result in bad code being generated, but it
26216 will result in a needless push and pop of the link register. We
26217 hope that this does not occur too often.
26218
26219 If we need doubleword stack alignment this could affect the other
26220 elimination offsets so we can't risk getting it wrong. */
26221 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26222 cfun->machine->arg_pointer_live = 1;
26223 else if (!cfun->machine->arg_pointer_live)
26224 return 0;
26225 }
26226
26227 /* Check to see if the function contains a branch
26228 insn with the far jump attribute set. */
26229 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26230 {
26231 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26232 {
26233 far_jump = true;
26234 }
26235 func_size += get_attr_length (insn);
26236 }
26237
26238 /* The far_jump attribute is always true for thumb1 before the
26239 shorten_branch pass, so checking the attribute before that
26240 pass is not very useful.
26241
26242 The following heuristic tries to estimate more accurately whether
26243 a far jump will finally be used. It is very conservative, as there
26244 is no chance to roll back the decision not to use a far jump.
26245
26246 The Thumb1 long branch offset range is -2048 to 2046. The worst
26247 case is that each 2-byte insn is associated with a 4-byte constant
26248 pool. Using function size 2048/3 as the threshold is conservative enough. */
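/* For example, a function whose insn lengths total 700 bytes gives
   700 * 3 = 2100 >= 2048, so it is treated as containing a far jump
   (which in turn forces LR to be saved); a 600-byte function
   (1800 < 2048) is assumed to stay within the +/- 2 KB branch range
   even with worst-case constant pools.  */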
26249 if (far_jump)
26250 {
26251 if ((func_size * 3) >= 2048)
26252 {
26253 /* Record the fact that we have decided that
26254 the function does use far jumps. */
26255 cfun->machine->far_jump_used = 1;
26256 return 1;
26257 }
26258 }
26259
26260 return 0;
26261 }
26262
26263 /* Return nonzero if FUNC must be entered in ARM mode. */
26264 int
26265 is_called_in_ARM_mode (tree func)
26266 {
26267 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26268
26269 /* Ignore the problem of functions whose address is taken.  */
26270 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26271 return TRUE;
26272
26273 #ifdef ARM_PE
26274 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26275 #else
26276 return FALSE;
26277 #endif
26278 }
26279
26280 /* Given the stack offsets and register mask in OFFSETS, decide how
26281 many additional registers to push instead of subtracting a constant
26282 from SP. For epilogues the principle is the same except we use pop.
26283 FOR_PROLOGUE indicates which we're generating. */
26284 static int
26285 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26286 {
26287 HOST_WIDE_INT amount;
26288 unsigned long live_regs_mask = offsets->saved_regs_mask;
26289 /* Extract a mask of the ones we can give to the Thumb's push/pop
26290 instruction. */
26291 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26292 /* Then count how many other high registers will need to be pushed. */
26293 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26294 int n_free, reg_base, size;
26295
26296 if (!for_prologue && frame_pointer_needed)
26297 amount = offsets->locals_base - offsets->saved_regs;
26298 else
26299 amount = offsets->outgoing_args - offsets->saved_regs;
26300
26301 /* If the stack frame size is 512 exactly, we can save one load
26302 instruction, which should make this a win even when optimizing
26303 for speed. */
26304 if (!optimize_size && amount != 512)
26305 return 0;
26306
26307 /* Can't do this if there are high registers to push. */
26308 if (high_regs_pushed != 0)
26309 return 0;
26310
26311 /* Shouldn't do it in the prologue if no registers would normally
26312 be pushed at all. In the epilogue, also allow it if we'll have
26313 a pop insn for the PC. */
26314 if (l_mask == 0
26315 && (for_prologue
26316 || TARGET_BACKTRACE
26317 || (live_regs_mask & 1 << LR_REGNUM) == 0
26318 || TARGET_INTERWORK
26319 || crtl->args.pretend_args_size != 0))
26320 return 0;
26321
26322 /* Don't do this if thumb_expand_prologue wants to emit instructions
26323 between the push and the stack frame allocation. */
26324 if (for_prologue
26325 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26326 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26327 return 0;
26328
26329 reg_base = 0;
26330 n_free = 0;
26331 if (!for_prologue)
26332 {
26333 size = arm_size_return_regs ();
26334 reg_base = ARM_NUM_INTS (size);
26335 live_regs_mask >>= reg_base;
26336 }
26337
26338 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26339 && (for_prologue || call_used_regs[reg_base + n_free]))
26340 {
26341 live_regs_mask >>= 1;
26342 n_free++;
26343 }
26344
26345 if (n_free == 0)
26346 return 0;
26347 gcc_assert (amount / 4 * 4 == amount);
26348
26349 if (amount >= 512 && (amount - n_free * 4) < 512)
26350 return (amount - 508) / 4;
26351 if (amount <= n_free * 4)
26352 return amount / 4;
26353 return 0;
26354 }
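/* For example, with a frame of exactly 512 bytes and at least one free
   low register, (512 - 508) / 4 == 1 extra register is pushed, so the
   remaining 508-byte adjustment fits a single SUB/ADD SP, #imm.  */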
26355
26356 /* The bits which aren't usefully expanded as rtl. */
26357 const char *
26358 thumb1_unexpanded_epilogue (void)
26359 {
26360 arm_stack_offsets *offsets;
26361 int regno;
26362 unsigned long live_regs_mask = 0;
26363 int high_regs_pushed = 0;
26364 int extra_pop;
26365 int had_to_push_lr;
26366 int size;
26367
26368 if (cfun->machine->return_used_this_function != 0)
26369 return "";
26370
26371 if (IS_NAKED (arm_current_func_type ()))
26372 return "";
26373
26374 offsets = arm_get_frame_offsets ();
26375 live_regs_mask = offsets->saved_regs_mask;
26376 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26377
26378 /* Deduce the registers used from the function's return value.
26379 This is more reliable than examining df_regs_ever_live_p () because that
26380 will be set if the register is ever used in the function, not just if
26381 the register is used to hold a return value.  */
26382 size = arm_size_return_regs ();
26383
26384 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26385 if (extra_pop > 0)
26386 {
26387 unsigned long extra_mask = (1 << extra_pop) - 1;
26388 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26389 }
26390
26391 /* The prologue may have pushed some high registers to use as
26392 work registers. For example, the testsuite file:
26393 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26394 compiles to produce:
26395 push {r4, r5, r6, r7, lr}
26396 mov r7, r9
26397 mov r6, r8
26398 push {r6, r7}
26399 as part of the prologue. We have to undo that pushing here.  */
26400
26401 if (high_regs_pushed)
26402 {
26403 unsigned long mask = live_regs_mask & 0xff;
26404 int next_hi_reg;
26405
26406 /* The available low registers depend on the size of the value we are
26407 returning. */
26408 if (size <= 12)
26409 mask |= 1 << 3;
26410 if (size <= 8)
26411 mask |= 1 << 2;
26412
26413 if (mask == 0)
26414 /* Oh dear! We have no low registers into which we can pop
26415 high registers! */
26416 internal_error
26417 ("no low registers available for popping high registers");
26418
26419 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26420 if (live_regs_mask & (1 << next_hi_reg))
26421 break;
26422
26423 while (high_regs_pushed)
26424 {
26425 /* Find lo register(s) into which the high register(s) can
26426 be popped. */
26427 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26428 {
26429 if (mask & (1 << regno))
26430 high_regs_pushed--;
26431 if (high_regs_pushed == 0)
26432 break;
26433 }
26434
26435 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26436
26437 /* Pop the values into the low register(s). */
26438 thumb_pop (asm_out_file, mask);
26439
26440 /* Move the value(s) into the high registers. */
26441 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26442 {
26443 if (mask & (1 << regno))
26444 {
26445 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26446 regno);
26447
26448 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26449 if (live_regs_mask & (1 << next_hi_reg))
26450 break;
26451 }
26452 }
26453 }
26454 live_regs_mask &= ~0x0f00;
26455 }
26456
26457 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26458 live_regs_mask &= 0xff;
26459
26460 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26461 {
26462 /* Pop the return address into the PC. */
26463 if (had_to_push_lr)
26464 live_regs_mask |= 1 << PC_REGNUM;
26465
26466 /* Either no argument registers were pushed or a backtrace
26467 structure was created which includes an adjusted stack
26468 pointer, so just pop everything. */
26469 if (live_regs_mask)
26470 thumb_pop (asm_out_file, live_regs_mask);
26471
26472 /* We have either just popped the return address into the
26473 PC or it was kept in LR for the entire function.
26474 Note that thumb_pop has already called thumb_exit if the
26475 PC was in the list. */
26476 if (!had_to_push_lr)
26477 thumb_exit (asm_out_file, LR_REGNUM);
26478 }
26479 else
26480 {
26481 /* Pop everything but the return address. */
26482 if (live_regs_mask)
26483 thumb_pop (asm_out_file, live_regs_mask);
26484
26485 if (had_to_push_lr)
26486 {
26487 if (size > 12)
26488 {
26489 /* We have no free low regs, so save one. */
26490 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26491 LAST_ARG_REGNUM);
26492 }
26493
26494 /* Get the return address into a temporary register. */
26495 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26496
26497 if (size > 12)
26498 {
26499 /* Move the return address to lr. */
26500 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26501 LAST_ARG_REGNUM);
26502 /* Restore the low register. */
26503 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26504 IP_REGNUM);
26505 regno = LR_REGNUM;
26506 }
26507 else
26508 regno = LAST_ARG_REGNUM;
26509 }
26510 else
26511 regno = LR_REGNUM;
26512
26513 /* Remove the argument registers that were pushed onto the stack. */
26514 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26515 SP_REGNUM, SP_REGNUM,
26516 crtl->args.pretend_args_size);
26517
26518 thumb_exit (asm_out_file, regno);
26519 }
26520
26521 return "";
26522 }
26523
26524 /* Functions to save and restore machine-specific function data. */
26525 static struct machine_function *
26526 arm_init_machine_status (void)
26527 {
26528 struct machine_function *machine;
26529 machine = ggc_alloc_cleared_machine_function ();
26530
26531 #if ARM_FT_UNKNOWN != 0
26532 machine->func_type = ARM_FT_UNKNOWN;
26533 #endif
26534 return machine;
26535 }
26536
26537 /* Return an RTX indicating where the return address to the
26538 calling function can be found. */
26539 rtx
26540 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26541 {
26542 if (count != 0)
26543 return NULL_RTX;
26544
26545 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26546 }
26547
26548 /* Do anything needed before RTL is emitted for each function. */
26549 void
26550 arm_init_expanders (void)
26551 {
26552 /* Arrange to initialize and mark the machine per-function status. */
26553 init_machine_status = arm_init_machine_status;
26554
26555 /* This is to stop the combine pass optimizing away the alignment
26556 adjustment of va_arg. */
26557 /* ??? It is claimed that this should not be necessary. */
26558 if (cfun)
26559 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26560 }
26561
26562
26563 /* Like arm_compute_initial_elimination_offset. Simpler because there
26564 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
26565 to point at the base of the local variables after static stack
26566 space for a function has been allocated. */
26567
26568 HOST_WIDE_INT
26569 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26570 {
26571 arm_stack_offsets *offsets;
26572
26573 offsets = arm_get_frame_offsets ();
26574
26575 switch (from)
26576 {
26577 case ARG_POINTER_REGNUM:
26578 switch (to)
26579 {
26580 case STACK_POINTER_REGNUM:
26581 return offsets->outgoing_args - offsets->saved_args;
26582
26583 case FRAME_POINTER_REGNUM:
26584 return offsets->soft_frame - offsets->saved_args;
26585
26586 case ARM_HARD_FRAME_POINTER_REGNUM:
26587 return offsets->saved_regs - offsets->saved_args;
26588
26589 case THUMB_HARD_FRAME_POINTER_REGNUM:
26590 return offsets->locals_base - offsets->saved_args;
26591
26592 default:
26593 gcc_unreachable ();
26594 }
26595 break;
26596
26597 case FRAME_POINTER_REGNUM:
26598 switch (to)
26599 {
26600 case STACK_POINTER_REGNUM:
26601 return offsets->outgoing_args - offsets->soft_frame;
26602
26603 case ARM_HARD_FRAME_POINTER_REGNUM:
26604 return offsets->saved_regs - offsets->soft_frame;
26605
26606 case THUMB_HARD_FRAME_POINTER_REGNUM:
26607 return offsets->locals_base - offsets->soft_frame;
26608
26609 default:
26610 gcc_unreachable ();
26611 }
26612 break;
26613
26614 default:
26615 gcc_unreachable ();
26616 }
26617 }
26618
26619 /* Generate the function's prologue. */
26620
26621 void
26622 thumb1_expand_prologue (void)
26623 {
26624 rtx insn;
26625
26626 HOST_WIDE_INT amount;
26627 arm_stack_offsets *offsets;
26628 unsigned long func_type;
26629 int regno;
26630 unsigned long live_regs_mask;
26631 unsigned long l_mask;
26632 unsigned high_regs_pushed = 0;
26633
26634 func_type = arm_current_func_type ();
26635
26636 /* Naked functions don't have prologues. */
26637 if (IS_NAKED (func_type))
26638 return;
26639
26640 if (IS_INTERRUPT (func_type))
26641 {
26642 error ("interrupt Service Routines cannot be coded in Thumb mode");
26643 return;
26644 }
26645
26646 if (is_called_in_ARM_mode (current_function_decl))
26647 emit_insn (gen_prologue_thumb1_interwork ());
26648
26649 offsets = arm_get_frame_offsets ();
26650 live_regs_mask = offsets->saved_regs_mask;
26651
26652 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26653 l_mask = live_regs_mask & 0x40ff;
26654 /* Then count how many other high registers will need to be pushed. */
26655 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26656
26657 if (crtl->args.pretend_args_size)
26658 {
26659 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26660
26661 if (cfun->machine->uses_anonymous_args)
26662 {
26663 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26664 unsigned long mask;
26665
26666 mask = 1ul << (LAST_ARG_REGNUM + 1);
26667 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
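/* For example, pushing two anonymous argument words gives num_pushes == 2,
   so (with LAST_ARG_REGNUM == 3) mask == 0x10 - 0x04 == 0x0c, i.e. {r2, r3}.  */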
26668
26669 insn = thumb1_emit_multi_reg_push (mask, 0);
26670 }
26671 else
26672 {
26673 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26674 stack_pointer_rtx, x));
26675 }
26676 RTX_FRAME_RELATED_P (insn) = 1;
26677 }
26678
26679 if (TARGET_BACKTRACE)
26680 {
26681 HOST_WIDE_INT offset = 0;
26682 unsigned work_register;
26683 rtx work_reg, x, arm_hfp_rtx;
26684
26685 /* We have been asked to create a stack backtrace structure.
26686 The code looks like this:
26687
26688 0 .align 2
26689 0 func:
26690 0 sub SP, #16 Reserve space for 4 registers.
26691 2 push {R7} Push low registers.
26692 4 add R7, SP, #20 Get the stack pointer before the push.
26693 6 str R7, [SP, #8] Store the stack pointer
26694 (before reserving the space).
26695 8 mov R7, PC Get hold of the start of this code + 12.
26696 10 str R7, [SP, #16] Store it.
26697 12 mov R7, FP Get hold of the current frame pointer.
26698 14 str R7, [SP, #4] Store it.
26699 16 mov R7, LR Get hold of the current return address.
26700 18 str R7, [SP, #12] Store it.
26701 20 add R7, SP, #16 Point at the start of the
26702 backtrace structure.
26703 22 mov FP, R7 Put this value into the frame pointer. */
26704
26705 work_register = thumb_find_work_register (live_regs_mask);
26706 work_reg = gen_rtx_REG (SImode, work_register);
26707 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26708
26709 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26710 stack_pointer_rtx, GEN_INT (-16)));
26711 RTX_FRAME_RELATED_P (insn) = 1;
26712
26713 if (l_mask)
26714 {
26715 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26716 RTX_FRAME_RELATED_P (insn) = 1;
26717
26718 offset = bit_count (l_mask) * UNITS_PER_WORD;
26719 }
26720
26721 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26722 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26723
26724 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26725 x = gen_frame_mem (SImode, x);
26726 emit_move_insn (x, work_reg);
26727
26728 /* Make sure that the instruction fetching the PC is in the right place
26729 to calculate "start of backtrace creation code + 12". */
26730 /* ??? The stores using the common WORK_REG ought to be enough to
26731 prevent the scheduler from doing anything weird. Failing that
26732 we could always move all of the following into an UNSPEC_VOLATILE. */
26733 if (l_mask)
26734 {
26735 x = gen_rtx_REG (SImode, PC_REGNUM);
26736 emit_move_insn (work_reg, x);
26737
26738 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26739 x = gen_frame_mem (SImode, x);
26740 emit_move_insn (x, work_reg);
26741
26742 emit_move_insn (work_reg, arm_hfp_rtx);
26743
26744 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26745 x = gen_frame_mem (SImode, x);
26746 emit_move_insn (x, work_reg);
26747 }
26748 else
26749 {
26750 emit_move_insn (work_reg, arm_hfp_rtx);
26751
26752 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26753 x = gen_frame_mem (SImode, x);
26754 emit_move_insn (x, work_reg);
26755
26756 x = gen_rtx_REG (SImode, PC_REGNUM);
26757 emit_move_insn (work_reg, x);
26758
26759 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26760 x = gen_frame_mem (SImode, x);
26761 emit_move_insn (x, work_reg);
26762 }
26763
26764 x = gen_rtx_REG (SImode, LR_REGNUM);
26765 emit_move_insn (work_reg, x);
26766
26767 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26768 x = gen_frame_mem (SImode, x);
26769 emit_move_insn (x, work_reg);
26770
26771 x = GEN_INT (offset + 12);
26772 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26773
26774 emit_move_insn (arm_hfp_rtx, work_reg);
26775 }
26776 /* Optimization: If we are not pushing any low registers but we are going
26777 to push some high registers then delay our first push. This will just
26778 be a push of LR and we can combine it with the push of the first high
26779 register. */
26780 else if ((l_mask & 0xff) != 0
26781 || (high_regs_pushed == 0 && l_mask))
26782 {
26783 unsigned long mask = l_mask;
26784 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26785 insn = thumb1_emit_multi_reg_push (mask, mask);
26786 RTX_FRAME_RELATED_P (insn) = 1;
26787 }
26788
26789 if (high_regs_pushed)
26790 {
26791 unsigned pushable_regs;
26792 unsigned next_hi_reg;
26793 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26794 : crtl->args.info.nregs;
26795 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26796
26797 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26798 if (live_regs_mask & (1 << next_hi_reg))
26799 break;
26800
26801 /* Here we need to mask out registers used for passing arguments,
26802 even if they could otherwise be pushed. This avoids using them to stash
26803 the high registers, which could clobber argument values that are still live.  */
26804 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26805
26806 if (pushable_regs == 0)
26807 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26808
26809 while (high_regs_pushed > 0)
26810 {
26811 unsigned long real_regs_mask = 0;
26812
26813 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26814 {
26815 if (pushable_regs & (1 << regno))
26816 {
26817 emit_move_insn (gen_rtx_REG (SImode, regno),
26818 gen_rtx_REG (SImode, next_hi_reg));
26819
26820 high_regs_pushed --;
26821 real_regs_mask |= (1 << next_hi_reg);
26822
26823 if (high_regs_pushed)
26824 {
26825 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26826 next_hi_reg --)
26827 if (live_regs_mask & (1 << next_hi_reg))
26828 break;
26829 }
26830 else
26831 {
26832 pushable_regs &= ~((1 << regno) - 1);
26833 break;
26834 }
26835 }
26836 }
26837
26838 /* If we had to find a work register and we have not yet
26839 saved the LR then add it to the list of regs to push. */
26840 if (l_mask == (1 << LR_REGNUM))
26841 {
26842 pushable_regs |= l_mask;
26843 real_regs_mask |= l_mask;
26844 l_mask = 0;
26845 }
26846
26847 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26848 RTX_FRAME_RELATED_P (insn) = 1;
26849 }
26850 }
26851
26852 /* Load the pic register before setting the frame pointer,
26853 so we can use r7 as a temporary work register. */
26854 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26855 arm_load_pic_register (live_regs_mask);
26856
26857 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26858 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26859 stack_pointer_rtx);
26860
26861 if (flag_stack_usage_info)
26862 current_function_static_stack_size
26863 = offsets->outgoing_args - offsets->saved_args;
26864
26865 amount = offsets->outgoing_args - offsets->saved_regs;
26866 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26867 if (amount)
26868 {
26869 if (amount < 512)
26870 {
26871 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26872 GEN_INT (- amount)));
26873 RTX_FRAME_RELATED_P (insn) = 1;
26874 }
26875 else
26876 {
26877 rtx reg, dwarf;
26878
26879 /* The stack decrement is too big for an immediate value in a single
26880 insn. In theory we could issue multiple subtracts, but after
26881 three of them it becomes more space efficient to place the full
26882 value in the constant pool and load into a register. (Also the
26883 ARM debugger really likes to see only one stack decrement per
26884 function). So instead we look for a scratch register into which
26885 we can load the decrement, and then we subtract this from the
26886 stack pointer. Unfortunately on the thumb the only available
26887 scratch registers are the argument registers, and we cannot use
26888 these as they may hold arguments to the function. Instead we
26889 attempt to locate a call preserved register which is used by this
26890 function. If we can find one, then we know that it will have
26891 been pushed at the start of the prologue and so we can corrupt
26892 it now. */
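/* For example, a 1024-byte frame cannot be allocated with a single
   SUB SP, #imm (the Thumb-1 limit is 508 bytes), so the negated amount
   is loaded into a previously pushed callee-saved low register and
   added to SP instead.  */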
26893 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26894 if (live_regs_mask & (1 << regno))
26895 break;
26896
26897 gcc_assert (regno <= LAST_LO_REGNUM);
26898
26899 reg = gen_rtx_REG (SImode, regno);
26900
26901 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26902
26903 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26904 stack_pointer_rtx, reg));
26905
26906 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26907 plus_constant (Pmode, stack_pointer_rtx,
26908 -amount));
26909 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26910 RTX_FRAME_RELATED_P (insn) = 1;
26911 }
26912 }
26913
26914 if (frame_pointer_needed)
26915 thumb_set_frame_pointer (offsets);
26916
26917 /* If we are profiling, make sure no instructions are scheduled before
26918 the call to mcount. Likewise if the user has requested no
26919 scheduling in the prologue, or if we want non-call exceptions
26920 using the EABI unwinder, to prevent faulting instructions from being
26921 swapped with a stack adjustment. */
26922 if (crtl->profile || !TARGET_SCHED_PROLOG
26923 || (arm_except_unwind_info (&global_options) == UI_TARGET
26924 && cfun->can_throw_non_call_exceptions))
26925 emit_insn (gen_blockage ());
26926
26927 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26928 if (live_regs_mask & 0xff)
26929 cfun->machine->lr_save_eliminated = 0;
26930 }
26931
26932 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26933 single POP instruction can be generated. LR should be replaced by PC. All
26934 the checks required are already done by USE_RETURN_INSN (). Hence, all
26935 we really need to check here is whether a single register or multiple
26936 registers are to be popped.  */
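/* For example, if only LR was saved, this pops it straight into PC with
   something like "ldr pc, [sp], #4"; if {r4, r5, lr} were saved it emits
   a single "pop {r4, r5, pc}" instead.  */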
26937 void
26938 thumb2_expand_return (bool simple_return)
26939 {
26940 int i, num_regs;
26941 unsigned long saved_regs_mask;
26942 arm_stack_offsets *offsets;
26943
26944 offsets = arm_get_frame_offsets ();
26945 saved_regs_mask = offsets->saved_regs_mask;
26946
26947 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26948 if (saved_regs_mask & (1 << i))
26949 num_regs++;
26950
26951 if (!simple_return && saved_regs_mask)
26952 {
26953 if (num_regs == 1)
26954 {
26955 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26956 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26957 rtx addr = gen_rtx_MEM (SImode,
26958 gen_rtx_POST_INC (SImode,
26959 stack_pointer_rtx));
26960 set_mem_alias_set (addr, get_frame_alias_set ());
26961 XVECEXP (par, 0, 0) = ret_rtx;
26962 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
26963 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26964 emit_jump_insn (par);
26965 }
26966 else
26967 {
26968 saved_regs_mask &= ~ (1 << LR_REGNUM);
26969 saved_regs_mask |= (1 << PC_REGNUM);
26970 arm_emit_multi_reg_pop (saved_regs_mask);
26971 }
26972 }
26973 else
26974 {
26975 emit_jump_insn (simple_return_rtx);
26976 }
26977 }
26978
26979 void
26980 thumb1_expand_epilogue (void)
26981 {
26982 HOST_WIDE_INT amount;
26983 arm_stack_offsets *offsets;
26984 int regno;
26985
26986 /* Naked functions don't have epilogues.  */
26987 if (IS_NAKED (arm_current_func_type ()))
26988 return;
26989
26990 offsets = arm_get_frame_offsets ();
26991 amount = offsets->outgoing_args - offsets->saved_regs;
26992
26993 if (frame_pointer_needed)
26994 {
26995 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
26996 amount = offsets->locals_base - offsets->saved_regs;
26997 }
26998 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
26999
27000 gcc_assert (amount >= 0);
27001 if (amount)
27002 {
27003 emit_insn (gen_blockage ());
27004
27005 if (amount < 512)
27006 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27007 GEN_INT (amount)));
27008 else
27009 {
27010 /* r3 is always free in the epilogue. */
27011 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27012
27013 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27014 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27015 }
27016 }
27017
27018 /* Emit a USE (stack_pointer_rtx), so that
27019 the stack adjustment will not be deleted. */
27020 emit_insn (gen_force_register_use (stack_pointer_rtx));
27021
27022 if (crtl->profile || !TARGET_SCHED_PROLOG)
27023 emit_insn (gen_blockage ());
27024
27025 /* Emit a clobber for each register that will be restored in the epilogue,
27026 so that flow2 will get register lifetimes correct.  */
27027 for (regno = 0; regno < 13; regno++)
27028 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27029 emit_clobber (gen_rtx_REG (SImode, regno));
27030
27031 if (! df_regs_ever_live_p (LR_REGNUM))
27032 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27033 }
27034
27035 /* Epilogue code for APCS frame. */
27036 static void
27037 arm_expand_epilogue_apcs_frame (bool really_return)
27038 {
27039 unsigned long func_type;
27040 unsigned long saved_regs_mask;
27041 int num_regs = 0;
27042 int i;
27043 int floats_from_frame = 0;
27044 arm_stack_offsets *offsets;
27045
27046 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27047 func_type = arm_current_func_type ();
27048
27049 /* Get frame offsets for ARM. */
27050 offsets = arm_get_frame_offsets ();
27051 saved_regs_mask = offsets->saved_regs_mask;
27052
27053 /* Find the offset of the floating-point save area in the frame. */
27054 floats_from_frame
27055 = (offsets->saved_args
27056 + arm_compute_static_chain_stack_bytes ()
27057 - offsets->frame);
27058
27059 /* Compute how many core registers are saved and how far away the floats are.  */
27060 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27061 if (saved_regs_mask & (1 << i))
27062 {
27063 num_regs++;
27064 floats_from_frame += 4;
27065 }
27066
27067 if (TARGET_HARD_FLOAT && TARGET_VFP)
27068 {
27069 int start_reg;
27070
27071 /* The offset is from IP_REGNUM. */
27072 int saved_size = arm_get_vfp_saved_size ();
27073 if (saved_size > 0)
27074 {
27075 floats_from_frame += saved_size;
27076 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
27077 hard_frame_pointer_rtx,
27078 GEN_INT (-floats_from_frame)));
27079 }
27080
27081 /* Generate VFP register multi-pop. */
27082 start_reg = FIRST_VFP_REGNUM;
27083
27084 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27085 /* Look for a case where a reg does not need restoring. */
27086 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27087 && (!df_regs_ever_live_p (i + 1)
27088 || call_used_regs[i + 1]))
27089 {
27090 if (start_reg != i)
27091 arm_emit_vfp_multi_reg_pop (start_reg,
27092 (i - start_reg) / 2,
27093 gen_rtx_REG (SImode,
27094 IP_REGNUM));
27095 start_reg = i + 2;
27096 }
27097
27098 /* Restore the remaining regs that we have discovered (or possibly
27099 even all of them, if the conditional in the for loop never
27100 fired). */
27101 if (start_reg != i)
27102 arm_emit_vfp_multi_reg_pop (start_reg,
27103 (i - start_reg) / 2,
27104 gen_rtx_REG (SImode, IP_REGNUM));
27105 }
27106
27107 if (TARGET_IWMMXT)
27108 {
27109       /* The frame pointer is guaranteed to be non-double-word aligned, as
27110          it is set to the double-word-aligned old stack pointer minus 4.  */
27111 rtx insn;
27112 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27113
27114 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27115 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27116 {
27117 rtx addr = gen_frame_mem (V2SImode,
27118 plus_constant (Pmode, hard_frame_pointer_rtx,
27119 - lrm_count * 4));
27120 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27121 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27122 gen_rtx_REG (V2SImode, i),
27123 NULL_RTX);
27124 lrm_count += 2;
27125 }
27126 }
27127
27128   /* saved_regs_mask should contain IP, which holds the old stack pointer
27129      at the time the activation record was created.  Since SP and IP are
27130      adjacent registers, we can restore the value directly into SP.  */
27131 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27132 saved_regs_mask &= ~(1 << IP_REGNUM);
27133 saved_regs_mask |= (1 << SP_REGNUM);
27134
27135 /* There are two registers left in saved_regs_mask - LR and PC. We
27136 only need to restore LR (the return address), but to
27137 save time we can load it directly into PC, unless we need a
27138 special function exit sequence, or we are not really returning. */
27139 if (really_return
27140 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27141 && !crtl->calls_eh_return)
27142     /* Delete LR from the register mask, so that the LR value saved
27143        on the stack is loaded directly into the PC instead.  */
27144 saved_regs_mask &= ~(1 << LR_REGNUM);
27145 else
27146 saved_regs_mask &= ~(1 << PC_REGNUM);
27147
27148 num_regs = bit_count (saved_regs_mask);
27149 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27150 {
27151 emit_insn (gen_blockage ());
27152 /* Unwind the stack to just below the saved registers. */
27153 emit_insn (gen_addsi3 (stack_pointer_rtx,
27154 hard_frame_pointer_rtx,
27155 GEN_INT (- 4 * num_regs)));
27156 }
27157
27158 arm_emit_multi_reg_pop (saved_regs_mask);
27159
27160 if (IS_INTERRUPT (func_type))
27161 {
27162 /* Interrupt handlers will have pushed the
27163 IP onto the stack, so restore it now. */
27164 rtx insn;
27165 rtx addr = gen_rtx_MEM (SImode,
27166 gen_rtx_POST_INC (SImode,
27167 stack_pointer_rtx));
27168 set_mem_alias_set (addr, get_frame_alias_set ());
27169 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27170 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27171 gen_rtx_REG (SImode, IP_REGNUM),
27172 NULL_RTX);
27173 }
27174
27175 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27176 return;
27177
27178 if (crtl->calls_eh_return)
27179 emit_insn (gen_addsi3 (stack_pointer_rtx,
27180 stack_pointer_rtx,
27181 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27182
27183 if (IS_STACKALIGN (func_type))
27184 /* Restore the original stack pointer. Before prologue, the stack was
27185 realigned and the original stack pointer saved in r0. For details,
27186 see comment in arm_expand_prologue. */
27187 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27188
27189 emit_jump_insn (simple_return_rtx);
27190 }
27191
27192 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27193 function is not a sibcall. */
27194 void
27195 arm_expand_epilogue (bool really_return)
27196 {
27197 unsigned long func_type;
27198 unsigned long saved_regs_mask;
27199 int num_regs = 0;
27200 int i;
27201 int amount;
27202 arm_stack_offsets *offsets;
27203
27204 func_type = arm_current_func_type ();
27205
27206   /* Naked functions don't have epilogues.  Hence, generate a return pattern and
27207      let output_return_instruction take care of any instruction emission.  */
27208 if (IS_NAKED (func_type)
27209 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27210 {
27211 if (really_return)
27212 emit_jump_insn (simple_return_rtx);
27213 return;
27214 }
27215
27216 /* If we are throwing an exception, then we really must be doing a
27217 return, so we can't tail-call. */
27218 gcc_assert (!crtl->calls_eh_return || really_return);
27219
27220 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27221 {
27222 arm_expand_epilogue_apcs_frame (really_return);
27223 return;
27224 }
27225
27226 /* Get frame offsets for ARM. */
27227 offsets = arm_get_frame_offsets ();
27228 saved_regs_mask = offsets->saved_regs_mask;
27229 num_regs = bit_count (saved_regs_mask);
27230
27231 if (frame_pointer_needed)
27232 {
27233 rtx insn;
27234 /* Restore stack pointer if necessary. */
27235 if (TARGET_ARM)
27236 {
27237           /* In ARM mode, the frame pointer points to the first saved register.
27238              Restore the stack pointer to the last saved register.  */
27239 amount = offsets->frame - offsets->saved_regs;
27240
27241 /* Force out any pending memory operations that reference stacked data
27242 before stack de-allocation occurs. */
27243 emit_insn (gen_blockage ());
27244 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27245 hard_frame_pointer_rtx,
27246 GEN_INT (amount)));
27247 arm_add_cfa_adjust_cfa_note (insn, amount,
27248 stack_pointer_rtx,
27249 hard_frame_pointer_rtx);
27250
27251 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27252 deleted. */
27253 emit_insn (gen_force_register_use (stack_pointer_rtx));
27254 }
27255 else
27256 {
27257 /* In Thumb-2 mode, the frame pointer points to the last saved
27258 register. */
27259 amount = offsets->locals_base - offsets->saved_regs;
27260 if (amount)
27261 {
27262 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27263 hard_frame_pointer_rtx,
27264 GEN_INT (amount)));
27265 arm_add_cfa_adjust_cfa_note (insn, amount,
27266 hard_frame_pointer_rtx,
27267 hard_frame_pointer_rtx);
27268 }
27269
27270 /* Force out any pending memory operations that reference stacked data
27271 before stack de-allocation occurs. */
27272 emit_insn (gen_blockage ());
27273 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27274 hard_frame_pointer_rtx));
27275 arm_add_cfa_adjust_cfa_note (insn, 0,
27276 stack_pointer_rtx,
27277 hard_frame_pointer_rtx);
27278 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27279 deleted. */
27280 emit_insn (gen_force_register_use (stack_pointer_rtx));
27281 }
27282 }
27283 else
27284 {
27285 /* Pop off outgoing args and local frame to adjust stack pointer to
27286 last saved register. */
27287 amount = offsets->outgoing_args - offsets->saved_regs;
27288 if (amount)
27289 {
27290 rtx tmp;
27291 /* Force out any pending memory operations that reference stacked data
27292 before stack de-allocation occurs. */
27293 emit_insn (gen_blockage ());
27294 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27295 stack_pointer_rtx,
27296 GEN_INT (amount)));
27297 arm_add_cfa_adjust_cfa_note (tmp, amount,
27298 stack_pointer_rtx, stack_pointer_rtx);
27299 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27300 not deleted. */
27301 emit_insn (gen_force_register_use (stack_pointer_rtx));
27302 }
27303 }
27304
27305 if (TARGET_HARD_FLOAT && TARGET_VFP)
27306 {
27307 /* Generate VFP register multi-pop. */
27308 int end_reg = LAST_VFP_REGNUM + 1;
27309
27310       /* Scan the registers in reverse order.  We need to match
27311          any groupings made in the prologue and generate matching
27312          vldm operations.  We must match the groups because, unlike
27313          pop, vldm can only restore consecutive registers.  */
27314 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27315 /* Look for a case where a reg does not need restoring. */
27316 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27317 && (!df_regs_ever_live_p (i + 1)
27318 || call_used_regs[i + 1]))
27319 {
27320 /* Restore the regs discovered so far (from reg+2 to
27321 end_reg). */
27322 if (end_reg > i + 2)
27323 arm_emit_vfp_multi_reg_pop (i + 2,
27324 (end_reg - (i + 2)) / 2,
27325 stack_pointer_rtx);
27326 end_reg = i;
27327 }
27328
27329 /* Restore the remaining regs that we have discovered (or possibly
27330 even all of them, if the conditional in the for loop never
27331 fired). */
27332 if (end_reg > i + 2)
27333 arm_emit_vfp_multi_reg_pop (i + 2,
27334 (end_reg - (i + 2)) / 2,
27335 stack_pointer_rtx);
27336 }
27337
27338 if (TARGET_IWMMXT)
27339 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27340 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27341 {
27342 rtx insn;
27343 rtx addr = gen_rtx_MEM (V2SImode,
27344 gen_rtx_POST_INC (SImode,
27345 stack_pointer_rtx));
27346 set_mem_alias_set (addr, get_frame_alias_set ());
27347 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27348 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27349 gen_rtx_REG (V2SImode, i),
27350 NULL_RTX);
27351 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27352 stack_pointer_rtx, stack_pointer_rtx);
27353 }
27354
27355 if (saved_regs_mask)
27356 {
27357 rtx insn;
27358 bool return_in_pc = false;
27359
27360 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27361 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27362 && !IS_STACKALIGN (func_type)
27363 && really_return
27364 && crtl->args.pretend_args_size == 0
27365 && saved_regs_mask & (1 << LR_REGNUM)
27366 && !crtl->calls_eh_return)
27367 {
27368 saved_regs_mask &= ~(1 << LR_REGNUM);
27369 saved_regs_mask |= (1 << PC_REGNUM);
27370 return_in_pc = true;
27371 }
27372
27373 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27374 {
27375 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27376 if (saved_regs_mask & (1 << i))
27377 {
27378 rtx addr = gen_rtx_MEM (SImode,
27379 gen_rtx_POST_INC (SImode,
27380 stack_pointer_rtx));
27381 set_mem_alias_set (addr, get_frame_alias_set ());
27382
27383 if (i == PC_REGNUM)
27384 {
27385 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27386 XVECEXP (insn, 0, 0) = ret_rtx;
27387 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27388 gen_rtx_REG (SImode, i),
27389 addr);
27390 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27391 insn = emit_jump_insn (insn);
27392 }
27393 else
27394 {
27395 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27396 addr));
27397 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27398 gen_rtx_REG (SImode, i),
27399 NULL_RTX);
27400 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27401 stack_pointer_rtx,
27402 stack_pointer_rtx);
27403 }
27404 }
27405 }
27406 else
27407 {
27408 if (TARGET_LDRD
27409 && current_tune->prefer_ldrd_strd
27410 && !optimize_function_for_size_p (cfun))
27411 {
27412 if (TARGET_THUMB2)
27413 thumb2_emit_ldrd_pop (saved_regs_mask);
27414 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27415 arm_emit_ldrd_pop (saved_regs_mask);
27416 else
27417 arm_emit_multi_reg_pop (saved_regs_mask);
27418 }
27419 else
27420 arm_emit_multi_reg_pop (saved_regs_mask);
27421 }
27422
27423       if (return_in_pc)
27424 return;
27425 }
27426
27427 if (crtl->args.pretend_args_size)
27428 {
27429 int i, j;
27430 rtx dwarf = NULL_RTX;
27431 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27432 stack_pointer_rtx,
27433 GEN_INT (crtl->args.pretend_args_size)));
27434
27435 RTX_FRAME_RELATED_P (tmp) = 1;
27436
27437 if (cfun->machine->uses_anonymous_args)
27438 {
27439           /* Restore pretend args.  See arm_expand_prologue for how the
27440              pretend args are saved on the stack.  */
27441 int num_regs = crtl->args.pretend_args_size / 4;
27442 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27443 for (j = 0, i = 0; j < num_regs; i++)
27444 if (saved_regs_mask & (1 << i))
27445 {
27446 rtx reg = gen_rtx_REG (SImode, i);
27447 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27448 j++;
27449 }
27450 REG_NOTES (tmp) = dwarf;
27451 }
27452 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27453 stack_pointer_rtx, stack_pointer_rtx);
27454 }
27455
27456 if (!really_return)
27457 return;
27458
27459 if (crtl->calls_eh_return)
27460 emit_insn (gen_addsi3 (stack_pointer_rtx,
27461 stack_pointer_rtx,
27462 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27463
27464 if (IS_STACKALIGN (func_type))
27465 /* Restore the original stack pointer. Before prologue, the stack was
27466 realigned and the original stack pointer saved in r0. For details,
27467 see comment in arm_expand_prologue. */
27468 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27469
27470 emit_jump_insn (simple_return_rtx);
27471 }
27472
27473 /* Implementation of insn prologue_thumb1_interwork. This is the first
27474 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27475
27476 const char *
27477 thumb1_output_interwork (void)
27478 {
27479 const char * name;
27480 FILE *f = asm_out_file;
27481
27482 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27483 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27484 == SYMBOL_REF);
27485 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27486
27487 /* Generate code sequence to switch us into Thumb mode. */
27488 /* The .code 32 directive has already been emitted by
27489 ASM_DECLARE_FUNCTION_NAME. */
27490 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27491 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27492
27493 /* Generate a label, so that the debugger will notice the
27494 change in instruction sets. This label is also used by
27495 the assembler to bypass the ARM code when this function
27496 is called from a Thumb encoded function elsewhere in the
27497 same file. Hence the definition of STUB_NAME here must
27498 agree with the definition in gas/config/tc-arm.c. */
27499
27500 #define STUB_NAME ".real_start_of"
27501
27502 fprintf (f, "\t.code\t16\n");
27503 #ifdef ARM_PE
27504 if (arm_dllexport_name_p (name))
27505 name = arm_strip_name_encoding (name);
27506 #endif
27507 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27508 fprintf (f, "\t.thumb_func\n");
27509 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27510
27511 return "";
27512 }
27513
27514 /* Handle the case of a double word load into a low register from
27515 a computed memory address. The computed address may involve a
27516 register which is overwritten by the load. */
27517 const char *
27518 thumb_load_double_from_address (rtx *operands)
27519 {
27520 rtx addr;
27521 rtx base;
27522 rtx offset;
27523 rtx arg1;
27524 rtx arg2;
27525
27526 gcc_assert (REG_P (operands[0]));
27527 gcc_assert (MEM_P (operands[1]));
27528
27529 /* Get the memory address. */
27530 addr = XEXP (operands[1], 0);
27531
27532 /* Work out how the memory address is computed. */
27533 switch (GET_CODE (addr))
27534 {
27535 case REG:
27536 operands[2] = adjust_address (operands[1], SImode, 4);
27537
27538 if (REGNO (operands[0]) == REGNO (addr))
27539 {
27540 output_asm_insn ("ldr\t%H0, %2", operands);
27541 output_asm_insn ("ldr\t%0, %1", operands);
27542 }
27543 else
27544 {
27545 output_asm_insn ("ldr\t%0, %1", operands);
27546 output_asm_insn ("ldr\t%H0, %2", operands);
27547 }
27548 break;
27549
27550 case CONST:
27551 /* Compute <address> + 4 for the high order load. */
27552 operands[2] = adjust_address (operands[1], SImode, 4);
27553
27554 output_asm_insn ("ldr\t%0, %1", operands);
27555 output_asm_insn ("ldr\t%H0, %2", operands);
27556 break;
27557
27558 case PLUS:
27559 arg1 = XEXP (addr, 0);
27560 arg2 = XEXP (addr, 1);
27561
27562 if (CONSTANT_P (arg1))
27563 base = arg2, offset = arg1;
27564 else
27565 base = arg1, offset = arg2;
27566
27567 gcc_assert (REG_P (base));
27568
27569 /* Catch the case of <address> = <reg> + <reg> */
27570 if (REG_P (offset))
27571 {
27572 int reg_offset = REGNO (offset);
27573 int reg_base = REGNO (base);
27574 int reg_dest = REGNO (operands[0]);
27575
27576 /* Add the base and offset registers together into the
27577 higher destination register. */
27578 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27579 reg_dest + 1, reg_base, reg_offset);
27580
27581 /* Load the lower destination register from the address in
27582 the higher destination register. */
27583 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27584 reg_dest, reg_dest + 1);
27585
27586 /* Load the higher destination register from its own address
27587 plus 4. */
27588 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27589 reg_dest + 1, reg_dest + 1);
27590 }
27591 else
27592 {
27593 /* Compute <address> + 4 for the high order load. */
27594 operands[2] = adjust_address (operands[1], SImode, 4);
27595
27596 /* If the computed address is held in the low order register
27597 then load the high order register first, otherwise always
27598 load the low order register first. */
27599 if (REGNO (operands[0]) == REGNO (base))
27600 {
27601 output_asm_insn ("ldr\t%H0, %2", operands);
27602 output_asm_insn ("ldr\t%0, %1", operands);
27603 }
27604 else
27605 {
27606 output_asm_insn ("ldr\t%0, %1", operands);
27607 output_asm_insn ("ldr\t%H0, %2", operands);
27608 }
27609 }
27610 break;
27611
27612 case LABEL_REF:
27613 /* With no registers to worry about we can just load the value
27614 directly. */
27615 operands[2] = adjust_address (operands[1], SImode, 4);
27616
27617 output_asm_insn ("ldr\t%H0, %2", operands);
27618 output_asm_insn ("ldr\t%0, %1", operands);
27619 break;
27620
27621 default:
27622 gcc_unreachable ();
27623 }
27624
27625 return "";
27626 }
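/* Illustrative example (not part of the original source): for a DImode load
   whose destination starts at r3 and whose address is plain [r3] (the REG
   case above, with the destination overlapping the base register), the
   output is "ldr r4, [r3, #4]" followed by "ldr r3, [r3]", so the base
   register is not clobbered before the second load; when there is no
   overlap the low word is loaded first.  */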
27627
27628 const char *
27629 thumb_output_move_mem_multiple (int n, rtx *operands)
27630 {
27631 rtx tmp;
27632
27633 switch (n)
27634 {
27635 case 2:
27636 if (REGNO (operands[4]) > REGNO (operands[5]))
27637 {
27638 tmp = operands[4];
27639 operands[4] = operands[5];
27640 operands[5] = tmp;
27641 }
27642 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27643 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27644 break;
27645
27646 case 3:
27647 if (REGNO (operands[4]) > REGNO (operands[5]))
27648 {
27649 tmp = operands[4];
27650 operands[4] = operands[5];
27651 operands[5] = tmp;
27652 }
27653 if (REGNO (operands[5]) > REGNO (operands[6]))
27654 {
27655 tmp = operands[5];
27656 operands[5] = operands[6];
27657 operands[6] = tmp;
27658 }
27659 if (REGNO (operands[4]) > REGNO (operands[5]))
27660 {
27661 tmp = operands[4];
27662 operands[4] = operands[5];
27663 operands[5] = tmp;
27664 }
27665
27666 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27667 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27668 break;
27669
27670 default:
27671 gcc_unreachable ();
27672 }
27673
27674 return "";
27675 }
27676
27677 /* Output a call-via instruction for thumb state. */
27678 const char *
27679 thumb_call_via_reg (rtx reg)
27680 {
27681 int regno = REGNO (reg);
27682 rtx *labelp;
27683
27684 gcc_assert (regno < LR_REGNUM);
27685
27686 /* If we are in the normal text section we can use a single instance
27687 per compilation unit. If we are doing function sections, then we need
27688 an entry per section, since we can't rely on reachability. */
27689 if (in_section == text_section)
27690 {
27691 thumb_call_reg_needed = 1;
27692
27693 if (thumb_call_via_label[regno] == NULL)
27694 thumb_call_via_label[regno] = gen_label_rtx ();
27695 labelp = thumb_call_via_label + regno;
27696 }
27697 else
27698 {
27699 if (cfun->machine->call_via[regno] == NULL)
27700 cfun->machine->call_via[regno] = gen_label_rtx ();
27701 labelp = cfun->machine->call_via + regno;
27702 }
27703
27704 output_asm_insn ("bl\t%a0", labelp);
27705 return "";
27706 }
27707
27708 /* Routines for generating rtl. */
27709 void
27710 thumb_expand_movmemqi (rtx *operands)
27711 {
27712 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27713 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27714 HOST_WIDE_INT len = INTVAL (operands[2]);
27715 HOST_WIDE_INT offset = 0;
27716
27717 while (len >= 12)
27718 {
27719 emit_insn (gen_movmem12b (out, in, out, in));
27720 len -= 12;
27721 }
27722
27723 if (len >= 8)
27724 {
27725 emit_insn (gen_movmem8b (out, in, out, in));
27726 len -= 8;
27727 }
27728
27729 if (len >= 4)
27730 {
27731 rtx reg = gen_reg_rtx (SImode);
27732 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27733 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27734 len -= 4;
27735 offset += 4;
27736 }
27737
27738 if (len >= 2)
27739 {
27740 rtx reg = gen_reg_rtx (HImode);
27741 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27742 plus_constant (Pmode, in,
27743 offset))));
27744 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27745 offset)),
27746 reg));
27747 len -= 2;
27748 offset += 2;
27749 }
27750
27751 if (len)
27752 {
27753 rtx reg = gen_reg_rtx (QImode);
27754 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27755 plus_constant (Pmode, in,
27756 offset))));
27757 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27758 offset)),
27759 reg));
27760 }
27761 }
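/* Worked example (illustrative only, not part of the original source): for
   a 23-byte copy the code above emits one 12-byte and one 8-byte block move
   (both of which post-increment the source and destination pointers),
   leaving len == 3 and offset == 0; the remaining bytes are then copied as
   a halfword at offset 0 followed by a single byte at offset 2.  */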
27762
27763 void
27764 thumb_reload_out_hi (rtx *operands)
27765 {
27766 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27767 }
27768
27769 /* Handle reading a half-word from memory during reload. */
27770 void
27771 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27772 {
27773 gcc_unreachable ();
27774 }
27775
27776 /* Return the length of a function name prefix
27777 that starts with the character 'c'. */
27778 static int
27779 arm_get_strip_length (int c)
27780 {
27781 switch (c)
27782 {
27783 ARM_NAME_ENCODING_LENGTHS
27784 default: return 0;
27785 }
27786 }
27787
27788 /* Return a pointer to a function's name with any
27789 and all prefix encodings stripped from it. */
27790 const char *
27791 arm_strip_name_encoding (const char *name)
27792 {
27793 int skip;
27794
27795 while ((skip = arm_get_strip_length (* name)))
27796 name += skip;
27797
27798 return name;
27799 }
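/* Illustrative example (not part of the original source), assuming '*' is
   one of the prefix characters handled by ARM_NAME_ENCODING_LENGTHS with a
   strip length of 1: arm_strip_name_encoding ("*foo") skips the '*' and
   returns "foo", while a name with no recognised prefix is returned
   unchanged.  */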
27800
27801 /* If there is a '*' anywhere in the name's prefix, then
27802 emit the stripped name verbatim, otherwise prepend an
27803 underscore if leading underscores are being used. */
27804 void
27805 arm_asm_output_labelref (FILE *stream, const char *name)
27806 {
27807 int skip;
27808 int verbatim = 0;
27809
27810 while ((skip = arm_get_strip_length (* name)))
27811 {
27812 verbatim |= (*name == '*');
27813 name += skip;
27814 }
27815
27816 if (verbatim)
27817 fputs (name, stream);
27818 else
27819 asm_fprintf (stream, "%U%s", name);
27820 }
27821
27822 /* This function is used to emit an EABI tag and its associated value.
27823 We emit the numerical value of the tag in case the assembler does not
27824    support textual tags (e.g. gas prior to 2.20).  If requested we include
27825 the tag name in a comment so that anyone reading the assembler output
27826 will know which tag is being set.
27827
27828 This function is not static because arm-c.c needs it too. */
27829
27830 void
27831 arm_emit_eabi_attribute (const char *name, int num, int val)
27832 {
27833 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27834 if (flag_verbose_asm || flag_debug_asm)
27835 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27836 asm_fprintf (asm_out_file, "\n");
27837 }
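/* Illustrative example (not part of the original source), assuming
   ASM_COMMENT_START is "@" for ARM: a call such as

     arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1);

   emits

     .eabi_attribute 26, 1   @ Tag_ABI_enum_size

   when -fverbose-asm or -dA is in effect, and just the numeric directive
   otherwise.  */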
27838
27839 static void
27840 arm_file_start (void)
27841 {
27842 int val;
27843
27844 if (TARGET_UNIFIED_ASM)
27845 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27846
27847 if (TARGET_BPABI)
27848 {
27849 const char *fpu_name;
27850 if (arm_selected_arch)
27851 {
27852 const char* pos = strchr (arm_selected_arch->name, '+');
27853 if (pos)
27854 {
27855 char buf[15];
27856 gcc_assert (strlen (arm_selected_arch->name)
27857 <= sizeof (buf) / sizeof (*pos));
27858 strncpy (buf, arm_selected_arch->name,
27859 (pos - arm_selected_arch->name) * sizeof (*pos));
27860 buf[pos - arm_selected_arch->name] = '\0';
27861 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
27862 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
27863 }
27864 else
27865 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27866 }
27867 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27868 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27869 else
27870 {
27871 const char* truncated_name
27872 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27873 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27874 }
27875
27876 if (TARGET_SOFT_FLOAT)
27877 {
27878 fpu_name = "softvfp";
27879 }
27880 else
27881 {
27882 fpu_name = arm_fpu_desc->name;
27883 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27884 {
27885 if (TARGET_HARD_FLOAT)
27886 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27887 if (TARGET_HARD_FLOAT_ABI)
27888 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27889 }
27890 }
27891 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27892
27893 /* Some of these attributes only apply when the corresponding features
27894 are used. However we don't have any easy way of figuring this out.
27895 Conservatively record the setting that would have been used. */
27896
27897 if (flag_rounding_math)
27898 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27899
27900 if (!flag_unsafe_math_optimizations)
27901 {
27902 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27903 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27904 }
27905 if (flag_signaling_nans)
27906 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27907
27908 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27909 flag_finite_math_only ? 1 : 3);
27910
27911 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27912 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27913 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27914 flag_short_enums ? 1 : 2);
27915
27916 /* Tag_ABI_optimization_goals. */
27917 if (optimize_size)
27918 val = 4;
27919 else if (optimize >= 2)
27920 val = 2;
27921 else if (optimize)
27922 val = 1;
27923 else
27924 val = 6;
27925 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27926
27927 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27928 unaligned_access);
27929
27930 if (arm_fp16_format)
27931 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27932 (int) arm_fp16_format);
27933
27934 if (arm_lang_output_object_attributes_hook)
27935 arm_lang_output_object_attributes_hook();
27936 }
27937
27938 default_file_start ();
27939 }
27940
27941 static void
27942 arm_file_end (void)
27943 {
27944 int regno;
27945
27946 if (NEED_INDICATE_EXEC_STACK)
27947 /* Add .note.GNU-stack. */
27948 file_end_indicate_exec_stack ();
27949
27950 if (! thumb_call_reg_needed)
27951 return;
27952
27953 switch_to_section (text_section);
27954 asm_fprintf (asm_out_file, "\t.code 16\n");
27955 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27956
27957 for (regno = 0; regno < LR_REGNUM; regno++)
27958 {
27959 rtx label = thumb_call_via_label[regno];
27960
27961 if (label != 0)
27962 {
27963 targetm.asm_out.internal_label (asm_out_file, "L",
27964 CODE_LABEL_NUMBER (label));
27965 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27966 }
27967 }
27968 }
27969
27970 #ifndef ARM_PE
27971 /* Symbols in the text segment can be accessed without indirecting via the
27972 constant pool; it may take an extra binary operation, but this is still
27973 faster than indirecting via memory. Don't do this when not optimizing,
27974    since we won't be calculating all of the offsets necessary to do this
27975 simplification. */
27976
27977 static void
27978 arm_encode_section_info (tree decl, rtx rtl, int first)
27979 {
27980 if (optimize > 0 && TREE_CONSTANT (decl))
27981 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27982
27983 default_encode_section_info (decl, rtl, first);
27984 }
27985 #endif /* !ARM_PE */
27986
27987 static void
27988 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27989 {
27990 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27991 && !strcmp (prefix, "L"))
27992 {
27993 arm_ccfsm_state = 0;
27994 arm_target_insn = NULL;
27995 }
27996 default_internal_label (stream, prefix, labelno);
27997 }
27998
27999 /* Output code to add DELTA to the first argument, and then jump
28000 to FUNCTION. Used for C++ multiple inheritance. */
28001 static void
28002 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28003 HOST_WIDE_INT delta,
28004 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28005 tree function)
28006 {
28007 static int thunk_label = 0;
28008 char label[256];
28009 char labelpc[256];
28010 int mi_delta = delta;
28011 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28012 int shift = 0;
28013 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28014 ? 1 : 0);
28015 if (mi_delta < 0)
28016 mi_delta = - mi_delta;
28017
28018 final_start_function (emit_barrier (), file, 1);
28019
28020 if (TARGET_THUMB1)
28021 {
28022 int labelno = thunk_label++;
28023 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28024       /* Thunks are entered in ARM mode when available.  */
28025 if (TARGET_THUMB1_ONLY)
28026 {
28027 /* push r3 so we can use it as a temporary. */
28028 /* TODO: Omit this save if r3 is not used. */
28029 fputs ("\tpush {r3}\n", file);
28030 fputs ("\tldr\tr3, ", file);
28031 }
28032 else
28033 {
28034 fputs ("\tldr\tr12, ", file);
28035 }
28036 assemble_name (file, label);
28037 fputc ('\n', file);
28038 if (flag_pic)
28039 {
28040 /* If we are generating PIC, the ldr instruction below loads
28041 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28042 the address of the add + 8, so we have:
28043
28044 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28045 = target + 1.
28046
28047 Note that we have "+ 1" because some versions of GNU ld
28048 don't set the low bit of the result for R_ARM_REL32
28049 relocations against thumb function symbols.
28050 On ARMv6M this is +4, not +8. */
28051 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28052 assemble_name (file, labelpc);
28053 fputs (":\n", file);
28054 if (TARGET_THUMB1_ONLY)
28055 {
28056 /* This is 2 insns after the start of the thunk, so we know it
28057 is 4-byte aligned. */
28058 fputs ("\tadd\tr3, pc, r3\n", file);
28059 fputs ("\tmov r12, r3\n", file);
28060 }
28061 else
28062 fputs ("\tadd\tr12, pc, r12\n", file);
28063 }
28064 else if (TARGET_THUMB1_ONLY)
28065 fputs ("\tmov r12, r3\n", file);
28066 }
28067 if (TARGET_THUMB1_ONLY)
28068 {
28069 if (mi_delta > 255)
28070 {
28071 fputs ("\tldr\tr3, ", file);
28072 assemble_name (file, label);
28073 fputs ("+4\n", file);
28074 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28075 mi_op, this_regno, this_regno);
28076 }
28077 else if (mi_delta != 0)
28078 {
28079 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28080 mi_op, this_regno, this_regno,
28081 mi_delta);
28082 }
28083 }
28084 else
28085 {
28086 /* TODO: Use movw/movt for large constants when available. */
28087 while (mi_delta != 0)
28088 {
28089 if ((mi_delta & (3 << shift)) == 0)
28090 shift += 2;
28091 else
28092 {
28093 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28094 mi_op, this_regno, this_regno,
28095 mi_delta & (0xff << shift));
28096 mi_delta &= ~(0xff << shift);
28097 shift += 8;
28098 }
28099 }
28100 }
28101 if (TARGET_THUMB1)
28102 {
28103 if (TARGET_THUMB1_ONLY)
28104 fputs ("\tpop\t{r3}\n", file);
28105
28106 fprintf (file, "\tbx\tr12\n");
28107 ASM_OUTPUT_ALIGN (file, 2);
28108 assemble_name (file, label);
28109 fputs (":\n", file);
28110 if (flag_pic)
28111 {
28112 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28113 rtx tem = XEXP (DECL_RTL (function), 0);
28114 tem = plus_constant (GET_MODE (tem), tem, -7);
28115 tem = gen_rtx_MINUS (GET_MODE (tem),
28116 tem,
28117 gen_rtx_SYMBOL_REF (Pmode,
28118 ggc_strdup (labelpc)));
28119 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28120 }
28121 else
28122 /* Output ".word .LTHUNKn". */
28123 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28124
28125 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28126 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28127 }
28128 else
28129 {
28130 fputs ("\tb\t", file);
28131 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28132 if (NEED_PLT_RELOC)
28133 fputs ("(PLT)", file);
28134 fputc ('\n', file);
28135 }
28136
28137 final_end_function ();
28138 }
28139
28140 int
28141 arm_emit_vector_const (FILE *file, rtx x)
28142 {
28143 int i;
28144 const char * pattern;
28145
28146 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28147
28148 switch (GET_MODE (x))
28149 {
28150 case V2SImode: pattern = "%08x"; break;
28151 case V4HImode: pattern = "%04x"; break;
28152 case V8QImode: pattern = "%02x"; break;
28153 default: gcc_unreachable ();
28154 }
28155
28156 fprintf (file, "0x");
28157 for (i = CONST_VECTOR_NUNITS (x); i--;)
28158 {
28159 rtx element;
28160
28161 element = CONST_VECTOR_ELT (x, i);
28162 fprintf (file, pattern, INTVAL (element));
28163 }
28164
28165 return 1;
28166 }
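/* Worked example (illustrative only, not part of the original source): a
   V4HImode CONST_VECTOR whose elements 0..3 are 1, 2, 3 and 4 is printed
   highest element first, giving "0x0004000300020001".  */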
28167
28168 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28169 HFmode constant pool entries are actually loaded with ldr. */
28170 void
28171 arm_emit_fp16_const (rtx c)
28172 {
28173 REAL_VALUE_TYPE r;
28174 long bits;
28175
28176 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28177 bits = real_to_target (NULL, &r, HFmode);
28178 if (WORDS_BIG_ENDIAN)
28179 assemble_zeros (2);
28180 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28181 if (!WORDS_BIG_ENDIAN)
28182 assemble_zeros (2);
28183 }
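/* Illustrative example (not part of the original source): the HFmode
   constant 1.0 has the IEEE half-precision encoding 0x3c00, so on a
   little-endian target this emits the 2-byte value 0x3c00 followed by two
   bytes of zero padding; when WORDS_BIG_ENDIAN the padding comes first.  */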
28184
28185 const char *
28186 arm_output_load_gr (rtx *operands)
28187 {
28188 rtx reg;
28189 rtx offset;
28190 rtx wcgr;
28191 rtx sum;
28192
28193 if (!MEM_P (operands [1])
28194 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28195 || !REG_P (reg = XEXP (sum, 0))
28196 || !CONST_INT_P (offset = XEXP (sum, 1))
28197 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28198 return "wldrw%?\t%0, %1";
28199
28200 /* Fix up an out-of-range load of a GR register. */
28201 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28202 wcgr = operands[0];
28203 operands[0] = reg;
28204 output_asm_insn ("ldr%?\t%0, %1", operands);
28205
28206 operands[0] = wcgr;
28207 operands[1] = reg;
28208 output_asm_insn ("tmcr%?\t%0, %1", operands);
28209 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28210
28211 return "";
28212 }
28213
28214 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28215
28216 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28217 named arg and all anonymous args onto the stack.
28218 XXX I know the prologue shouldn't be pushing registers, but it is faster
28219 that way. */
28220
28221 static void
28222 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28223 enum machine_mode mode,
28224 tree type,
28225 int *pretend_size,
28226 int second_time ATTRIBUTE_UNUSED)
28227 {
28228 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28229 int nregs;
28230
28231 cfun->machine->uses_anonymous_args = 1;
28232 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28233 {
28234 nregs = pcum->aapcs_ncrn;
28235 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28236 nregs++;
28237 }
28238 else
28239 nregs = pcum->nregs;
28240
28241 if (nregs < NUM_ARG_REGS)
28242 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28243 }
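/* Illustrative example (not part of the original source): for a variadic
   function such as "int f (int a, ...)" under an AAPCS calling convention,
   the single named argument occupies r0, so nregs is 1 and *pretend_size
   becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12, making the prologue
   push r1-r3 so the anonymous arguments can be found on the stack.  */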
28244
28245 /* We can't rely on the caller doing the proper promotion when
28246 using APCS or ATPCS. */
28247
28248 static bool
28249 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28250 {
28251 return !TARGET_AAPCS_BASED;
28252 }
28253
28254 static enum machine_mode
28255 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28256 enum machine_mode mode,
28257 int *punsignedp ATTRIBUTE_UNUSED,
28258 const_tree fntype ATTRIBUTE_UNUSED,
28259 int for_return ATTRIBUTE_UNUSED)
28260 {
28261 if (GET_MODE_CLASS (mode) == MODE_INT
28262 && GET_MODE_SIZE (mode) < 4)
28263 return SImode;
28264
28265 return mode;
28266 }
28267
28268 /* AAPCS based ABIs use short enums by default. */
28269
28270 static bool
28271 arm_default_short_enums (void)
28272 {
28273 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28274 }
28275
28276
28277 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28278
28279 static bool
28280 arm_align_anon_bitfield (void)
28281 {
28282 return TARGET_AAPCS_BASED;
28283 }
28284
28285
28286 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28287
28288 static tree
28289 arm_cxx_guard_type (void)
28290 {
28291 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28292 }
28293
28294
28295 /* The EABI says test the least significant bit of a guard variable. */
28296
28297 static bool
28298 arm_cxx_guard_mask_bit (void)
28299 {
28300 return TARGET_AAPCS_BASED;
28301 }
28302
28303
28304 /* The EABI specifies that all array cookies are 8 bytes long. */
28305
28306 static tree
28307 arm_get_cookie_size (tree type)
28308 {
28309 tree size;
28310
28311 if (!TARGET_AAPCS_BASED)
28312 return default_cxx_get_cookie_size (type);
28313
28314 size = build_int_cst (sizetype, 8);
28315 return size;
28316 }
28317
28318
28319 /* The EABI says that array cookies should also contain the element size. */
28320
28321 static bool
28322 arm_cookie_has_size (void)
28323 {
28324 return TARGET_AAPCS_BASED;
28325 }
28326
28327
28328 /* The EABI says constructors and destructors should return a pointer to
28329 the object constructed/destroyed. */
28330
28331 static bool
28332 arm_cxx_cdtor_returns_this (void)
28333 {
28334 return TARGET_AAPCS_BASED;
28335 }
28336
28337 /* The EABI says that an inline function may never be the key
28338 method. */
28339
28340 static bool
28341 arm_cxx_key_method_may_be_inline (void)
28342 {
28343 return !TARGET_AAPCS_BASED;
28344 }
28345
28346 static void
28347 arm_cxx_determine_class_data_visibility (tree decl)
28348 {
28349 if (!TARGET_AAPCS_BASED
28350 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28351 return;
28352
28353 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28354 is exported. However, on systems without dynamic vague linkage,
28355 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28356 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28357 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28358 else
28359 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28360 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28361 }
28362
28363 static bool
28364 arm_cxx_class_data_always_comdat (void)
28365 {
28366 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28367 vague linkage if the class has no key function. */
28368 return !TARGET_AAPCS_BASED;
28369 }
28370
28371
28372 /* The EABI says __aeabi_atexit should be used to register static
28373 destructors. */
28374
28375 static bool
28376 arm_cxx_use_aeabi_atexit (void)
28377 {
28378 return TARGET_AAPCS_BASED;
28379 }
28380
28381
28382 void
28383 arm_set_return_address (rtx source, rtx scratch)
28384 {
28385 arm_stack_offsets *offsets;
28386 HOST_WIDE_INT delta;
28387 rtx addr;
28388 unsigned long saved_regs;
28389
28390 offsets = arm_get_frame_offsets ();
28391 saved_regs = offsets->saved_regs_mask;
28392
28393 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28394 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28395 else
28396 {
28397 if (frame_pointer_needed)
28398 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28399 else
28400 {
28401 /* LR will be the first saved register. */
28402 delta = offsets->outgoing_args - (offsets->frame + 4);
28403
28404
28405 if (delta >= 4096)
28406 {
28407 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28408 GEN_INT (delta & ~4095)));
28409 addr = scratch;
28410 delta &= 4095;
28411 }
28412 else
28413 addr = stack_pointer_rtx;
28414
28415 addr = plus_constant (Pmode, addr, delta);
28416 }
28417 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28418 }
28419 }
28420
28421
28422 void
28423 thumb_set_return_address (rtx source, rtx scratch)
28424 {
28425 arm_stack_offsets *offsets;
28426 HOST_WIDE_INT delta;
28427 HOST_WIDE_INT limit;
28428 int reg;
28429 rtx addr;
28430 unsigned long mask;
28431
28432 emit_use (source);
28433
28434 offsets = arm_get_frame_offsets ();
28435 mask = offsets->saved_regs_mask;
28436 if (mask & (1 << LR_REGNUM))
28437 {
28438 limit = 1024;
28439 /* Find the saved regs. */
28440 if (frame_pointer_needed)
28441 {
28442 delta = offsets->soft_frame - offsets->saved_args;
28443 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28444 if (TARGET_THUMB1)
28445 limit = 128;
28446 }
28447 else
28448 {
28449 delta = offsets->outgoing_args - offsets->saved_args;
28450 reg = SP_REGNUM;
28451 }
28452 /* Allow for the stack frame. */
28453 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28454 delta -= 16;
28455 /* The link register is always the first saved register. */
28456 delta -= 4;
28457
28458 /* Construct the address. */
28459 addr = gen_rtx_REG (SImode, reg);
28460 if (delta > limit)
28461 {
28462 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28463 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28464 addr = scratch;
28465 }
28466 else
28467 addr = plus_constant (Pmode, addr, delta);
28468
28469 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28470 }
28471 else
28472 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28473 }
28474
28475 /* Implements target hook vector_mode_supported_p. */
28476 bool
28477 arm_vector_mode_supported_p (enum machine_mode mode)
28478 {
28479 /* Neon also supports V2SImode, etc. listed in the clause below. */
28480 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28481 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28482 return true;
28483
28484 if ((TARGET_NEON || TARGET_IWMMXT)
28485 && ((mode == V2SImode)
28486 || (mode == V4HImode)
28487 || (mode == V8QImode)))
28488 return true;
28489
28490 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28491 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28492 || mode == V2HAmode))
28493 return true;
28494
28495 return false;
28496 }
28497
28498 /* Implements target hook array_mode_supported_p. */
28499
28500 static bool
28501 arm_array_mode_supported_p (enum machine_mode mode,
28502 unsigned HOST_WIDE_INT nelems)
28503 {
28504 if (TARGET_NEON
28505 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28506 && (nelems >= 2 && nelems <= 4))
28507 return true;
28508
28509 return false;
28510 }
28511
28512 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28513 registers when autovectorizing for Neon, at least until multiple vector
28514 widths are supported properly by the middle-end. */
28515
28516 static enum machine_mode
28517 arm_preferred_simd_mode (enum machine_mode mode)
28518 {
28519 if (TARGET_NEON)
28520 switch (mode)
28521 {
28522 case SFmode:
28523 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28524 case SImode:
28525 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28526 case HImode:
28527 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28528 case QImode:
28529 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28530 case DImode:
28531 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28532 return V2DImode;
28533 break;
28534
28535 default:;
28536 }
28537
28538 if (TARGET_REALLY_IWMMXT)
28539 switch (mode)
28540 {
28541 case SImode:
28542 return V2SImode;
28543 case HImode:
28544 return V4HImode;
28545 case QImode:
28546 return V8QImode;
28547
28548 default:;
28549 }
28550
28551 return word_mode;
28552 }
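/* Illustrative example (not part of the original source): when
   autovectorizing SImode operations for Neon the default choice above is
   the quadword V4SImode; with -mvectorize-with-neon-double it becomes the
   doubleword V2SImode, and iWMMXt targets also get V2SImode.  */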
28553
28554 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28555
28556 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28557    using r0-r4 for function arguments, r7 for the stack frame, and not have
28558 enough left over to do doubleword arithmetic. For Thumb-2 all the
28559 potentially problematic instructions accept high registers so this is not
28560 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28561 that require many low registers. */
28562 static bool
28563 arm_class_likely_spilled_p (reg_class_t rclass)
28564 {
28565 if ((TARGET_THUMB1 && rclass == LO_REGS)
28566 || rclass == CC_REG)
28567 return true;
28568
28569 return false;
28570 }
28571
28572 /* Implements target hook small_register_classes_for_mode_p. */
28573 bool
28574 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28575 {
28576 return TARGET_THUMB1;
28577 }
28578
28579 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28580 ARM insns and therefore guarantee that the shift count is modulo 256.
28581 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28582 guarantee no particular behavior for out-of-range counts. */
28583
28584 static unsigned HOST_WIDE_INT
28585 arm_shift_truncation_mask (enum machine_mode mode)
28586 {
28587 return mode == SImode ? 255 : 0;
28588 }
28589
28590
28591 /* Map internal gcc register numbers to DWARF2 register numbers. */
28592
28593 unsigned int
28594 arm_dbx_register_number (unsigned int regno)
28595 {
28596 if (regno < 16)
28597 return regno;
28598
28599 if (IS_VFP_REGNUM (regno))
28600 {
28601 /* See comment in arm_dwarf_register_span. */
28602 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28603 return 64 + regno - FIRST_VFP_REGNUM;
28604 else
28605 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28606 }
28607
28608 if (IS_IWMMXT_GR_REGNUM (regno))
28609 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28610
28611 if (IS_IWMMXT_REGNUM (regno))
28612 return 112 + regno - FIRST_IWMMXT_REGNUM;
28613
28614 gcc_unreachable ();
28615 }
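/* Worked example (illustrative only, not part of the original source):
   s0 (regno FIRST_VFP_REGNUM) maps to DWARF register 64 and s1 to 65,
   using the legacy single-precision numbering described in
   arm_dwarf_register_span below; VFP registers with no single-precision
   alias fall into the 256-based D-register range instead.  */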
28616
28617 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28618    GCC models them as 64 32-bit registers, so we need to describe this to
28619 the DWARF generation code. Other registers can use the default. */
28620 static rtx
28621 arm_dwarf_register_span (rtx rtl)
28622 {
28623 enum machine_mode mode;
28624 unsigned regno;
28625 rtx parts[8];
28626 int nregs;
28627 int i;
28628
28629 regno = REGNO (rtl);
28630 if (!IS_VFP_REGNUM (regno))
28631 return NULL_RTX;
28632
28633 /* XXX FIXME: The EABI defines two VFP register ranges:
28634 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28635 256-287: D0-D31
28636 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28637 corresponding D register. Until GDB supports this, we shall use the
28638 legacy encodings. We also use these encodings for D0-D15 for
28639 compatibility with older debuggers. */
28640 mode = GET_MODE (rtl);
28641 if (GET_MODE_SIZE (mode) < 8)
28642 return NULL_RTX;
28643
28644 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28645 {
28646 nregs = GET_MODE_SIZE (mode) / 4;
28647 for (i = 0; i < nregs; i += 2)
28648 if (TARGET_BIG_END)
28649 {
28650 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28651 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28652 }
28653 else
28654 {
28655 parts[i] = gen_rtx_REG (SImode, regno + i);
28656 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28657 }
28658 }
28659 else
28660 {
28661 nregs = GET_MODE_SIZE (mode) / 8;
28662 for (i = 0; i < nregs; i++)
28663 parts[i] = gen_rtx_REG (DImode, regno + i);
28664 }
28665
28666 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28667 }
28668
28669 #if ARM_UNWIND_INFO
28670 /* Emit unwind directives for a store-multiple instruction or stack pointer
28671 push during alignment.
28672 These should only ever be generated by the function prologue code, so
28673 expect them to have a particular form. */
28674
28675 static void
28676 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28677 {
28678 int i;
28679 HOST_WIDE_INT offset;
28680 HOST_WIDE_INT nregs;
28681 int reg_size;
28682 unsigned reg;
28683 unsigned lastreg;
28684 rtx e;
28685
28686 e = XVECEXP (p, 0, 0);
28687 if (GET_CODE (e) != SET)
28688 abort ();
28689
28690 /* First insn will adjust the stack pointer. */
28691 if (GET_CODE (e) != SET
28692 || !REG_P (XEXP (e, 0))
28693 || REGNO (XEXP (e, 0)) != SP_REGNUM
28694 || GET_CODE (XEXP (e, 1)) != PLUS)
28695 abort ();
28696
28697 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
28698 nregs = XVECLEN (p, 0) - 1;
28699
28700 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
28701 if (reg < 16)
28702 {
28703       /* The function prologue may also push pc, but does not annotate it, as
28704          it is never restored.  We turn this into a stack pointer adjustment.  */
28705 if (nregs * 4 == offset - 4)
28706 {
28707 fprintf (asm_out_file, "\t.pad #4\n");
28708 offset -= 4;
28709 }
28710 reg_size = 4;
28711 fprintf (asm_out_file, "\t.save {");
28712 }
28713 else if (IS_VFP_REGNUM (reg))
28714 {
28715 reg_size = 8;
28716 fprintf (asm_out_file, "\t.vsave {");
28717 }
28718 else
28719 /* Unknown register type. */
28720 abort ();
28721
28722 /* If the stack increment doesn't match the size of the saved registers,
28723 something has gone horribly wrong. */
28724 if (offset != nregs * reg_size)
28725 abort ();
28726
28727 offset = 0;
28728 lastreg = 0;
28729 /* The remaining insns will describe the stores. */
28730 for (i = 1; i <= nregs; i++)
28731 {
28732 /* Expect (set (mem <addr>) (reg)).
28733 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28734 e = XVECEXP (p, 0, i);
28735 if (GET_CODE (e) != SET
28736 || !MEM_P (XEXP (e, 0))
28737 || !REG_P (XEXP (e, 1)))
28738 abort ();
28739
28740 reg = REGNO (XEXP (e, 1));
28741 if (reg < lastreg)
28742 abort ();
28743
28744 if (i != 1)
28745 fprintf (asm_out_file, ", ");
28746 /* We can't use %r for vfp because we need to use the
28747 double precision register names. */
28748 if (IS_VFP_REGNUM (reg))
28749 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28750 else
28751 asm_fprintf (asm_out_file, "%r", reg);
28752
28753 #ifdef ENABLE_CHECKING
28754 /* Check that the addresses are consecutive. */
28755 e = XEXP (XEXP (e, 0), 0);
28756 if (GET_CODE (e) == PLUS)
28757 {
28758 offset += reg_size;
28759 if (!REG_P (XEXP (e, 0))
28760 || REGNO (XEXP (e, 0)) != SP_REGNUM
28761 || !CONST_INT_P (XEXP (e, 1))
28762 || offset != INTVAL (XEXP (e, 1)))
28763 abort ();
28764 }
28765 else if (i != 1
28766 || !REG_P (e)
28767 || REGNO (e) != SP_REGNUM)
28768 abort ();
28769 #endif
28770 }
28771 fprintf (asm_out_file, "}\n");
28772 }
28773
28774 /* Emit unwind directives for a SET. */
28775
28776 static void
28777 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28778 {
28779 rtx e0;
28780 rtx e1;
28781 unsigned reg;
28782
28783 e0 = XEXP (p, 0);
28784 e1 = XEXP (p, 1);
28785 switch (GET_CODE (e0))
28786 {
28787 case MEM:
28788 /* Pushing a single register. */
28789 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28790 || !REG_P (XEXP (XEXP (e0, 0), 0))
28791 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28792 abort ();
28793
28794 asm_fprintf (asm_out_file, "\t.save ");
28795 if (IS_VFP_REGNUM (REGNO (e1)))
28796 asm_fprintf(asm_out_file, "{d%d}\n",
28797 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28798 else
28799 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28800 break;
28801
28802 case REG:
28803 if (REGNO (e0) == SP_REGNUM)
28804 {
28805 /* A stack increment. */
28806 if (GET_CODE (e1) != PLUS
28807 || !REG_P (XEXP (e1, 0))
28808 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28809 || !CONST_INT_P (XEXP (e1, 1)))
28810 abort ();
28811
28812 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28813 -INTVAL (XEXP (e1, 1)));
28814 }
28815 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28816 {
28817 HOST_WIDE_INT offset;
28818
28819 if (GET_CODE (e1) == PLUS)
28820 {
28821 if (!REG_P (XEXP (e1, 0))
28822 || !CONST_INT_P (XEXP (e1, 1)))
28823 abort ();
28824 reg = REGNO (XEXP (e1, 0));
28825 offset = INTVAL (XEXP (e1, 1));
28826 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28827 HARD_FRAME_POINTER_REGNUM, reg,
28828 offset);
28829 }
28830 else if (REG_P (e1))
28831 {
28832 reg = REGNO (e1);
28833 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28834 HARD_FRAME_POINTER_REGNUM, reg);
28835 }
28836 else
28837 abort ();
28838 }
28839 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28840 {
28841 /* Move from sp to reg. */
28842 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28843 }
28844 else if (GET_CODE (e1) == PLUS
28845 && REG_P (XEXP (e1, 0))
28846 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28847 && CONST_INT_P (XEXP (e1, 1)))
28848 {
28849 /* Set reg to offset from sp. */
28850 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28851 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28852 }
28853 else
28854 abort ();
28855 break;
28856
28857 default:
28858 abort ();
28859 }
28860 }
28861
28862
28863 /* Emit unwind directives for the given insn. */
28864
28865 static void
28866 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28867 {
28868 rtx note, pat;
28869 bool handled_one = false;
28870
28871 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28872 return;
28873
28874 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28875 && (TREE_NOTHROW (current_function_decl)
28876 || crtl->all_throwers_are_sibcalls))
28877 return;
28878
28879 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28880 return;
28881
28882 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28883 {
28884 switch (REG_NOTE_KIND (note))
28885 {
28886 case REG_FRAME_RELATED_EXPR:
28887 pat = XEXP (note, 0);
28888 goto found;
28889
28890 case REG_CFA_REGISTER:
28891 pat = XEXP (note, 0);
28892 if (pat == NULL)
28893 {
28894 pat = PATTERN (insn);
28895 if (GET_CODE (pat) == PARALLEL)
28896 pat = XVECEXP (pat, 0, 0);
28897 }
28898
28899 /* Only emitted for IS_STACKALIGN re-alignment. */
28900 {
28901 rtx dest, src;
28902 unsigned reg;
28903
28904 src = SET_SRC (pat);
28905 dest = SET_DEST (pat);
28906
28907 gcc_assert (src == stack_pointer_rtx);
28908 reg = REGNO (dest);
28909 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28910 reg + 0x90, reg);
28911 }
28912 handled_one = true;
28913 break;
28914
28915       /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
28916          to get correct dwarf information for shrink-wrap.  We should not
28917          emit unwind information for it because these notes are used either for
28918          pretend arguments or to adjust sp and restore registers from the
28919          stack.  */
28920 case REG_CFA_ADJUST_CFA:
28921 case REG_CFA_RESTORE:
28922 return;
28923
28924 case REG_CFA_DEF_CFA:
28925 case REG_CFA_EXPRESSION:
28926 case REG_CFA_OFFSET:
28927 /* ??? Only handling here what we actually emit. */
28928 gcc_unreachable ();
28929
28930 default:
28931 break;
28932 }
28933 }
28934 if (handled_one)
28935 return;
28936 pat = PATTERN (insn);
28937 found:
28938
28939 switch (GET_CODE (pat))
28940 {
28941 case SET:
28942 arm_unwind_emit_set (asm_out_file, pat);
28943 break;
28944
28945 case SEQUENCE:
28946 /* Store multiple. */
28947 arm_unwind_emit_sequence (asm_out_file, pat);
28948 break;
28949
28950 default:
28951 abort();
28952 }
28953 }
28954
28955
28956 /* Output a reference from a function exception table to the type_info
28957 object X. The EABI specifies that the symbol should be relocated by
28958 an R_ARM_TARGET2 relocation. */
28959
28960 static bool
28961 arm_output_ttype (rtx x)
28962 {
28963 fputs ("\t.word\t", asm_out_file);
28964 output_addr_const (asm_out_file, x);
28965 /* Use special relocations for symbol references. */
28966 if (!CONST_INT_P (x))
28967 fputs ("(TARGET2)", asm_out_file);
28968 fputc ('\n', asm_out_file);
28969
28970 return TRUE;
28971 }
28972
28973 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28974
28975 static void
28976 arm_asm_emit_except_personality (rtx personality)
28977 {
28978 fputs ("\t.personality\t", asm_out_file);
28979 output_addr_const (asm_out_file, personality);
28980 fputc ('\n', asm_out_file);
28981 }
28982
28983 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28984
28985 static void
28986 arm_asm_init_sections (void)
28987 {
28988 exception_section = get_unnamed_section (0, output_section_asm_op,
28989 "\t.handlerdata");
28990 }
28991 #endif /* ARM_UNWIND_INFO */
28992
28993 /* Output unwind directives for the start/end of a function. */
28994
28995 void
28996 arm_output_fn_unwind (FILE * f, bool prologue)
28997 {
28998 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28999 return;
29000
29001 if (prologue)
29002 fputs ("\t.fnstart\n", f);
29003 else
29004 {
29005 /* If this function will never be unwound, then mark it as such.
29006 	 The same condition is used in arm_unwind_emit to suppress
29007 the frame annotations. */
29008 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29009 && (TREE_NOTHROW (current_function_decl)
29010 || crtl->all_throwers_are_sibcalls))
29011 fputs("\t.cantunwind\n", f);
29012
29013 fputs ("\t.fnend\n", f);
29014 }
29015 }
29016
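/* Print to FP the TLS relocation decoration for the UNSPEC_TLS operand X:
   the symbol itself, then the relocation suffix (e.g. "(tlsgd)" for
   TLS_GD32), and, for the GD/LDM/IE/DESCSEQ variants, a PC-relative
   adjustment built from the label and addend operands.  Always returns
   TRUE.  */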
29017 static bool
29018 arm_emit_tls_decoration (FILE *fp, rtx x)
29019 {
29020 enum tls_reloc reloc;
29021 rtx val;
29022
29023 val = XVECEXP (x, 0, 0);
29024 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29025
29026 output_addr_const (fp, val);
29027
29028 switch (reloc)
29029 {
29030 case TLS_GD32:
29031 fputs ("(tlsgd)", fp);
29032 break;
29033 case TLS_LDM32:
29034 fputs ("(tlsldm)", fp);
29035 break;
29036 case TLS_LDO32:
29037 fputs ("(tlsldo)", fp);
29038 break;
29039 case TLS_IE32:
29040 fputs ("(gottpoff)", fp);
29041 break;
29042 case TLS_LE32:
29043 fputs ("(tpoff)", fp);
29044 break;
29045 case TLS_DESCSEQ:
29046 fputs ("(tlsdesc)", fp);
29047 break;
29048 default:
29049 gcc_unreachable ();
29050 }
29051
29052 switch (reloc)
29053 {
29054 case TLS_GD32:
29055 case TLS_LDM32:
29056 case TLS_IE32:
29057 case TLS_DESCSEQ:
29058 fputs (" + (. - ", fp);
29059 output_addr_const (fp, XVECEXP (x, 0, 2));
29060       /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
29061 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29062 output_addr_const (fp, XVECEXP (x, 0, 3));
29063 fputc (')', fp);
29064 break;
29065 default:
29066 break;
29067 }
29068
29069 return TRUE;
29070 }
29071
29072 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29073
29074 static void
29075 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29076 {
29077 gcc_assert (size == 4);
29078 fputs ("\t.word\t", file);
29079 output_addr_const (file, x);
29080 fputs ("(tlsldo)", file);
29081 }
29082
29083 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29084
29085 static bool
29086 arm_output_addr_const_extra (FILE *fp, rtx x)
29087 {
29088 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29089 return arm_emit_tls_decoration (fp, x);
29090 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29091 {
29092 char label[256];
29093 int labelno = INTVAL (XVECEXP (x, 0, 0));
29094
29095 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29096 assemble_name_raw (fp, label);
29097
29098 return TRUE;
29099 }
29100 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29101 {
29102 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29103 if (GOT_PCREL)
29104 fputs ("+.", fp);
29105 fputs ("-(", fp);
29106 output_addr_const (fp, XVECEXP (x, 0, 0));
29107 fputc (')', fp);
29108 return TRUE;
29109 }
29110 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29111 {
29112 output_addr_const (fp, XVECEXP (x, 0, 0));
29113 if (GOT_PCREL)
29114 fputs ("+.", fp);
29115 fputs ("-(", fp);
29116 output_addr_const (fp, XVECEXP (x, 0, 1));
29117 fputc (')', fp);
29118 return TRUE;
29119 }
29120 else if (GET_CODE (x) == CONST_VECTOR)
29121 return arm_emit_vector_const (fp, x);
29122
29123 return FALSE;
29124 }
29125
29126 /* Output assembly for a shift instruction.
29127 SET_FLAGS determines how the instruction modifies the condition codes.
29128 0 - Do not set condition codes.
29129 1 - Set condition codes.
29130 2 - Use smallest instruction. */
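/* A rough illustration, assuming unified assembly syntax and an
   unconditional insn: for operands describing a logical right shift of r1
   by 2 into r0 with SET_FLAGS == 0, shift_op yields "lsr", the template
   becomes "lsr%?\t%0, %1, %2" and the printed instruction is roughly
   "lsr r0, r1, #2".  */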
29131 const char *
29132 arm_output_shift (rtx * operands, int set_flags)
29133 {
29134 char pattern[100];
29135 static const char flag_chars[3] = {'?', '.', '!'};
29136 const char *shift;
29137 HOST_WIDE_INT val;
29138 char c;
29139
29140 c = flag_chars[set_flags];
29141 if (TARGET_UNIFIED_ASM)
29142 {
29143 shift = shift_op(operands[3], &val);
29144 if (shift)
29145 {
29146 if (val != -1)
29147 operands[2] = GEN_INT(val);
29148 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29149 }
29150 else
29151 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29152 }
29153 else
29154 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29155 output_asm_insn (pattern, operands);
29156 return "";
29157 }
29158
29159 /* Output assembly for a WMMX immediate shift instruction. */
29160 const char *
29161 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29162 {
29163 int shift = INTVAL (operands[2]);
29164 char templ[50];
29165 enum machine_mode opmode = GET_MODE (operands[0]);
29166
29167 gcc_assert (shift >= 0);
29168
29169   /* Handle immediate shift counts larger than the element width: > 63 (for
29170      the D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier).  */
29171 if (((opmode == V4HImode) && (shift > 15))
29172 || ((opmode == V2SImode) && (shift > 31))
29173 || ((opmode == DImode) && (shift > 63)))
29174 {
29175 if (wror_or_wsra)
29176 {
29177 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29178 output_asm_insn (templ, operands);
29179 if (opmode == DImode)
29180 {
29181 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29182 output_asm_insn (templ, operands);
29183 }
29184 }
29185 else
29186 {
29187 /* The destination register will contain all zeros. */
29188 sprintf (templ, "wzero\t%%0");
29189 output_asm_insn (templ, operands);
29190 }
29191 return "";
29192 }
29193
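  /* A DImode shift in the range (32, 63] cannot be expressed as a single
     immediate-form instruction (the immediate presumably only encodes counts
     up to 32), so it is split into a shift by #32 followed by a shift by the
     remainder, e.g. a shift by 40 becomes #32 then #8.  */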
29194 if ((opmode == DImode) && (shift > 32))
29195 {
29196 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29197 output_asm_insn (templ, operands);
29198 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29199 output_asm_insn (templ, operands);
29200 }
29201 else
29202 {
29203 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29204 output_asm_insn (templ, operands);
29205 }
29206 return "";
29207 }
29208
29209 /* Output assembly for a WMMX tinsr instruction. */
29210 const char *
29211 arm_output_iwmmxt_tinsr (rtx *operands)
29212 {
29213 int mask = INTVAL (operands[3]);
29214 int i;
29215 char templ[50];
29216 int units = mode_nunits[GET_MODE (operands[0])];
29217 gcc_assert ((mask & (mask - 1)) == 0);
29218 for (i = 0; i < units; ++i)
29219 {
29220 if ((mask & 0x01) == 1)
29221 {
29222 break;
29223 }
29224 mask >>= 1;
29225 }
29226 gcc_assert (i < units);
29227 {
29228 switch (GET_MODE (operands[0]))
29229 {
29230 case V8QImode:
29231 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29232 break;
29233 case V4HImode:
29234 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29235 break;
29236 case V2SImode:
29237 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29238 break;
29239 default:
29240 gcc_unreachable ();
29241 break;
29242 }
29243 output_asm_insn (templ, operands);
29244 }
29245 return "";
29246 }
29247
29248 /* Output a Thumb-1 casesi dispatch sequence. */
29249 const char *
29250 thumb1_output_casesi (rtx *operands)
29251 {
29252 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29253
29254 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29255
29256 switch (GET_MODE(diff_vec))
29257 {
29258 case QImode:
29259 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29260 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29261 case HImode:
29262 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29263 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29264 case SImode:
29265 return "bl\t%___gnu_thumb1_case_si";
29266 default:
29267 gcc_unreachable ();
29268 }
29269 }
29270
29271 /* Output a Thumb-2 casesi instruction. */
29272 const char *
29273 thumb2_output_casesi (rtx *operands)
29274 {
29275 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29276
29277 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29278
29279 output_asm_insn ("cmp\t%0, %1", operands);
29280 output_asm_insn ("bhi\t%l3", operands);
29281 switch (GET_MODE(diff_vec))
29282 {
29283 case QImode:
29284 return "tbb\t[%|pc, %0]";
29285 case HImode:
29286 return "tbh\t[%|pc, %0, lsl #1]";
29287 case SImode:
29288 if (flag_pic)
29289 {
29290 output_asm_insn ("adr\t%4, %l2", operands);
29291 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29292 output_asm_insn ("add\t%4, %4, %5", operands);
29293 return "bx\t%4";
29294 }
29295 else
29296 {
29297 output_asm_insn ("adr\t%4, %l2", operands);
29298 return "ldr\t%|pc, [%4, %0, lsl #2]";
29299 }
29300 default:
29301 gcc_unreachable ();
29302 }
29303 }
29304
29305 /* Most ARM cores are single issue, but some newer ones can dual issue.
29306 The scheduler descriptions rely on this being correct. */
29307 static int
29308 arm_issue_rate (void)
29309 {
29310 switch (arm_tune)
29311 {
29312 case cortexa15:
29313 return 3;
29314
29315 case cortexr4:
29316 case cortexr4f:
29317 case cortexr5:
29318 case genericv7a:
29319 case cortexa5:
29320 case cortexa7:
29321 case cortexa8:
29322 case cortexa9:
29323 case cortexa12:
29324 case cortexa53:
29325 case fa726te:
29326 case marvell_pj4:
29327 return 2;
29328
29329 default:
29330 return 1;
29331 }
29332 }
29333
29334 /* A table and a function to perform ARM-specific name mangling for
29335 NEON vector types in order to conform to the AAPCS (see "Procedure
29336 Call Standard for the ARM Architecture", Appendix A). To qualify
29337 for emission with the mangled names defined in that document, a
29338 vector type must not only be of the correct mode but also be
29339 composed of NEON vector element types (e.g. __builtin_neon_qi). */
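/* For illustration only: with this table, a vector typedef such as int8x8_t
   (V8QImode with __builtin_neon_qi elements) is mangled as
   "15__simd64_int8_t", so a C++ function "void f (int8x8_t)" would be
   emitted roughly as "_Z1f15__simd64_int8_t".  */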
29340 typedef struct
29341 {
29342 enum machine_mode mode;
29343 const char *element_type_name;
29344 const char *aapcs_name;
29345 } arm_mangle_map_entry;
29346
29347 static arm_mangle_map_entry arm_mangle_map[] = {
29348 /* 64-bit containerized types. */
29349 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29350 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29351 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29352 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29353 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29354 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29355 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29356 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29357 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29358 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29359
29360 /* 128-bit containerized types. */
29361 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29362 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29363 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29364 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29365 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29366 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29367 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29368 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29369 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29370 { VOIDmode, NULL, NULL }
29371 };
29372
29373 const char *
29374 arm_mangle_type (const_tree type)
29375 {
29376 arm_mangle_map_entry *pos = arm_mangle_map;
29377
29378 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29379      has to be mangled as if it is in the "std" namespace.  */
29380 if (TARGET_AAPCS_BASED
29381 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29382 return "St9__va_list";
29383
29384 /* Half-precision float. */
29385 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29386 return "Dh";
29387
29388 if (TREE_CODE (type) != VECTOR_TYPE)
29389 return NULL;
29390
29391 /* Check the mode of the vector type, and the name of the vector
29392 element type, against the table. */
29393 while (pos->mode != VOIDmode)
29394 {
29395 tree elt_type = TREE_TYPE (type);
29396
29397 if (pos->mode == TYPE_MODE (type)
29398 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29399 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29400 pos->element_type_name))
29401 return pos->aapcs_name;
29402
29403 pos++;
29404 }
29405
29406 /* Use the default mangling for unrecognized (possibly user-defined)
29407 vector types. */
29408 return NULL;
29409 }
29410
29411 /* Order of allocation of core registers for Thumb: this allocation is
29412 written over the corresponding initial entries of the array
29413 initialized with REG_ALLOC_ORDER. We allocate all low registers
29414 first. Saving and restoring a low register is usually cheaper than
29415 using a call-clobbered high register. */
29416
29417 static const int thumb_core_reg_alloc_order[] =
29418 {
29419 3, 2, 1, 0, 4, 5, 6, 7,
29420 14, 12, 8, 9, 10, 11
29421 };
29422
29423 /* Adjust register allocation order when compiling for Thumb. */
29424
29425 void
29426 arm_order_regs_for_local_alloc (void)
29427 {
29428 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29429 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29430 if (TARGET_THUMB)
29431 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29432 sizeof (thumb_core_reg_alloc_order));
29433 }
29434
29435 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29436
29437 bool
29438 arm_frame_pointer_required (void)
29439 {
29440 return (cfun->has_nonlocal_label
29441 || SUBTARGET_FRAME_POINTER_REQUIRED
29442 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29443 }
29444
29445 /* Thumb-1 is the only target variant that cannot use conditional execution,
29446    so return true unless we are generating Thumb-1 code.  */
29447 static bool
29448 arm_have_conditional_execution (void)
29449 {
29450 return !TARGET_THUMB1;
29451 }
29452
29453 tree
29454 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29455 {
29456 enum machine_mode in_mode, out_mode;
29457 int in_n, out_n;
29458
29459 if (TREE_CODE (type_out) != VECTOR_TYPE
29460 || TREE_CODE (type_in) != VECTOR_TYPE
29461 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29462 return NULL_TREE;
29463
29464 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29465 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29466 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29467 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29468
29469 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29470 decl of the vectorized builtin for the appropriate vector mode.
29471 NULL_TREE is returned if no such builtin is available. */
29472 #undef ARM_CHECK_BUILTIN_MODE
29473 #define ARM_CHECK_BUILTIN_MODE(C) \
29474 (out_mode == SFmode && out_n == C \
29475 && in_mode == SFmode && in_n == C)
29476
29477 #undef ARM_FIND_VRINT_VARIANT
29478 #define ARM_FIND_VRINT_VARIANT(N) \
29479 (ARM_CHECK_BUILTIN_MODE (2) \
29480 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29481 : (ARM_CHECK_BUILTIN_MODE (4) \
29482 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29483 : NULL_TREE))
29484
29485 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29486 {
29487 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29488 switch (fn)
29489 {
29490 case BUILT_IN_FLOORF:
29491 return ARM_FIND_VRINT_VARIANT (vrintm);
29492 case BUILT_IN_CEILF:
29493 return ARM_FIND_VRINT_VARIANT (vrintp);
29494 case BUILT_IN_TRUNCF:
29495 return ARM_FIND_VRINT_VARIANT (vrintz);
29496 case BUILT_IN_ROUNDF:
29497 return ARM_FIND_VRINT_VARIANT (vrinta);
29498 default:
29499 return NULL_TREE;
29500 }
29501 }
29502 return NULL_TREE;
29503 }
29504 #undef ARM_CHECK_BUILTIN_MODE
29505 #undef ARM_FIND_VRINT_VARIANT
29506
29507 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29508 static HOST_WIDE_INT
29509 arm_vector_alignment (const_tree type)
29510 {
29511 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29512
29513 if (TARGET_AAPCS_BASED)
29514 align = MIN (align, 64);
29515
29516 return align;
29517 }
29518
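/* Return a bitmask of the vector sizes (in bytes) the auto-vectorizer may
   try: 16 | 8 allows both quad-word and double-word NEON vectors, while
   returning 0 in the TARGET_NEON_VECTORIZE_DOUBLE case restricts it to the
   single preferred (double-word) vector size.  */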
29519 static unsigned int
29520 arm_autovectorize_vector_sizes (void)
29521 {
29522 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29523 }
29524
29525 static bool
29526 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29527 {
29528 /* Vectors which aren't in packed structures will not be less aligned than
29529 the natural alignment of their element type, so this is safe. */
29530 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29531 return !is_packed;
29532
29533 return default_builtin_vector_alignment_reachable (type, is_packed);
29534 }
29535
29536 static bool
29537 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29538 const_tree type, int misalignment,
29539 bool is_packed)
29540 {
29541 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29542 {
29543 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29544
29545 if (is_packed)
29546 return align == 1;
29547
29548 /* If the misalignment is unknown, we should be able to handle the access
29549 so long as it is not to a member of a packed data structure. */
29550 if (misalignment == -1)
29551 return true;
29552
29553 /* Return true if the misalignment is a multiple of the natural alignment
29554 of the vector's element type. This is probably always going to be
29555 true in practice, since we've already established that this isn't a
29556 packed access. */
29557 return ((misalignment % align) == 0);
29558 }
29559
29560 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29561 is_packed);
29562 }
29563
29564 static void
29565 arm_conditional_register_usage (void)
29566 {
29567 int regno;
29568
29569 if (TARGET_THUMB1 && optimize_size)
29570 {
29571 /* When optimizing for size on Thumb-1, it's better not
29572 to use the HI regs, because of the overhead of
29573 stacking them. */
29574 for (regno = FIRST_HI_REGNUM;
29575 regno <= LAST_HI_REGNUM; ++regno)
29576 fixed_regs[regno] = call_used_regs[regno] = 1;
29577 }
29578
29579 /* The link register can be clobbered by any branch insn,
29580 but we have no way to track that at present, so mark
29581 it as unavailable. */
29582 if (TARGET_THUMB1)
29583 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29584
29585 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29586 {
29587 /* VFPv3 registers are disabled when earlier VFP
29588 versions are selected due to the definition of
29589 LAST_VFP_REGNUM. */
29590 for (regno = FIRST_VFP_REGNUM;
29591 regno <= LAST_VFP_REGNUM; ++ regno)
29592 {
29593 fixed_regs[regno] = 0;
29594 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29595 || regno >= FIRST_VFP_REGNUM + 32;
29596 }
29597 }
29598
29599 if (TARGET_REALLY_IWMMXT)
29600 {
29601 regno = FIRST_IWMMXT_GR_REGNUM;
29602 /* The 2002/10/09 revision of the XScale ABI has wCG0
29603 and wCG1 as call-preserved registers. The 2002/11/21
29604 revision changed this so that all wCG registers are
29605 scratch registers. */
29606 for (regno = FIRST_IWMMXT_GR_REGNUM;
29607 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29608 fixed_regs[regno] = 0;
29609 /* The XScale ABI has wR0 - wR9 as scratch registers,
29610 the rest as call-preserved registers. */
29611 for (regno = FIRST_IWMMXT_REGNUM;
29612 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29613 {
29614 fixed_regs[regno] = 0;
29615 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29616 }
29617 }
29618
29619 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29620 {
29621 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29622 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29623 }
29624 else if (TARGET_APCS_STACK)
29625 {
29626 fixed_regs[10] = 1;
29627 call_used_regs[10] = 1;
29628 }
29629 /* -mcaller-super-interworking reserves r11 for calls to
29630 _interwork_r11_call_via_rN(). Making the register global
29631 is an easy way of ensuring that it remains valid for all
29632 calls. */
29633 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29634 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29635 {
29636 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29637 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29638 if (TARGET_CALLER_INTERWORKING)
29639 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29640 }
29641 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29642 }
29643
29644 static reg_class_t
29645 arm_preferred_rename_class (reg_class_t rclass)
29646 {
29647   /* Thumb-2 instructions using LO_REGS may be smaller than the equivalent
29648      instructions using GENERAL_REGS, so prefer LO_REGS during the register
29649      rename pass to reduce code size.  */
29650 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29651 return LO_REGS;
29652 else
29653 return NO_REGS;
29654 }
29655
29656 /* Compute the attribute "length" of insn "*push_multi".
29657    So this function MUST be kept in sync with that insn pattern.  */
29658 int
29659 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
29660 {
29661 int i, regno, hi_reg;
29662 int num_saves = XVECLEN (parallel_op, 0);
29663
29664 /* ARM mode. */
29665 if (TARGET_ARM)
29666 return 4;
29667 /* Thumb1 mode. */
29668 if (TARGET_THUMB1)
29669 return 2;
29670
29671 /* Thumb2 mode. */
29672 regno = REGNO (first_op);
29673 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29674 for (i = 1; i < num_saves && !hi_reg; i++)
29675 {
29676 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29677 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29678 }
29679
29680 if (!hi_reg)
29681 return 2;
29682 return 4;
29683 }
29684
29685 /* Compute the number of instructions emitted by output_move_double. */
29686 int
29687 arm_count_output_move_double_insns (rtx *operands)
29688 {
29689 int count;
29690 rtx ops[2];
29691 /* output_move_double may modify the operands array, so call it
29692 here on a copy of the array. */
29693 ops[0] = operands[0];
29694 ops[1] = operands[1];
29695 output_move_double (ops, false, &count);
29696 return count;
29697 }
29698
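/* Return the number of fraction bits N such that OPERAND is a CONST_DOUBLE
   equal to 1.0 / 2^N; return 0 if OPERAND is not of that form.  */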
29699 int
29700 vfp3_const_double_for_fract_bits (rtx operand)
29701 {
29702 REAL_VALUE_TYPE r0;
29703
29704 if (!CONST_DOUBLE_P (operand))
29705 return 0;
29706
29707 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29708 if (exact_real_inverse (DFmode, &r0))
29709 {
29710 if (exact_real_truncate (DFmode, &r0))
29711 {
29712 HOST_WIDE_INT value = real_to_integer (&r0);
29713 value = value & 0xffffffff;
29714 if ((value != 0) && ( (value & (value - 1)) == 0))
29715 return int_log2 (value);
29716 }
29717 }
29718 return 0;
29719 }
29720
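/* Like vfp3_const_double_for_fract_bits, but OPERAND itself (rather than
   its reciprocal) must be an exact power of two; return log2 of that value,
   or 0 if OPERAND is not of that form.  */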
29721 int
29722 vfp3_const_double_for_bits (rtx operand)
29723 {
29724 REAL_VALUE_TYPE r0;
29725
29726 if (!CONST_DOUBLE_P (operand))
29727 return 0;
29728
29729 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29730 if (exact_real_truncate (DFmode, &r0))
29731 {
29732 HOST_WIDE_INT value = real_to_integer (&r0);
29733 value = value & 0xffffffff;
29734 if ((value != 0) && ( (value & (value - 1)) == 0))
29735 return int_log2 (value);
29736 }
29737
29738 return 0;
29739 }
29740 \f
29741 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29742
29743 static void
29744 arm_pre_atomic_barrier (enum memmodel model)
29745 {
29746 if (need_atomic_barrier_p (model, true))
29747 emit_insn (gen_memory_barrier ());
29748 }
29749
29750 static void
29751 arm_post_atomic_barrier (enum memmodel model)
29752 {
29753 if (need_atomic_barrier_p (model, false))
29754 emit_insn (gen_memory_barrier ());
29755 }
29756
29757 /* Emit the load-exclusive and store-exclusive instructions.
29758 Use acquire and release versions if necessary. */
29759
29760 static void
29761 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29762 {
29763 rtx (*gen) (rtx, rtx);
29764
29765 if (acq)
29766 {
29767 switch (mode)
29768 {
29769 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29770 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29771 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29772 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29773 default:
29774 gcc_unreachable ();
29775 }
29776 }
29777 else
29778 {
29779 switch (mode)
29780 {
29781 case QImode: gen = gen_arm_load_exclusiveqi; break;
29782 case HImode: gen = gen_arm_load_exclusivehi; break;
29783 case SImode: gen = gen_arm_load_exclusivesi; break;
29784 case DImode: gen = gen_arm_load_exclusivedi; break;
29785 default:
29786 gcc_unreachable ();
29787 }
29788 }
29789
29790 emit_insn (gen (rval, mem));
29791 }
29792
29793 static void
29794 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29795 rtx mem, bool rel)
29796 {
29797 rtx (*gen) (rtx, rtx, rtx);
29798
29799 if (rel)
29800 {
29801 switch (mode)
29802 {
29803 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29804 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29805 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29806 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29807 default:
29808 gcc_unreachable ();
29809 }
29810 }
29811 else
29812 {
29813 switch (mode)
29814 {
29815 case QImode: gen = gen_arm_store_exclusiveqi; break;
29816 case HImode: gen = gen_arm_store_exclusivehi; break;
29817 case SImode: gen = gen_arm_store_exclusivesi; break;
29818 case DImode: gen = gen_arm_store_exclusivedi; break;
29819 default:
29820 gcc_unreachable ();
29821 }
29822 }
29823
29824 emit_insn (gen (bval, rval, mem));
29825 }
29826
29827 /* Emit the jump instruction INSN and mark it as unlikely to be taken.  */
29828
29829 static void
29830 emit_unlikely_jump (rtx insn)
29831 {
29832 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29833
29834 insn = emit_jump_insn (insn);
29835 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29836 }
29837
29838 /* Expand a compare and swap pattern. */
29839
29840 void
29841 arm_expand_compare_and_swap (rtx operands[])
29842 {
29843 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29844 enum machine_mode mode;
29845 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29846
29847 bval = operands[0];
29848 rval = operands[1];
29849 mem = operands[2];
29850 oldval = operands[3];
29851 newval = operands[4];
29852 is_weak = operands[5];
29853 mod_s = operands[6];
29854 mod_f = operands[7];
29855 mode = GET_MODE (mem);
29856
29857 /* Normally the succ memory model must be stronger than fail, but in the
29858 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29859 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29860
29861 if (TARGET_HAVE_LDACQ
29862 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29863 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29864 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29865
29866 switch (mode)
29867 {
29868 case QImode:
29869 case HImode:
29870 /* For narrow modes, we're going to perform the comparison in SImode,
29871 so do the zero-extension now. */
29872 rval = gen_reg_rtx (SImode);
29873 oldval = convert_modes (SImode, mode, oldval, true);
29874 /* FALLTHRU */
29875
29876 case SImode:
29877 /* Force the value into a register if needed. We waited until after
29878 the zero-extension above to do this properly. */
29879 if (!arm_add_operand (oldval, SImode))
29880 oldval = force_reg (SImode, oldval);
29881 break;
29882
29883 case DImode:
29884 if (!cmpdi_operand (oldval, mode))
29885 oldval = force_reg (mode, oldval);
29886 break;
29887
29888 default:
29889 gcc_unreachable ();
29890 }
29891
29892 switch (mode)
29893 {
29894 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29895 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29896 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29897 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29898 default:
29899 gcc_unreachable ();
29900 }
29901
29902 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29903
29904 if (mode == QImode || mode == HImode)
29905 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29906
29907 /* In all cases, we arrange for success to be signaled by Z set.
29908 This arrangement allows for the boolean result to be used directly
29909 in a subsequent branch, post optimization. */
29910 x = gen_rtx_REG (CCmode, CC_REGNUM);
29911 x = gen_rtx_EQ (SImode, x, const0_rtx);
29912 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29913 }
29914
29915 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29916 another memory store between the load-exclusive and store-exclusive can
29917 reset the monitor from Exclusive to Open state. This means we must wait
29918 until after reload to split the pattern, lest we get a register spill in
29919 the middle of the atomic sequence. */
29920
29921 void
29922 arm_split_compare_and_swap (rtx operands[])
29923 {
29924 rtx rval, mem, oldval, newval, scratch;
29925 enum machine_mode mode;
29926 enum memmodel mod_s, mod_f;
29927 bool is_weak;
29928 rtx label1, label2, x, cond;
29929
29930 rval = operands[0];
29931 mem = operands[1];
29932 oldval = operands[2];
29933 newval = operands[3];
29934 is_weak = (operands[4] != const0_rtx);
29935 mod_s = (enum memmodel) INTVAL (operands[5]);
29936 mod_f = (enum memmodel) INTVAL (operands[6]);
29937 scratch = operands[7];
29938 mode = GET_MODE (mem);
29939
29940 bool use_acquire = TARGET_HAVE_LDACQ
29941 && !(mod_s == MEMMODEL_RELAXED
29942 || mod_s == MEMMODEL_CONSUME
29943 || mod_s == MEMMODEL_RELEASE);
29944
29945 bool use_release = TARGET_HAVE_LDACQ
29946 && !(mod_s == MEMMODEL_RELAXED
29947 || mod_s == MEMMODEL_CONSUME
29948 || mod_s == MEMMODEL_ACQUIRE);
29949
29950 /* Checks whether a barrier is needed and emits one accordingly. */
29951 if (!(use_acquire || use_release))
29952 arm_pre_atomic_barrier (mod_s);
29953
29954 label1 = NULL_RTX;
29955 if (!is_weak)
29956 {
29957 label1 = gen_label_rtx ();
29958 emit_label (label1);
29959 }
29960 label2 = gen_label_rtx ();
29961
29962 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29963
29964 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
29965 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29966 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29967 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29968 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29969
29970 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
29971
29972 /* Weak or strong, we want EQ to be true for success, so that we
29973 match the flags that we got from the compare above. */
29974 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29975 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
29976 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
29977
29978 if (!is_weak)
29979 {
29980 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29981 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29982 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
29983 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29984 }
29985
29986 if (mod_f != MEMMODEL_RELAXED)
29987 emit_label (label2);
29988
29989 /* Checks whether a barrier is needed and emits one accordingly. */
29990 if (!(use_acquire || use_release))
29991 arm_post_atomic_barrier (mod_s);
29992
29993 if (mod_f == MEMMODEL_RELAXED)
29994 emit_label (label2);
29995 }
29996
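/* Split an atomic read-modify-write operation into a load-exclusive /
   store-exclusive retry loop.  CODE is the operation to apply, MEM the
   memory location and VALUE the second operand.  OLD_OUT and NEW_OUT, when
   non-null, receive the value before and after the operation; COND is a
   scratch register that holds the store-exclusive result.  MODEL_RTX
   carries the memory model, which determines whether barriers or
   acquire/release accesses are emitted.  */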
29997 void
29998 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29999 rtx value, rtx model_rtx, rtx cond)
30000 {
30001 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30002 enum machine_mode mode = GET_MODE (mem);
30003 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30004 rtx label, x;
30005
30006 bool use_acquire = TARGET_HAVE_LDACQ
30007 && !(model == MEMMODEL_RELAXED
30008 || model == MEMMODEL_CONSUME
30009 || model == MEMMODEL_RELEASE);
30010
30011 bool use_release = TARGET_HAVE_LDACQ
30012 && !(model == MEMMODEL_RELAXED
30013 || model == MEMMODEL_CONSUME
30014 || model == MEMMODEL_ACQUIRE);
30015
30016 /* Checks whether a barrier is needed and emits one accordingly. */
30017 if (!(use_acquire || use_release))
30018 arm_pre_atomic_barrier (model);
30019
30020 label = gen_label_rtx ();
30021 emit_label (label);
30022
30023 if (new_out)
30024 new_out = gen_lowpart (wmode, new_out);
30025 if (old_out)
30026 old_out = gen_lowpart (wmode, old_out);
30027 else
30028 old_out = new_out;
30029 value = simplify_gen_subreg (wmode, value, mode, 0);
30030
30031 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30032
30033 switch (code)
30034 {
30035 case SET:
30036 new_out = value;
30037 break;
30038
30039 case NOT:
30040 x = gen_rtx_AND (wmode, old_out, value);
30041 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30042 x = gen_rtx_NOT (wmode, new_out);
30043 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30044 break;
30045
30046 case MINUS:
30047 if (CONST_INT_P (value))
30048 {
30049 value = GEN_INT (-INTVAL (value));
30050 code = PLUS;
30051 }
30052 /* FALLTHRU */
30053
30054 case PLUS:
30055 if (mode == DImode)
30056 {
30057 /* DImode plus/minus need to clobber flags. */
30058 /* The adddi3 and subdi3 patterns are incorrectly written so that
30059 they require matching operands, even when we could easily support
30060 three operands. Thankfully, this can be fixed up post-splitting,
30061 as the individual add+adc patterns do accept three operands and
30062 post-reload cprop can make these moves go away. */
30063 emit_move_insn (new_out, old_out);
30064 if (code == PLUS)
30065 x = gen_adddi3 (new_out, new_out, value);
30066 else
30067 x = gen_subdi3 (new_out, new_out, value);
30068 emit_insn (x);
30069 break;
30070 }
30071 /* FALLTHRU */
30072
30073 default:
30074 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30075 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30076 break;
30077 }
30078
30079 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30080 use_release);
30081
30082 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30083 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30084
30085 /* Checks whether a barrier is needed and emits one accordingly. */
30086 if (!(use_acquire || use_release))
30087 arm_post_atomic_barrier (model);
30088 }
30089 \f
30090 #define MAX_VECT_LEN 16
30091
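/* Description of a candidate constant vector permutation: the target and
   the two input operands, the selector (PERM) as element indices, the
   vector mode and element count, whether both inputs are the same vector,
   and whether we are only testing for support rather than emitting code.  */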
30092 struct expand_vec_perm_d
30093 {
30094 rtx target, op0, op1;
30095 unsigned char perm[MAX_VECT_LEN];
30096 enum machine_mode vmode;
30097 unsigned char nelt;
30098 bool one_vector_p;
30099 bool testing_p;
30100 };
30101
30102 /* Generate a variable permutation. */
30103
30104 static void
30105 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30106 {
30107 enum machine_mode vmode = GET_MODE (target);
30108 bool one_vector_p = rtx_equal_p (op0, op1);
30109
30110 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30111 gcc_checking_assert (GET_MODE (op0) == vmode);
30112 gcc_checking_assert (GET_MODE (op1) == vmode);
30113 gcc_checking_assert (GET_MODE (sel) == vmode);
30114 gcc_checking_assert (TARGET_NEON);
30115
30116 if (one_vector_p)
30117 {
30118 if (vmode == V8QImode)
30119 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30120 else
30121 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30122 }
30123 else
30124 {
30125 rtx pair;
30126
30127 if (vmode == V8QImode)
30128 {
30129 pair = gen_reg_rtx (V16QImode);
30130 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30131 pair = gen_lowpart (TImode, pair);
30132 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30133 }
30134 else
30135 {
30136 pair = gen_reg_rtx (OImode);
30137 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30138 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30139 }
30140 }
30141 }
30142
30143 void
30144 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30145 {
30146 enum machine_mode vmode = GET_MODE (target);
30147 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30148 bool one_vector_p = rtx_equal_p (op0, op1);
30149 rtx rmask[MAX_VECT_LEN], mask;
30150
30151 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30152 numbering of elements for big-endian, we must reverse the order. */
30153 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30154
30155 /* The VTBL instruction does not use a modulo index, so we must take care
30156 of that ourselves. */
30157 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30158 for (i = 0; i < nelt; ++i)
30159 rmask[i] = mask;
30160 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30161 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30162
30163 arm_expand_vec_perm_1 (target, op0, op1, sel);
30164 }
30165
30166 /* Generate or test for an insn that supports a constant permutation. */
30167
30168 /* Recognize patterns for the VUZP insns. */
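/* For example (little-endian), a V4SI selector of { 0, 2, 4, 6 } picks the
   even-numbered elements of the two concatenated inputs and is matched here
   with ODD == 0; { 1, 3, 5, 7 } is the ODD == 1 case.  */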
30169
30170 static bool
30171 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30172 {
30173 unsigned int i, odd, mask, nelt = d->nelt;
30174 rtx out0, out1, in0, in1, x;
30175 rtx (*gen)(rtx, rtx, rtx, rtx);
30176
30177 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30178 return false;
30179
30180 /* Note that these are little-endian tests. Adjust for big-endian later. */
30181 if (d->perm[0] == 0)
30182 odd = 0;
30183 else if (d->perm[0] == 1)
30184 odd = 1;
30185 else
30186 return false;
30187 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30188
30189 for (i = 0; i < nelt; i++)
30190 {
30191 unsigned elt = (i * 2 + odd) & mask;
30192 if (d->perm[i] != elt)
30193 return false;
30194 }
30195
30196 /* Success! */
30197 if (d->testing_p)
30198 return true;
30199
30200 switch (d->vmode)
30201 {
30202 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30203 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30204 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30205 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30206 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30207 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30208 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30209 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30210 default:
30211 gcc_unreachable ();
30212 }
30213
30214 in0 = d->op0;
30215 in1 = d->op1;
30216 if (BYTES_BIG_ENDIAN)
30217 {
30218 x = in0, in0 = in1, in1 = x;
30219 odd = !odd;
30220 }
30221
30222 out0 = d->target;
30223 out1 = gen_reg_rtx (d->vmode);
30224 if (odd)
30225 x = out0, out0 = out1, out1 = x;
30226
30227 emit_insn (gen (out0, in0, in1, out1));
30228 return true;
30229 }
30230
30231 /* Recognize patterns for the VZIP insns. */
30232
30233 static bool
30234 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30235 {
30236 unsigned int i, high, mask, nelt = d->nelt;
30237 rtx out0, out1, in0, in1, x;
30238 rtx (*gen)(rtx, rtx, rtx, rtx);
30239
30240 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30241 return false;
30242
30243 /* Note that these are little-endian tests. Adjust for big-endian later. */
30244 high = nelt / 2;
30245 if (d->perm[0] == high)
30246 ;
30247 else if (d->perm[0] == 0)
30248 high = 0;
30249 else
30250 return false;
30251 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30252
30253 for (i = 0; i < nelt / 2; i++)
30254 {
30255 unsigned elt = (i + high) & mask;
30256 if (d->perm[i * 2] != elt)
30257 return false;
30258 elt = (elt + nelt) & mask;
30259 if (d->perm[i * 2 + 1] != elt)
30260 return false;
30261 }
30262
30263 /* Success! */
30264 if (d->testing_p)
30265 return true;
30266
30267 switch (d->vmode)
30268 {
30269 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30270 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30271 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30272 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30273 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30274 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30275 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30276 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30277 default:
30278 gcc_unreachable ();
30279 }
30280
30281 in0 = d->op0;
30282 in1 = d->op1;
30283 if (BYTES_BIG_ENDIAN)
30284 {
30285 x = in0, in0 = in1, in1 = x;
30286 high = !high;
30287 }
30288
30289 out0 = d->target;
30290 out1 = gen_reg_rtx (d->vmode);
30291 if (high)
30292 x = out0, out0 = out1, out1 = x;
30293
30294 emit_insn (gen (out0, in0, in1, out1));
30295 return true;
30296 }
30297
30298 /* Recognize patterns for the VREV insns. */
30299
30300 static bool
30301 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30302 {
30303 unsigned int i, j, diff, nelt = d->nelt;
30304 rtx (*gen)(rtx, rtx, rtx);
30305
30306 if (!d->one_vector_p)
30307 return false;
30308
30309 diff = d->perm[0];
30310 switch (diff)
30311 {
30312 case 7:
30313 switch (d->vmode)
30314 {
30315 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30316 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30317 default:
30318 return false;
30319 }
30320 break;
30321 case 3:
30322 switch (d->vmode)
30323 {
30324 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30325 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30326 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30327 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30328 default:
30329 return false;
30330 }
30331 break;
30332 case 1:
30333 switch (d->vmode)
30334 {
30335 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30336 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30337 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30338 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30339 case V4SImode: gen = gen_neon_vrev64v4si; break;
30340 case V2SImode: gen = gen_neon_vrev64v2si; break;
30341 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30342 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30343 default:
30344 return false;
30345 }
30346 break;
30347 default:
30348 return false;
30349 }
30350
30351 for (i = 0; i < nelt ; i += diff + 1)
30352 for (j = 0; j <= diff; j += 1)
30353 {
30354 	/* This is guaranteed to be true, as the value of diff is 7, 3 or 1
30355 	   and we should have enough elements in the queue to generate this.
30356 	   Getting a vector mask with a diff other than these values implies
30357 	   that something has gone wrong by the time we get here.  */
30359 gcc_assert (i + j < nelt);
30360 if (d->perm[i + j] != i + diff - j)
30361 return false;
30362 }
30363
30364 /* Success! */
30365 if (d->testing_p)
30366 return true;
30367
30368 /* ??? The third operand is an artifact of the builtin infrastructure
30369 and is ignored by the actual instruction. */
30370 emit_insn (gen (d->target, d->op0, const0_rtx));
30371 return true;
30372 }
30373
30374 /* Recognize patterns for the VTRN insns. */
30375
30376 static bool
30377 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30378 {
30379 unsigned int i, odd, mask, nelt = d->nelt;
30380 rtx out0, out1, in0, in1, x;
30381 rtx (*gen)(rtx, rtx, rtx, rtx);
30382
30383 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30384 return false;
30385
30386 /* Note that these are little-endian tests. Adjust for big-endian later. */
30387 if (d->perm[0] == 0)
30388 odd = 0;
30389 else if (d->perm[0] == 1)
30390 odd = 1;
30391 else
30392 return false;
30393 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30394
30395 for (i = 0; i < nelt; i += 2)
30396 {
30397 if (d->perm[i] != i + odd)
30398 return false;
30399 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30400 return false;
30401 }
30402
30403 /* Success! */
30404 if (d->testing_p)
30405 return true;
30406
30407 switch (d->vmode)
30408 {
30409 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30410 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30411 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30412 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30413 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30414 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30415 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30416 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30417 default:
30418 gcc_unreachable ();
30419 }
30420
30421 in0 = d->op0;
30422 in1 = d->op1;
30423 if (BYTES_BIG_ENDIAN)
30424 {
30425 x = in0, in0 = in1, in1 = x;
30426 odd = !odd;
30427 }
30428
30429 out0 = d->target;
30430 out1 = gen_reg_rtx (d->vmode);
30431 if (odd)
30432 x = out0, out0 = out1, out1 = x;
30433
30434 emit_insn (gen (out0, in0, in1, out1));
30435 return true;
30436 }
30437
30438 /* Recognize patterns for the VEXT insns. */
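/* For example, a V4SI selector of { 1, 2, 3, 4 } extracts a contiguous run
   starting at element 1 of the concatenated inputs, which maps to VEXT with
   offset #1; a one-vector selector of { 1, 2, 3, 0 } is the rotation case
   that is also handled here.  */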
30439
30440 static bool
30441 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30442 {
30443 unsigned int i, nelt = d->nelt;
30444 rtx (*gen) (rtx, rtx, rtx, rtx);
30445 rtx offset;
30446
30447 unsigned int location;
30448
30449 unsigned int next = d->perm[0] + 1;
30450
30451 /* TODO: Handle GCC's numbering of elements for big-endian. */
30452 if (BYTES_BIG_ENDIAN)
30453 return false;
30454
30455 /* Check if the extracted indexes are increasing by one. */
30456 for (i = 1; i < nelt; next++, i++)
30457 {
30458 /* If we hit the most significant element of the 2nd vector in
30459 the previous iteration, no need to test further. */
30460 if (next == 2 * nelt)
30461 return false;
30462
30463       /* If we are operating on only one vector, it could be a
30464 	 rotation.  If there are only two elements of size < 64, let
30465 	 arm_evpc_neon_vrev catch it.  */
30466 if (d->one_vector_p && (next == nelt))
30467 {
30468 if ((nelt == 2) && (d->vmode != V2DImode))
30469 return false;
30470 else
30471 next = 0;
30472 }
30473
30474 if (d->perm[i] != next)
30475 return false;
30476 }
30477
30478 location = d->perm[0];
30479
30480 switch (d->vmode)
30481 {
30482 case V16QImode: gen = gen_neon_vextv16qi; break;
30483 case V8QImode: gen = gen_neon_vextv8qi; break;
30484 case V4HImode: gen = gen_neon_vextv4hi; break;
30485 case V8HImode: gen = gen_neon_vextv8hi; break;
30486 case V2SImode: gen = gen_neon_vextv2si; break;
30487 case V4SImode: gen = gen_neon_vextv4si; break;
30488 case V2SFmode: gen = gen_neon_vextv2sf; break;
30489 case V4SFmode: gen = gen_neon_vextv4sf; break;
30490 case V2DImode: gen = gen_neon_vextv2di; break;
30491 default:
30492 return false;
30493 }
30494
30495 /* Success! */
30496 if (d->testing_p)
30497 return true;
30498
30499 offset = GEN_INT (location);
30500 emit_insn (gen (d->target, d->op0, d->op1, offset));
30501 return true;
30502 }
30503
30504 /* The NEON VTBL instruction is a fully variable permutation that's even
30505 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30506 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30507 can do slightly better by expanding this as a constant where we don't
30508 have to apply a mask. */
30509
30510 static bool
30511 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30512 {
30513 rtx rperm[MAX_VECT_LEN], sel;
30514 enum machine_mode vmode = d->vmode;
30515 unsigned int i, nelt = d->nelt;
30516
30517 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30518 numbering of elements for big-endian, we must reverse the order. */
30519 if (BYTES_BIG_ENDIAN)
30520 return false;
30521
30522 if (d->testing_p)
30523 return true;
30524
30525   /* Generic code will try constant permutation twice: once with the
30526      original mode and again with the elements lowered to QImode.
30527      So wait and don't do the selector expansion ourselves.  */
30528 if (vmode != V8QImode && vmode != V16QImode)
30529 return false;
30530
30531 for (i = 0; i < nelt; ++i)
30532 rperm[i] = GEN_INT (d->perm[i]);
30533 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30534 sel = force_reg (vmode, sel);
30535
30536 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30537 return true;
30538 }
30539
30540 static bool
30541 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30542 {
30543 /* Check if the input mask matches vext before reordering the
30544 operands. */
30545 if (TARGET_NEON)
30546 if (arm_evpc_neon_vext (d))
30547 return true;
30548
30549 /* The pattern matching functions above are written to look for a small
30550 number to begin the sequence (0, 1, N/2). If we begin with an index
30551 from the second operand, we can swap the operands. */
30552 if (d->perm[0] >= d->nelt)
30553 {
30554 unsigned i, nelt = d->nelt;
30555 rtx x;
30556
30557 for (i = 0; i < nelt; ++i)
30558 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30559
30560 x = d->op0;
30561 d->op0 = d->op1;
30562 d->op1 = x;
30563 }
30564
30565 if (TARGET_NEON)
30566 {
30567 if (arm_evpc_neon_vuzp (d))
30568 return true;
30569 if (arm_evpc_neon_vzip (d))
30570 return true;
30571 if (arm_evpc_neon_vrev (d))
30572 return true;
30573 if (arm_evpc_neon_vtrn (d))
30574 return true;
30575 return arm_evpc_neon_vtbl (d);
30576 }
30577 return false;
30578 }
30579
30580 /* Expand a vec_perm_const pattern. */
30581
30582 bool
30583 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30584 {
30585 struct expand_vec_perm_d d;
30586 int i, nelt, which;
30587
30588 d.target = target;
30589 d.op0 = op0;
30590 d.op1 = op1;
30591
30592 d.vmode = GET_MODE (target);
30593 gcc_assert (VECTOR_MODE_P (d.vmode));
30594 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30595 d.testing_p = false;
30596
30597 for (i = which = 0; i < nelt; ++i)
30598 {
30599 rtx e = XVECEXP (sel, 0, i);
30600 int ei = INTVAL (e) & (2 * nelt - 1);
30601 which |= (ei < nelt ? 1 : 2);
30602 d.perm[i] = ei;
30603 }
30604
30605 switch (which)
30606 {
30607 default:
30608 gcc_unreachable();
30609
30610 case 3:
30611 d.one_vector_p = false;
30612 if (!rtx_equal_p (op0, op1))
30613 break;
30614
30615 /* The elements of PERM do not suggest that only the first operand
30616 is used, but both operands are identical. Allow easier matching
30617 of the permutation by folding the permutation into the single
30618 input vector. */
30619 /* FALLTHRU */
30620 case 2:
30621 for (i = 0; i < nelt; ++i)
30622 d.perm[i] &= nelt - 1;
30623 d.op0 = op1;
30624 d.one_vector_p = true;
30625 break;
30626
30627 case 1:
30628 d.op1 = op0;
30629 d.one_vector_p = true;
30630 break;
30631 }
30632
30633 return arm_expand_vec_perm_const_1 (&d);
30634 }
30635
30636 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30637
30638 static bool
30639 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30640 const unsigned char *sel)
30641 {
30642 struct expand_vec_perm_d d;
30643 unsigned int i, nelt, which;
30644 bool ret;
30645
30646 d.vmode = vmode;
30647 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30648 d.testing_p = true;
30649 memcpy (d.perm, sel, nelt);
30650
30651 /* Categorize the set of elements in the selector. */
30652 for (i = which = 0; i < nelt; ++i)
30653 {
30654 unsigned char e = d.perm[i];
30655 gcc_assert (e < 2 * nelt);
30656 which |= (e < nelt ? 1 : 2);
30657 }
30658
30659   /* If all elements come from the second vector, fold their indices down onto the first.  */
30660 if (which == 2)
30661 for (i = 0; i < nelt; ++i)
30662 d.perm[i] -= nelt;
30663
30664 /* Check whether the mask can be applied to the vector type. */
30665 d.one_vector_p = (which != 3);
30666
30667 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30668 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30669 if (!d.one_vector_p)
30670 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30671
30672 start_sequence ();
30673 ret = arm_expand_vec_perm_const_1 (&d);
30674 end_sequence ();
30675
30676 return ret;
30677 }
30678
30679 bool
30680 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30681 {
30682   /* If we are soft float, all auto-increment forms are OK as long as the
30683      access fits in a word or we have LDRD for larger accesses.  */
30684 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30685 return true;
30686
30687 switch (code)
30688 {
30689     /* Post-increment is supported for all instruction forms; pre-decrement
30690        is supported for all forms except vector modes.  */
30691 case ARM_POST_INC:
30692 case ARM_PRE_DEC:
30693 if (VECTOR_MODE_P (mode))
30694 {
30695 if (code != ARM_PRE_DEC)
30696 return true;
30697 else
30698 return false;
30699 }
30700
30701 return true;
30702
30703 case ARM_POST_DEC:
30704 case ARM_PRE_INC:
30705       /* Without LDRD there is no point in auto-incrementing for modes
30706 	 larger than a word, because ldm and stm will not have these
30707 	 forms.  */
30708 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30709 return false;
30710
30711 /* Vector and floating point modes do not support
30712 these auto increment forms. */
30713 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30714 return false;
30715
30716 return true;
30717
30718 default:
30719 return false;
30720
30721 }
30722
30723 return false;
30724 }
30725
30726 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30727 on ARM, since we know that shifts by negative amounts are no-ops.
30728 Additionally, the default expansion code is not available or suitable
30729 for post-reload insn splits (this can occur when the register allocator
30730 chooses not to do a shift in NEON).
30731
30732 This function is used in both initial expand and post-reload splits, and
30733 handles all kinds of 64-bit shifts.
30734
30735 Input requirements:
30736 - It is safe for the input and output to be the same register, but
30737 early-clobber rules apply for the shift amount and scratch registers.
30738 - Shift by register requires both scratch registers. In all other cases
30739 the scratch registers may be NULL.
30740 - Ashiftrt by a register also clobbers the CC register. */
30741 void
30742 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30743 rtx amount, rtx scratch1, rtx scratch2)
30744 {
30745 rtx out_high = gen_highpart (SImode, out);
30746 rtx out_low = gen_lowpart (SImode, out);
30747 rtx in_high = gen_highpart (SImode, in);
30748 rtx in_low = gen_lowpart (SImode, in);
30749
30750 /* Terminology:
30751 in = the register pair containing the input value.
30752 out = the destination register pair.
30753 up = the high- or low-part of each pair.
30754 down = the opposite part to "up".
30755 In a shift, we can consider bits to shift from "up"-stream to
30756 "down"-stream, so in a left-shift "up" is the low-part and "down"
30757 is the high-part of each register pair. */
30758
30759 rtx out_up = code == ASHIFT ? out_low : out_high;
30760 rtx out_down = code == ASHIFT ? out_high : out_low;
30761 rtx in_up = code == ASHIFT ? in_low : in_high;
30762 rtx in_down = code == ASHIFT ? in_high : in_low;
30763
30764 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30765 gcc_assert (out
30766 && (REG_P (out) || GET_CODE (out) == SUBREG)
30767 && GET_MODE (out) == DImode);
30768 gcc_assert (in
30769 && (REG_P (in) || GET_CODE (in) == SUBREG)
30770 && GET_MODE (in) == DImode);
30771 gcc_assert (amount
30772 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30773 && GET_MODE (amount) == SImode)
30774 || CONST_INT_P (amount)));
30775 gcc_assert (scratch1 == NULL
30776 || (GET_CODE (scratch1) == SCRATCH)
30777 || (GET_MODE (scratch1) == SImode
30778 && REG_P (scratch1)));
30779 gcc_assert (scratch2 == NULL
30780 || (GET_CODE (scratch2) == SCRATCH)
30781 || (GET_MODE (scratch2) == SImode
30782 && REG_P (scratch2)));
30783 gcc_assert (!REG_P (out) || !REG_P (amount)
30784 || !HARD_REGISTER_P (out)
30785 || (REGNO (out) != REGNO (amount)
30786 && REGNO (out) + 1 != REGNO (amount)));
30787
30788 /* Macros to make following code more readable. */
30789 #define SUB_32(DEST,SRC) \
30790 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30791 #define RSB_32(DEST,SRC) \
30792 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30793 #define SUB_S_32(DEST,SRC) \
30794 gen_addsi3_compare0 ((DEST), (SRC), \
30795 GEN_INT (-32))
30796 #define SET(DEST,SRC) \
30797 gen_rtx_SET (SImode, (DEST), (SRC))
30798 #define SHIFT(CODE,SRC,AMOUNT) \
30799 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30800 #define LSHIFT(CODE,SRC,AMOUNT) \
30801 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30802 SImode, (SRC), (AMOUNT))
30803 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30804 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30805 SImode, (SRC), (AMOUNT))
30806 #define ORR(A,B) \
30807 gen_rtx_IOR (SImode, (A), (B))
30808 #define BRANCH(COND,LABEL) \
30809 gen_arm_cond_branch ((LABEL), \
30810 gen_rtx_ ## COND (CCmode, cc_reg, \
30811 const0_rtx), \
30812 cc_reg)
30813
30814 /* Shifts by register and shifts by constant are handled separately. */
30815 if (CONST_INT_P (amount))
30816 {
30817 /* We have a shift-by-constant. */
30818
30819 /* First, handle out-of-range shift amounts.
30820 	 In both cases we try to match the result that an ARM shift-by-register
30821 	 instruction would give.  This helps reduce execution differences
30822 	 between optimization levels, but it won't stop other parts of the
30823 	 compiler doing different things.  This is "undefined behaviour",
30824 	 in any case.  */
30825 if (INTVAL (amount) <= 0)
30826 emit_insn (gen_movdi (out, in));
30827 else if (INTVAL (amount) >= 64)
30828 {
30829 if (code == ASHIFTRT)
30830 {
30831 rtx const31_rtx = GEN_INT (31);
30832 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30833 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30834 }
30835 else
30836 emit_insn (gen_movdi (out, const0_rtx));
30837 }
30838
30839 /* Now handle valid shifts. */
30840 else if (INTVAL (amount) < 32)
30841 {
30842 /* Shifts by a constant less than 32. */
30843 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30844
30845 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30846 emit_insn (SET (out_down,
30847 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30848 out_down)));
30849 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30850 }
30851 else
30852 {
30853 /* Shifts by a constant greater than 31. */
30854 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30855
30856 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30857 if (code == ASHIFTRT)
30858 emit_insn (gen_ashrsi3 (out_up, in_up,
30859 GEN_INT (31)));
30860 else
30861 emit_insn (SET (out_up, const0_rtx));
30862 }
30863 }
30864 else
30865 {
30866 /* We have a shift-by-register. */
30867 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30868
30869 /* This alternative requires the scratch registers. */
30870 gcc_assert (scratch1 && REG_P (scratch1));
30871 gcc_assert (scratch2 && REG_P (scratch2));
30872
30873 /* We will need the values "amount-32" and "32-amount" later.
30874 Swapping them around now allows the later code to be more general. */
30875 switch (code)
30876 {
30877 case ASHIFT:
30878 emit_insn (SUB_32 (scratch1, amount));
30879 emit_insn (RSB_32 (scratch2, amount));
30880 break;
30881 case ASHIFTRT:
30882 emit_insn (RSB_32 (scratch1, amount));
30883 /* Also set CC from amount - 32; the branch below tests amount < 32. */
30884 emit_insn (SUB_S_32 (scratch2, amount));
30885 break;
30886 case LSHIFTRT:
30887 emit_insn (RSB_32 (scratch1, amount));
30888 emit_insn (SUB_32 (scratch2, amount));
30889 break;
30890 default:
30891 gcc_unreachable ();
30892 }
30893
30894 /* Emit code like this:
30895
30896 arithmetic-left:
30897 out_down = in_down << amount;
30898 out_down = (in_up << (amount - 32)) | out_down;
30899 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30900 out_up = in_up << amount;
30901
30902 arithmetic-right:
30903 out_down = in_down >> amount;
30904 out_down = (in_up << (32 - amount)) | out_down;
30905 if (amount >= 32)
30906 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30907 out_up = (signed)in_up >> amount;
30908
30909 logical-right:
30910 out_down = in_down >> amount;
30911 out_down = (in_up << (32 - amount)) | out_down;
30912 if (amount >= 32)
30913 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30914 out_up = (unsigned)in_up >> amount;
30915
30916 The ARM and Thumb2 variants compute the same thing but are implemented
30917 slightly differently. If this were only called during expand we could
30918 just use the Thumb2 case and let combine do the right thing, but this
30919 can also be called from post-reload splitters. */
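
  /* As a rough illustration (register choice is arbitrary: r0/r1 hold the
     low/high words, r2 the amount, ip/lr are the scratches), the ARM-mode
     logical-right sequence aims at something like:

	rsb	ip, r2, #32	@ scratch1 = 32 - amount
	sub	lr, r2, #32	@ scratch2 = amount - 32
	lsr	r0, r0, r2	@ out_down = in_down >> amount
	orr	r0, r0, r1, lsl ip
	orr	r0, r0, r1, lsr lr
	lsr	r1, r1, r2	@ out_up = in_up >> amount

     Register-specified LSL/LSR by 32 or more yield zero, so an ORR term
     that is out of range for the current AMOUNT is harmless.  */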
30920
30921 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30922
30923 if (!TARGET_THUMB2)
30924 {
30925 /* Emit code for ARM mode. */
30926 emit_insn (SET (out_down,
30927 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30928 if (code == ASHIFTRT)
30929 {
30930 rtx done_label = gen_label_rtx ();
30931 emit_jump_insn (BRANCH (LT, done_label));
30932 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30933 out_down)));
30934 emit_label (done_label);
30935 }
30936 else
30937 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30938 out_down)));
30939 }
30940 else
30941 {
30942 /* Emit code for Thumb2 mode.
30943 Thumb2 can't do shift and or in one insn. */
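	  /* In ARM state the shift could be folded into the ORR as a
	     register-controlled shift of the second operand; Thumb-2 only
	     allows constant shifts there, hence the extra step through the
	     scratch register.  */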
30944 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30945 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30946
30947 if (code == ASHIFTRT)
30948 {
30949 rtx done_label = gen_label_rtx ();
30950 emit_jump_insn (BRANCH (LT, done_label));
30951 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30952 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30953 emit_label (done_label);
30954 }
30955 else
30956 {
30957 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30958 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30959 }
30960 }
30961
30962 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30963 }
30964
30965 #undef SUB_32
30966 #undef RSB_32
30967 #undef SUB_S_32
30968 #undef SET
30969 #undef SHIFT
30970 #undef LSHIFT
30971 #undef REV_LSHIFT
30972 #undef ORR
30973 #undef BRANCH
30974 }
30975
30976
30977 /* Returns true if *COMPARISON is a valid comparison operation, and
30978 puts the operands into a form that is valid for it. */
30979 bool
30980 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
30981 {
30982 enum rtx_code code = GET_CODE (*comparison);
30983 int code_int;
30984 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30985 ? GET_MODE (*op2) : GET_MODE (*op1);
30986
30987 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30988
30989 if (code == UNEQ || code == LTGT)
30990 return false;
30991
30992 code_int = (int)code;
30993 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30994 PUT_CODE (*comparison, (enum rtx_code)code_int);
30995
30996 switch (mode)
30997 {
30998 case SImode:
30999 if (!arm_add_operand (*op1, mode))
31000 *op1 = force_reg (mode, *op1);
31001 if (!arm_add_operand (*op2, mode))
31002 *op2 = force_reg (mode, *op2);
31003 return true;
31004
31005 case DImode:
31006 if (!cmpdi_operand (*op1, mode))
31007 *op1 = force_reg (mode, *op1);
31008 if (!cmpdi_operand (*op2, mode))
31009 *op2 = force_reg (mode, *op2);
31010 return true;
31011
31012 case SFmode:
31013 case DFmode:
31014 if (!arm_float_compare_operand (*op1, mode))
31015 *op1 = force_reg (mode, *op1);
31016 if (!arm_float_compare_operand (*op2, mode))
31017 *op2 = force_reg (mode, *op2);
31018 return true;
31019 default:
31020 break;
31021 }
31022
31023 return false;
31024
31025 }
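
/* A sketch of the intended use from a machine-description expander
   (illustrative only; see the DImode comparison expanders in arm.md for
   the real patterns):

     if (!arm_validize_comparison (&operands[0], &operands[1],
				   &operands[2]))
       FAIL;

   i.e. the expander FAILs unless the comparison code is usable and the
   operands have been forced into an acceptable form.  */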
31026
31027 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
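/* AddressSanitizer locates the shadow byte for an address as
   shadow = (addr >> 3) + offset, so with the offset below (1 << 29,
   i.e. 0x20000000) the shadow for address 0x40000000 is at
   (0x40000000 >> 3) + 0x20000000 = 0x28000000.  */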
31028
31029 static unsigned HOST_WIDE_INT
31030 arm_asan_shadow_offset (void)
31031 {
31032 return (unsigned HOST_WIDE_INT) 1 << 29;
31033 }
31034
31035 #include "gt-arm.h"