arm.c (neon_vector_mem_operand): Add strict argument.
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "reload.h"
39 #include "function.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "debug.h"
51 #include "langhooks.h"
52 #include "df.h"
53 #include "intl.h"
54 #include "libfuncs.h"
55 #include "params.h"
56 #include "opts.h"
57 #include "dumpfile.h"
58
59 /* Forward definitions of types. */
60 typedef struct minipool_node Mnode;
61 typedef struct minipool_fixup Mfix;
62
63 void (*arm_lang_output_object_attributes_hook)(void);
64
65 struct four_ints
66 {
67 int i[4];
68 };
69
70 /* Forward function declarations. */
71 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
72 static int arm_compute_static_chain_stack_bytes (void);
73 static arm_stack_offsets *arm_get_frame_offsets (void);
74 static void arm_add_gc_roots (void);
75 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
76 HOST_WIDE_INT, rtx, rtx, int, int);
77 static unsigned bit_count (unsigned long);
78 static int arm_address_register_rtx_p (rtx, int);
79 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
80 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
81 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
82 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
83 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
84 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
85 inline static int thumb1_index_register_rtx_p (rtx, int);
86 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
87 static int thumb_far_jump_used_p (void);
88 static bool thumb_force_lr_save (void);
89 static unsigned arm_size_return_regs (void);
90 static bool arm_assemble_integer (rtx, unsigned int, int);
91 static void arm_print_operand (FILE *, rtx, int);
92 static void arm_print_operand_address (FILE *, rtx);
93 static bool arm_print_operand_punct_valid_p (unsigned char code);
94 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
95 static arm_cc get_arm_condition_code (rtx);
96 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
97 static rtx is_jump_table (rtx);
98 static const char *output_multi_immediate (rtx *, const char *, const char *,
99 int, HOST_WIDE_INT);
100 static const char *shift_op (rtx, HOST_WIDE_INT *);
101 static struct machine_function *arm_init_machine_status (void);
102 static void thumb_exit (FILE *, int);
103 static rtx is_jump_table (rtx);
104 static HOST_WIDE_INT get_jump_table_size (rtx);
105 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_forward_ref (Mfix *);
107 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
108 static Mnode *add_minipool_backward_ref (Mfix *);
109 static void assign_minipool_offsets (Mfix *);
110 static void arm_print_value (FILE *, rtx);
111 static void dump_minipool (rtx);
112 static int arm_barrier_cost (rtx);
113 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
114 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
115 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
116 rtx);
117 static void arm_reorg (void);
118 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
119 static unsigned long arm_compute_save_reg0_reg12_mask (void);
120 static unsigned long arm_compute_save_reg_mask (void);
121 static unsigned long arm_isr_value (tree);
122 static unsigned long arm_compute_func_type (void);
123 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
124 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
126 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
127 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
128 #endif
129 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
130 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
131 static int arm_comp_type_attributes (const_tree, const_tree);
132 static void arm_set_default_type_attributes (tree);
133 static int arm_adjust_cost (rtx, rtx, rtx, int);
134 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static rtx emit_multi_reg_push (unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
179 tree, bool);
180 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
181 const_tree, bool);
182 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
183 const_tree, bool);
184 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
186 const_tree);
187 static rtx aapcs_libcall_value (enum machine_mode);
188 static int aapcs_select_return_coproc (const_tree, const_tree);
189
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
192 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
193 #endif
194 #ifndef ARM_PE
195 static void arm_encode_section_info (tree, rtx, int);
196 #endif
197
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200
201 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
202 tree, int *, int);
203 static bool arm_pass_by_reference (cumulative_args_t,
204 enum machine_mode, const_tree, bool);
205 static bool arm_promote_prototypes (const_tree);
206 static bool arm_default_short_enums (void);
207 static bool arm_align_anon_bitfield (void);
208 static bool arm_return_in_msb (const_tree);
209 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
210 static bool arm_return_in_memory (const_tree, const_tree);
211 #if ARM_UNWIND_INFO
212 static void arm_unwind_emit (FILE *, rtx);
213 static bool arm_output_ttype (rtx);
214 static void arm_asm_emit_except_personality (rtx);
215 static void arm_asm_init_sections (void);
216 #endif
217 static rtx arm_dwarf_register_span (rtx);
218
219 static tree arm_cxx_guard_type (void);
220 static bool arm_cxx_guard_mask_bit (void);
221 static tree arm_get_cookie_size (tree);
222 static bool arm_cookie_has_size (void);
223 static bool arm_cxx_cdtor_returns_this (void);
224 static bool arm_cxx_key_method_may_be_inline (void);
225 static void arm_cxx_determine_class_data_visibility (tree);
226 static bool arm_cxx_class_data_always_comdat (void);
227 static bool arm_cxx_use_aeabi_atexit (void);
228 static void arm_init_libfuncs (void);
229 static tree arm_build_builtin_va_list (void);
230 static void arm_expand_builtin_va_start (tree, rtx);
231 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
232 static void arm_option_override (void);
233 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
234 static bool arm_cannot_copy_insn_p (rtx);
235 static bool arm_tls_symbol_p (rtx x);
236 static int arm_issue_rate (void);
237 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
238 static bool arm_output_addr_const_extra (FILE *, rtx);
239 static bool arm_allocate_stack_slots_for_args (void);
240 static bool arm_warn_func_return (tree);
241 static const char *arm_invalid_parameter_type (const_tree t);
242 static const char *arm_invalid_return_type (const_tree t);
243 static tree arm_promoted_type (const_tree t);
244 static tree arm_convert_to_type (tree type, tree expr);
245 static bool arm_scalar_mode_supported_p (enum machine_mode);
246 static bool arm_frame_pointer_required (void);
247 static bool arm_can_eliminate (const int, const int);
248 static void arm_asm_trampoline_template (FILE *);
249 static void arm_trampoline_init (rtx, tree, rtx);
250 static rtx arm_trampoline_adjust_address (rtx);
251 static rtx arm_pic_static_addr (rtx orig, rtx reg);
252 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
255 static bool arm_array_mode_supported_p (enum machine_mode,
256 unsigned HOST_WIDE_INT);
257 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
258 static bool arm_class_likely_spilled_p (reg_class_t);
259 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
260 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
261 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
262 const_tree type,
263 int misalignment,
264 bool is_packed);
265 static void arm_conditional_register_usage (void);
266 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
267 static unsigned int arm_autovectorize_vector_sizes (void);
268 static int arm_default_branch_cost (bool, bool);
269 static int arm_cortex_a5_branch_cost (bool, bool);
270
271 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
272 const unsigned char *sel);
273
274 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
275 tree vectype,
276 int misalign ATTRIBUTE_UNUSED);
277 static unsigned arm_add_stmt_cost (void *data, int count,
278 enum vect_cost_for_stmt kind,
279 struct _stmt_vec_info *stmt_info,
280 int misalign,
281 enum vect_cost_model_location where);
282
283 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
284 bool op0_preserve_value);
285 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
286 \f
287 /* Table of machine attributes. */
288 static const struct attribute_spec arm_attribute_table[] =
289 {
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
291 affects_type_identity } */
292 /* Function calls made to this symbol must be done indirectly, because
293 it may lie outside of the 26 bit addressing range of a normal function
294 call. */
295 { "long_call", 0, 0, false, true, true, NULL, false },
296 /* Whereas these functions are always known to reside within the 26 bit
297 addressing range. */
298 { "short_call", 0, 0, false, true, true, NULL, false },
299 /* Specify the procedure call conventions for a function. */
300 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
301 false },
302 /* Interrupt Service Routines have special prologue and epilogue requirements. */
303 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
304 false },
305 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
306 false },
307 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
308 false },
309 #ifdef ARM_PE
310 /* ARM/PE has three new attributes:
311 interfacearm - ?
312 dllexport - for exporting a function/variable that will live in a dll
313 dllimport - for importing a function/variable from a dll
314
315 Microsoft allows multiple declspecs in one __declspec, separating
316 them with spaces. We do NOT support this. Instead, use __declspec
317 multiple times.
318 */
319 { "dllimport", 0, 0, true, false, false, NULL, false },
320 { "dllexport", 0, 0, true, false, false, NULL, false },
321 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
322 false },
323 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
324 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
325 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
326 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
327 false },
328 #endif
329 { NULL, 0, 0, false, false, false, NULL, false }
330 };
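/* Editorial note (not part of the original source): as an illustration,
   user code applies the attributes handled in the table above roughly as
   follows:

     extern void far_away_func (void) __attribute__ ((long_call));
     extern void nearby_func (void) __attribute__ ((short_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void asm_only_stub (void) __attribute__ ((naked));

   The handler functions named in the table validate such uses and attach
   the attribute to the declaration or type.  */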
331 \f
332 /* Initialize the GCC target structure. */
333 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
334 #undef TARGET_MERGE_DECL_ATTRIBUTES
335 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
336 #endif
337
338 #undef TARGET_LEGITIMIZE_ADDRESS
339 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
340
341 #undef TARGET_ATTRIBUTE_TABLE
342 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
343
344 #undef TARGET_ASM_FILE_START
345 #define TARGET_ASM_FILE_START arm_file_start
346 #undef TARGET_ASM_FILE_END
347 #define TARGET_ASM_FILE_END arm_file_end
348
349 #undef TARGET_ASM_ALIGNED_SI_OP
350 #define TARGET_ASM_ALIGNED_SI_OP NULL
351 #undef TARGET_ASM_INTEGER
352 #define TARGET_ASM_INTEGER arm_assemble_integer
353
354 #undef TARGET_PRINT_OPERAND
355 #define TARGET_PRINT_OPERAND arm_print_operand
356 #undef TARGET_PRINT_OPERAND_ADDRESS
357 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
358 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
359 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
360
361 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
362 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
363
364 #undef TARGET_ASM_FUNCTION_PROLOGUE
365 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
366
367 #undef TARGET_ASM_FUNCTION_EPILOGUE
368 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
369
370 #undef TARGET_OPTION_OVERRIDE
371 #define TARGET_OPTION_OVERRIDE arm_option_override
372
373 #undef TARGET_COMP_TYPE_ATTRIBUTES
374 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
375
376 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
377 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
378
379 #undef TARGET_SCHED_ADJUST_COST
380 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
381
382 #undef TARGET_SCHED_REORDER
383 #define TARGET_SCHED_REORDER arm_sched_reorder
384
385 #undef TARGET_REGISTER_MOVE_COST
386 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
387
388 #undef TARGET_MEMORY_MOVE_COST
389 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
390
391 #undef TARGET_ENCODE_SECTION_INFO
392 #ifdef ARM_PE
393 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
394 #else
395 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
396 #endif
397
398 #undef TARGET_STRIP_NAME_ENCODING
399 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
400
401 #undef TARGET_ASM_INTERNAL_LABEL
402 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
403
404 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
405 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
406
407 #undef TARGET_FUNCTION_VALUE
408 #define TARGET_FUNCTION_VALUE arm_function_value
409
410 #undef TARGET_LIBCALL_VALUE
411 #define TARGET_LIBCALL_VALUE arm_libcall_value
412
413 #undef TARGET_FUNCTION_VALUE_REGNO_P
414 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
415
416 #undef TARGET_ASM_OUTPUT_MI_THUNK
417 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
418 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
419 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
420
421 #undef TARGET_RTX_COSTS
422 #define TARGET_RTX_COSTS arm_rtx_costs
423 #undef TARGET_ADDRESS_COST
424 #define TARGET_ADDRESS_COST arm_address_cost
425
426 #undef TARGET_SHIFT_TRUNCATION_MASK
427 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
428 #undef TARGET_VECTOR_MODE_SUPPORTED_P
429 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
430 #undef TARGET_ARRAY_MODE_SUPPORTED_P
431 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
432 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
433 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
434 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
435 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
436 arm_autovectorize_vector_sizes
437
438 #undef TARGET_MACHINE_DEPENDENT_REORG
439 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
440
441 #undef TARGET_INIT_BUILTINS
442 #define TARGET_INIT_BUILTINS arm_init_builtins
443 #undef TARGET_EXPAND_BUILTIN
444 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
445 #undef TARGET_BUILTIN_DECL
446 #define TARGET_BUILTIN_DECL arm_builtin_decl
447
448 #undef TARGET_INIT_LIBFUNCS
449 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
450
451 #undef TARGET_PROMOTE_FUNCTION_MODE
452 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
453 #undef TARGET_PROMOTE_PROTOTYPES
454 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
455 #undef TARGET_PASS_BY_REFERENCE
456 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
457 #undef TARGET_ARG_PARTIAL_BYTES
458 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
459 #undef TARGET_FUNCTION_ARG
460 #define TARGET_FUNCTION_ARG arm_function_arg
461 #undef TARGET_FUNCTION_ARG_ADVANCE
462 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
463 #undef TARGET_FUNCTION_ARG_BOUNDARY
464 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
465
466 #undef TARGET_SETUP_INCOMING_VARARGS
467 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
468
469 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
470 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
471
472 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
473 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
474 #undef TARGET_TRAMPOLINE_INIT
475 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
476 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
477 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
478
479 #undef TARGET_WARN_FUNC_RETURN
480 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
481
482 #undef TARGET_DEFAULT_SHORT_ENUMS
483 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
484
485 #undef TARGET_ALIGN_ANON_BITFIELD
486 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
487
488 #undef TARGET_NARROW_VOLATILE_BITFIELD
489 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
490
491 #undef TARGET_CXX_GUARD_TYPE
492 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
493
494 #undef TARGET_CXX_GUARD_MASK_BIT
495 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
496
497 #undef TARGET_CXX_GET_COOKIE_SIZE
498 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
499
500 #undef TARGET_CXX_COOKIE_HAS_SIZE
501 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
502
503 #undef TARGET_CXX_CDTOR_RETURNS_THIS
504 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
505
506 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
507 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
508
509 #undef TARGET_CXX_USE_AEABI_ATEXIT
510 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
511
512 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
513 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
514 arm_cxx_determine_class_data_visibility
515
516 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
517 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
518
519 #undef TARGET_RETURN_IN_MSB
520 #define TARGET_RETURN_IN_MSB arm_return_in_msb
521
522 #undef TARGET_RETURN_IN_MEMORY
523 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
524
525 #undef TARGET_MUST_PASS_IN_STACK
526 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
527
528 #if ARM_UNWIND_INFO
529 #undef TARGET_ASM_UNWIND_EMIT
530 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
531
532 /* EABI unwinding tables use a different format for the typeinfo tables. */
533 #undef TARGET_ASM_TTYPE
534 #define TARGET_ASM_TTYPE arm_output_ttype
535
536 #undef TARGET_ARM_EABI_UNWINDER
537 #define TARGET_ARM_EABI_UNWINDER true
538
539 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
540 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
541
542 #undef TARGET_ASM_INIT_SECTIONS
543 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
544 #endif /* ARM_UNWIND_INFO */
545
546 #undef TARGET_DWARF_REGISTER_SPAN
547 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
548
549 #undef TARGET_CANNOT_COPY_INSN_P
550 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
551
552 #ifdef HAVE_AS_TLS
553 #undef TARGET_HAVE_TLS
554 #define TARGET_HAVE_TLS true
555 #endif
556
557 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
558 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
559
560 #undef TARGET_LEGITIMATE_CONSTANT_P
561 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
562
563 #undef TARGET_CANNOT_FORCE_CONST_MEM
564 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
565
566 #undef TARGET_MAX_ANCHOR_OFFSET
567 #define TARGET_MAX_ANCHOR_OFFSET 4095
568
569 /* The minimum is set such that the total size of the block
570 for a particular anchor is -4088 + 1 + 4095 bytes, which is
571 divisible by eight, ensuring natural spacing of anchors. */
572 #undef TARGET_MIN_ANCHOR_OFFSET
573 #define TARGET_MIN_ANCHOR_OFFSET -4088
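/* Editorial note: with the two settings above, an anchor can reach byte
   offsets in the range [-4088, +4095], i.e. 4088 + 1 + 4095 = 8184 bytes
   in total; 8184 = 8 * 1023, which is the "divisible by eight" spacing
   referred to in the comment.  */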
574
575 #undef TARGET_SCHED_ISSUE_RATE
576 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
577
578 #undef TARGET_MANGLE_TYPE
579 #define TARGET_MANGLE_TYPE arm_mangle_type
580
581 #undef TARGET_BUILD_BUILTIN_VA_LIST
582 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
583 #undef TARGET_EXPAND_BUILTIN_VA_START
584 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
585 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
586 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
587
588 #ifdef HAVE_AS_TLS
589 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
590 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
591 #endif
592
593 #undef TARGET_LEGITIMATE_ADDRESS_P
594 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
595
596 #undef TARGET_PREFERRED_RELOAD_CLASS
597 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
598
599 #undef TARGET_INVALID_PARAMETER_TYPE
600 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
601
602 #undef TARGET_INVALID_RETURN_TYPE
603 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
604
605 #undef TARGET_PROMOTED_TYPE
606 #define TARGET_PROMOTED_TYPE arm_promoted_type
607
608 #undef TARGET_CONVERT_TO_TYPE
609 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
610
611 #undef TARGET_SCALAR_MODE_SUPPORTED_P
612 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
613
614 #undef TARGET_FRAME_POINTER_REQUIRED
615 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
616
617 #undef TARGET_CAN_ELIMINATE
618 #define TARGET_CAN_ELIMINATE arm_can_eliminate
619
620 #undef TARGET_CONDITIONAL_REGISTER_USAGE
621 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
622
623 #undef TARGET_CLASS_LIKELY_SPILLED_P
624 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
625
626 #undef TARGET_VECTORIZE_BUILTINS
627 #define TARGET_VECTORIZE_BUILTINS
628
629 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
630 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
631 arm_builtin_vectorized_function
632
633 #undef TARGET_VECTOR_ALIGNMENT
634 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
635
636 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
637 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
638 arm_vector_alignment_reachable
639
640 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
641 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
642 arm_builtin_support_vector_misalignment
643
644 #undef TARGET_PREFERRED_RENAME_CLASS
645 #define TARGET_PREFERRED_RENAME_CLASS \
646 arm_preferred_rename_class
647
648 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
649 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
650 arm_vectorize_vec_perm_const_ok
651
652 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
653 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
654 arm_builtin_vectorization_cost
655 #undef TARGET_VECTORIZE_ADD_STMT_COST
656 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
657
658 #undef TARGET_CANONICALIZE_COMPARISON
659 #define TARGET_CANONICALIZE_COMPARISON \
660 arm_canonicalize_comparison
661
662 #undef TARGET_ASAN_SHADOW_OFFSET
663 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
664
665 #undef MAX_INSN_PER_IT_BLOCK
666 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
667
668
669 struct gcc_target targetm = TARGET_INITIALIZER;
670 \f
671 /* Obstack for minipool constant handling. */
672 static struct obstack minipool_obstack;
673 static char * minipool_startobj;
674
675 /* The maximum number of insns skipped which
676 will be conditionalised if possible. */
677 static int max_insns_skipped = 5;
678
679 extern FILE * asm_out_file;
680
681 /* True if we are currently building a constant table. */
682 int making_const_table;
683
684 /* The processor for which instructions should be scheduled. */
685 enum processor_type arm_tune = arm_none;
686
687 /* The current tuning set. */
688 const struct tune_params *current_tune;
689
690 /* Which floating point hardware to schedule for. */
691 int arm_fpu_attr;
692
  693 /* Which floating point hardware to use.  */
694 const struct arm_fpu_desc *arm_fpu_desc;
695
696 /* Used for Thumb call_via trampolines. */
697 rtx thumb_call_via_label[14];
698 static int thumb_call_reg_needed;
699
700 /* Bit values used to identify processor capabilities. */
701 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
702 #define FL_ARCH3M (1 << 1) /* Extended multiply */
703 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
704 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
705 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
706 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
707 #define FL_THUMB (1 << 6) /* Thumb aware */
708 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
709 #define FL_STRONG (1 << 8) /* StrongARM */
710 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
711 #define FL_XSCALE (1 << 10) /* XScale */
712 /* spare (1 << 11) */
713 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
714 media instructions. */
715 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
716 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
717 Note: ARM6 & 7 derivatives only. */
718 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
719 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
720 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
721 profile. */
722 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
723 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
724 #define FL_NEON (1 << 20) /* Neon instructions. */
725 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
726 architecture. */
727 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
728 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
729 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
730
731 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
732 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
733
  734 /* Flags that only affect tuning, not available instructions.  */
735 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
736 | FL_CO_PROC)
737
738 #define FL_FOR_ARCH2 FL_NOTM
739 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
740 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
741 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
742 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
743 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
744 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
745 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
746 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
747 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
748 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
749 #define FL_FOR_ARCH6J FL_FOR_ARCH6
750 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
751 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
752 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
753 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
754 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
755 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
756 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
757 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
758 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
759 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
760 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
761 | FL_ARM_DIV | FL_NOTM)
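/* Editorial note: these FL_FOR_ARCH* macros compose cumulatively, so each
   architecture level inherits the capability bits of its predecessors.
   Expanding the chain above, for example:

     FL_FOR_ARCH4T == (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                       | FL_THUMB)

   i.e. an ARMv4T target carries every earlier flag plus Thumb support.  */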
762
763 /* The bits in this mask specify which
764 instructions we are allowed to generate. */
765 static unsigned long insn_flags = 0;
766
767 /* The bits in this mask specify which instruction scheduling options should
768 be used. */
769 static unsigned long tune_flags = 0;
770
771 /* The highest ARM architecture version supported by the
772 target. */
773 enum base_architecture arm_base_arch = BASE_ARCH_0;
774
775 /* The following are used in the arm.md file as equivalents to bits
776 in the above two flag variables. */
777
778 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
779 int arm_arch3m = 0;
780
781 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
782 int arm_arch4 = 0;
783
784 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
785 int arm_arch4t = 0;
786
787 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
788 int arm_arch5 = 0;
789
790 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
791 int arm_arch5e = 0;
792
793 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
794 int arm_arch6 = 0;
795
796 /* Nonzero if this chip supports the ARM 6K extensions. */
797 int arm_arch6k = 0;
798
799 /* Nonzero if instructions present in ARMv6-M can be used. */
800 int arm_arch6m = 0;
801
802 /* Nonzero if this chip supports the ARM 7 extensions. */
803 int arm_arch7 = 0;
804
805 /* Nonzero if instructions not present in the 'M' profile can be used. */
806 int arm_arch_notm = 0;
807
808 /* Nonzero if instructions present in ARMv7E-M can be used. */
809 int arm_arch7em = 0;
810
811 /* Nonzero if instructions present in ARMv8 can be used. */
812 int arm_arch8 = 0;
813
814 /* Nonzero if this chip can benefit from load scheduling. */
815 int arm_ld_sched = 0;
816
817 /* Nonzero if this chip is a StrongARM. */
818 int arm_tune_strongarm = 0;
819
820 /* Nonzero if this chip supports Intel Wireless MMX technology. */
821 int arm_arch_iwmmxt = 0;
822
823 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
824 int arm_arch_iwmmxt2 = 0;
825
826 /* Nonzero if this chip is an XScale. */
827 int arm_arch_xscale = 0;
828
829 /* Nonzero if tuning for XScale */
830 int arm_tune_xscale = 0;
831
832 /* Nonzero if we want to tune for stores that access the write-buffer.
833 This typically means an ARM6 or ARM7 with MMU or MPU. */
834 int arm_tune_wbuf = 0;
835
836 /* Nonzero if tuning for Cortex-A9. */
837 int arm_tune_cortex_a9 = 0;
838
839 /* Nonzero if generating Thumb instructions. */
840 int thumb_code = 0;
841
842 /* Nonzero if generating Thumb-1 instructions. */
843 int thumb1_code = 0;
844
845 /* Nonzero if we should define __THUMB_INTERWORK__ in the
846 preprocessor.
847 XXX This is a bit of a hack, it's intended to help work around
848 problems in GLD which doesn't understand that armv5t code is
849 interworking clean. */
850 int arm_cpp_interwork = 0;
851
852 /* Nonzero if chip supports Thumb 2. */
853 int arm_arch_thumb2;
854
855 /* Nonzero if chip supports integer division instruction. */
856 int arm_arch_arm_hwdiv;
857 int arm_arch_thumb_hwdiv;
858
  859 /* Nonzero if we should use Neon to handle 64-bit operations rather
860 than core registers. */
861 int prefer_neon_for_64bits = 0;
862
863 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
864 we must report the mode of the memory reference from
865 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
866 enum machine_mode output_memory_reference_mode;
867
868 /* The register number to be used for the PIC offset register. */
869 unsigned arm_pic_register = INVALID_REGNUM;
870
  871 /* Set to 1 after arm_reorg has started.  Reset at the start of
  872 the next function.  */
873 static int after_arm_reorg = 0;
874
875 enum arm_pcs arm_pcs_default;
876
877 /* For an explanation of these variables, see final_prescan_insn below. */
878 int arm_ccfsm_state;
879 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
880 enum arm_cond_code arm_current_cc;
881
882 rtx arm_target_insn;
883 int arm_target_label;
884 /* The number of conditionally executed insns, including the current insn. */
885 int arm_condexec_count = 0;
886 /* A bitmask specifying the patterns for the IT block.
887 Zero means do not output an IT block before this insn. */
888 int arm_condexec_mask = 0;
889 /* The number of bits used in arm_condexec_mask. */
890 int arm_condexec_masklen = 0;
891
892 /* The condition codes of the ARM, and the inverse function. */
893 static const char * const arm_condition_codes[] =
894 {
895 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
896 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
897 };
898
899 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
900 int arm_regs_in_sequence[] =
901 {
902 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
903 };
904
905 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
906 #define streq(string1, string2) (strcmp (string1, string2) == 0)
907
908 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
909 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
910 | (1 << PIC_OFFSET_TABLE_REGNUM)))
911 \f
912 /* Initialization code. */
913
914 struct processors
915 {
916 const char *const name;
917 enum processor_type core;
918 const char *arch;
919 enum base_architecture base_arch;
920 const unsigned long flags;
921 const struct tune_params *const tune;
922 };
923
924
925 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
926 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
927 prefetch_slots, \
928 l1_size, \
929 l1_line_size
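/* Editorial note: these macros expand to the three prefetch-related
   initializers of struct tune_params.  For example
   ARM_PREFETCH_BENEFICIAL (4,32,32), used by arm_cortex_a9_tune below,
   supplies "4, 32, 32" (prefetch slots, L1 cache size, L1 line size),
   while ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */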
930
931 /* arm generic vectorizer costs. */
932 static const
933 struct cpu_vec_costs arm_default_vec_cost = {
934 1, /* scalar_stmt_cost. */
  935 1, /* scalar_load_cost. */
936 1, /* scalar_store_cost. */
937 1, /* vec_stmt_cost. */
938 1, /* vec_to_scalar_cost. */
939 1, /* scalar_to_vec_cost. */
940 1, /* vec_align_load_cost. */
941 1, /* vec_unalign_load_cost. */
942 1, /* vec_unalign_store_cost. */
943 1, /* vec_store_cost. */
944 3, /* cond_taken_branch_cost. */
945 1, /* cond_not_taken_branch_cost. */
946 };
947
948 const struct tune_params arm_slowmul_tune =
949 {
950 arm_slowmul_rtx_costs,
951 NULL,
952 3, /* Constant limit. */
953 5, /* Max cond insns. */
954 ARM_PREFETCH_NOT_BENEFICIAL,
955 true, /* Prefer constant pool. */
956 arm_default_branch_cost,
957 false, /* Prefer LDRD/STRD. */
958 {true, true}, /* Prefer non short circuit. */
959 &arm_default_vec_cost, /* Vectorizer costs. */
960 false /* Prefer Neon for 64-bits bitops. */
961 };
962
963 const struct tune_params arm_fastmul_tune =
964 {
965 arm_fastmul_rtx_costs,
966 NULL,
967 1, /* Constant limit. */
968 5, /* Max cond insns. */
969 ARM_PREFETCH_NOT_BENEFICIAL,
970 true, /* Prefer constant pool. */
971 arm_default_branch_cost,
972 false, /* Prefer LDRD/STRD. */
973 {true, true}, /* Prefer non short circuit. */
974 &arm_default_vec_cost, /* Vectorizer costs. */
975 false /* Prefer Neon for 64-bits bitops. */
976 };
977
978 /* StrongARM has early execution of branches, so a sequence that is worth
979 skipping is shorter. Set max_insns_skipped to a lower value. */
980
981 const struct tune_params arm_strongarm_tune =
982 {
983 arm_fastmul_rtx_costs,
984 NULL,
985 1, /* Constant limit. */
986 3, /* Max cond insns. */
987 ARM_PREFETCH_NOT_BENEFICIAL,
988 true, /* Prefer constant pool. */
989 arm_default_branch_cost,
990 false, /* Prefer LDRD/STRD. */
991 {true, true}, /* Prefer non short circuit. */
992 &arm_default_vec_cost, /* Vectorizer costs. */
993 false /* Prefer Neon for 64-bits bitops. */
994 };
995
996 const struct tune_params arm_xscale_tune =
997 {
998 arm_xscale_rtx_costs,
999 xscale_sched_adjust_cost,
1000 2, /* Constant limit. */
1001 3, /* Max cond insns. */
1002 ARM_PREFETCH_NOT_BENEFICIAL,
1003 true, /* Prefer constant pool. */
1004 arm_default_branch_cost,
1005 false, /* Prefer LDRD/STRD. */
1006 {true, true}, /* Prefer non short circuit. */
1007 &arm_default_vec_cost, /* Vectorizer costs. */
1008 false /* Prefer Neon for 64-bits bitops. */
1009 };
1010
1011 const struct tune_params arm_9e_tune =
1012 {
1013 arm_9e_rtx_costs,
1014 NULL,
1015 1, /* Constant limit. */
1016 5, /* Max cond insns. */
1017 ARM_PREFETCH_NOT_BENEFICIAL,
1018 true, /* Prefer constant pool. */
1019 arm_default_branch_cost,
1020 false, /* Prefer LDRD/STRD. */
1021 {true, true}, /* Prefer non short circuit. */
1022 &arm_default_vec_cost, /* Vectorizer costs. */
1023 false /* Prefer Neon for 64-bits bitops. */
1024 };
1025
1026 const struct tune_params arm_v6t2_tune =
1027 {
1028 arm_9e_rtx_costs,
1029 NULL,
1030 1, /* Constant limit. */
1031 5, /* Max cond insns. */
1032 ARM_PREFETCH_NOT_BENEFICIAL,
1033 false, /* Prefer constant pool. */
1034 arm_default_branch_cost,
1035 false, /* Prefer LDRD/STRD. */
1036 {true, true}, /* Prefer non short circuit. */
1037 &arm_default_vec_cost, /* Vectorizer costs. */
1038 false /* Prefer Neon for 64-bits bitops. */
1039 };
1040
1041 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1042 const struct tune_params arm_cortex_tune =
1043 {
1044 arm_9e_rtx_costs,
1045 NULL,
1046 1, /* Constant limit. */
1047 5, /* Max cond insns. */
1048 ARM_PREFETCH_NOT_BENEFICIAL,
1049 false, /* Prefer constant pool. */
1050 arm_default_branch_cost,
1051 false, /* Prefer LDRD/STRD. */
1052 {true, true}, /* Prefer non short circuit. */
1053 &arm_default_vec_cost, /* Vectorizer costs. */
1054 false /* Prefer Neon for 64-bits bitops. */
1055 };
1056
1057 const struct tune_params arm_cortex_a15_tune =
1058 {
1059 arm_9e_rtx_costs,
1060 NULL,
1061 1, /* Constant limit. */
1062 5, /* Max cond insns. */
1063 ARM_PREFETCH_NOT_BENEFICIAL,
1064 false, /* Prefer constant pool. */
1065 arm_default_branch_cost,
1066 true, /* Prefer LDRD/STRD. */
1067 {true, true}, /* Prefer non short circuit. */
1068 &arm_default_vec_cost, /* Vectorizer costs. */
1069 false /* Prefer Neon for 64-bits bitops. */
1070 };
1071
1072 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1073 less appealing. Set max_insns_skipped to a low value. */
1074
1075 const struct tune_params arm_cortex_a5_tune =
1076 {
1077 arm_9e_rtx_costs,
1078 NULL,
1079 1, /* Constant limit. */
1080 1, /* Max cond insns. */
1081 ARM_PREFETCH_NOT_BENEFICIAL,
1082 false, /* Prefer constant pool. */
1083 arm_cortex_a5_branch_cost,
1084 false, /* Prefer LDRD/STRD. */
1085 {false, false}, /* Prefer non short circuit. */
1086 &arm_default_vec_cost, /* Vectorizer costs. */
1087 false /* Prefer Neon for 64-bits bitops. */
1088 };
1089
1090 const struct tune_params arm_cortex_a9_tune =
1091 {
1092 arm_9e_rtx_costs,
1093 cortex_a9_sched_adjust_cost,
1094 1, /* Constant limit. */
1095 5, /* Max cond insns. */
1096 ARM_PREFETCH_BENEFICIAL(4,32,32),
1097 false, /* Prefer constant pool. */
1098 arm_default_branch_cost,
1099 false, /* Prefer LDRD/STRD. */
1100 {true, true}, /* Prefer non short circuit. */
1101 &arm_default_vec_cost, /* Vectorizer costs. */
1102 false /* Prefer Neon for 64-bits bitops. */
1103 };
1104
1105 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1106 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1107 const struct tune_params arm_v6m_tune =
1108 {
1109 arm_9e_rtx_costs,
1110 NULL,
1111 1, /* Constant limit. */
1112 5, /* Max cond insns. */
1113 ARM_PREFETCH_NOT_BENEFICIAL,
1114 false, /* Prefer constant pool. */
1115 arm_default_branch_cost,
1116 false, /* Prefer LDRD/STRD. */
1117 {false, false}, /* Prefer non short circuit. */
1118 &arm_default_vec_cost, /* Vectorizer costs. */
1119 false /* Prefer Neon for 64-bits bitops. */
1120 };
1121
1122 const struct tune_params arm_fa726te_tune =
1123 {
1124 arm_9e_rtx_costs,
1125 fa726te_sched_adjust_cost,
1126 1, /* Constant limit. */
1127 5, /* Max cond insns. */
1128 ARM_PREFETCH_NOT_BENEFICIAL,
1129 true, /* Prefer constant pool. */
1130 arm_default_branch_cost,
1131 false, /* Prefer LDRD/STRD. */
1132 {true, true}, /* Prefer non short circuit. */
1133 &arm_default_vec_cost, /* Vectorizer costs. */
1134 false /* Prefer Neon for 64-bits bitops. */
1135 };
1136
1137
1138 /* Not all of these give usefully different compilation alternatives,
1139 but there is no simple way of generalizing them. */
1140 static const struct processors all_cores[] =
1141 {
1142 /* ARM Cores */
1143 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1144 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1145 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1146 #include "arm-cores.def"
1147 #undef ARM_CORE
1148 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1149 };
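/* Editorial note: as a purely hypothetical example of the ARM_CORE macro
   above, an arm-cores.def entry written as

     ARM_CORE("example-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand to the initializer

     {"example-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   tying a core name to its architecture flags and tuning parameters.  */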
1150
1151 static const struct processors all_architectures[] =
1152 {
1153 /* ARM Architectures */
1154 /* We don't specify tuning costs here as it will be figured out
1155 from the core. */
1156
1157 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1158 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1159 #include "arm-arches.def"
1160 #undef ARM_ARCH
1161 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1162 };
1163
1164
1165 /* These are populated as commandline arguments are processed, or NULL
1166 if not specified. */
1167 static const struct processors *arm_selected_arch;
1168 static const struct processors *arm_selected_cpu;
1169 static const struct processors *arm_selected_tune;
1170
1171 /* The name of the preprocessor macro to define for this architecture. */
1172
1173 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1174
1175 /* Available values for -mfpu=. */
1176
1177 static const struct arm_fpu_desc all_fpus[] =
1178 {
1179 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1180 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1181 #include "arm-fpus.def"
1182 #undef ARM_FPU
1183 };
1184
1185
1186 /* Supported TLS relocations. */
1187
1188 enum tls_reloc {
1189 TLS_GD32,
1190 TLS_LDM32,
1191 TLS_LDO32,
1192 TLS_IE32,
1193 TLS_LE32,
1194 TLS_DESCSEQ /* GNU scheme */
1195 };
1196
1197 /* The maximum number of insns to be used when loading a constant. */
1198 inline static int
1199 arm_constant_limit (bool size_p)
1200 {
1201 return size_p ? 1 : current_tune->constant_limit;
1202 }
1203
1204 /* Emit an insn that's a simple single-set. Both the operands must be known
1205 to be valid. */
1206 inline static rtx
1207 emit_set_insn (rtx x, rtx y)
1208 {
1209 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1210 }
1211
1212 /* Return the number of bits set in VALUE. */
1213 static unsigned
1214 bit_count (unsigned long value)
1215 {
1216 unsigned long count = 0;
1217
1218 while (value)
1219 {
1220 count++;
1221 value &= value - 1; /* Clear the least-significant set bit. */
1222 }
1223
1224 return count;
1225 }
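/* Editorial note: the loop above relies on "value & (value - 1)" clearing
   exactly the lowest set bit, so the body executes once per set bit (the
   classic Kernighan population count).  For example, for value 0x2c:

     0b101100 -> 0b101000 -> 0b100000 -> 0

   giving bit_count (0x2c) == 3.  */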
1226
1227 typedef struct
1228 {
1229 enum machine_mode mode;
1230 const char *name;
1231 } arm_fixed_mode_set;
1232
 1233 /* A small helper for setting fixed-point libfuncs.  */
1234
1235 static void
1236 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1237 const char *funcname, const char *modename,
1238 int num_suffix)
1239 {
1240 char buffer[50];
1241
1242 if (num_suffix == 0)
1243 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1244 else
1245 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1246
1247 set_optab_libfunc (optable, mode, buffer);
1248 }
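/* Editorial note: tracing the sprintf above, a call such as

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);

   registers "__gnu_addqq3" as the QQmode addition libcall; passing a
   num_suffix of 0 simply omits the trailing digit.  */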
1249
1250 static void
1251 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1252 enum machine_mode from, const char *funcname,
1253 const char *toname, const char *fromname)
1254 {
1255 char buffer[50];
1256 const char *maybe_suffix_2 = "";
1257
1258 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1259 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1260 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1261 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1262 maybe_suffix_2 = "2";
1263
1264 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1265 maybe_suffix_2);
1266
1267 set_conv_libfunc (optable, to, from, buffer);
1268 }
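/* Editorial note: following the suffix rule above, a conversion between
   two signed fractional modes, e.g.

     arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
                                 "fract", "hq", "qq");

   registers "__gnu_fractqqhq2" (both modes are fixed-point with the same
   signedness and fract/accum class, so the "2" suffix is appended),
   whereas a conversion from SImode to SQmode is registered as
   "__gnu_fractsisq" with no suffix.  */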
1269
1270 /* Set up library functions unique to ARM. */
1271
1272 static void
1273 arm_init_libfuncs (void)
1274 {
1275 /* For Linux, we have access to kernel support for atomic operations. */
1276 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1277 init_sync_libfuncs (2 * UNITS_PER_WORD);
1278
1279 /* There are no special library functions unless we are using the
1280 ARM BPABI. */
1281 if (!TARGET_BPABI)
1282 return;
1283
1284 /* The functions below are described in Section 4 of the "Run-Time
1285 ABI for the ARM architecture", Version 1.0. */
1286
1287 /* Double-precision floating-point arithmetic. Table 2. */
1288 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1289 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1290 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1291 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1292 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1293
1294 /* Double-precision comparisons. Table 3. */
1295 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1296 set_optab_libfunc (ne_optab, DFmode, NULL);
1297 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1298 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1299 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1300 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1301 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1302
1303 /* Single-precision floating-point arithmetic. Table 4. */
1304 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1305 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1306 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1307 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1308 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1309
1310 /* Single-precision comparisons. Table 5. */
1311 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1312 set_optab_libfunc (ne_optab, SFmode, NULL);
1313 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1314 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1315 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1316 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1317 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1318
1319 /* Floating-point to integer conversions. Table 6. */
1320 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1321 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1322 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1323 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1324 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1325 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1326 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1327 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1328
1329 /* Conversions between floating types. Table 7. */
1330 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1331 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1332
1333 /* Integer to floating-point conversions. Table 8. */
1334 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1335 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1336 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1337 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1338 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1339 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1340 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1341 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1342
1343 /* Long long. Table 9. */
1344 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1345 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1346 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1347 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1348 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1349 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1350 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1351 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1352
1353 /* Integer (32/32->32) division. \S 4.3.1. */
1354 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1355 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1356
1357 /* The divmod functions are designed so that they can be used for
1358 plain division, even though they return both the quotient and the
1359 remainder. The quotient is returned in the usual location (i.e.,
1360 r0 for SImode, {r0, r1} for DImode), just as would be expected
1361 for an ordinary division routine. Because the AAPCS calling
1362 conventions specify that all of { r0, r1, r2, r3 } are
1363 callee-saved registers, there is no need to tell the compiler
1364 explicitly that those registers are clobbered by these
1365 routines. */
1366 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1367 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
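  /* Editorial note: given the registrations above, a plain 64-bit signed
     division such as

       long long quot = a / b;

     compiles to a call to __aeabi_ldivmod; the quotient is taken from
     {r0, r1} as usual and the remainder left in {r2, r3} is simply
     ignored, which is why the same entry point can serve both sdiv_optab
     and sdivmod_optab.  */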
1368
1369 /* For SImode division the ABI provides div-without-mod routines,
1370 which are faster. */
1371 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1372 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1373
1374 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1375 divmod libcalls instead. */
1376 set_optab_libfunc (smod_optab, DImode, NULL);
1377 set_optab_libfunc (umod_optab, DImode, NULL);
1378 set_optab_libfunc (smod_optab, SImode, NULL);
1379 set_optab_libfunc (umod_optab, SImode, NULL);
1380
1381 /* Half-precision float operations. The compiler handles all operations
 1382 with NULL libfuncs by converting to SFmode.  */
1383 switch (arm_fp16_format)
1384 {
1385 case ARM_FP16_FORMAT_IEEE:
1386 case ARM_FP16_FORMAT_ALTERNATIVE:
1387
1388 /* Conversions. */
1389 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1390 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1391 ? "__gnu_f2h_ieee"
1392 : "__gnu_f2h_alternative"));
1393 set_conv_libfunc (sext_optab, SFmode, HFmode,
1394 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1395 ? "__gnu_h2f_ieee"
1396 : "__gnu_h2f_alternative"));
1397
1398 /* Arithmetic. */
1399 set_optab_libfunc (add_optab, HFmode, NULL);
1400 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1401 set_optab_libfunc (smul_optab, HFmode, NULL);
1402 set_optab_libfunc (neg_optab, HFmode, NULL);
1403 set_optab_libfunc (sub_optab, HFmode, NULL);
1404
1405 /* Comparisons. */
1406 set_optab_libfunc (eq_optab, HFmode, NULL);
1407 set_optab_libfunc (ne_optab, HFmode, NULL);
1408 set_optab_libfunc (lt_optab, HFmode, NULL);
1409 set_optab_libfunc (le_optab, HFmode, NULL);
1410 set_optab_libfunc (ge_optab, HFmode, NULL);
1411 set_optab_libfunc (gt_optab, HFmode, NULL);
1412 set_optab_libfunc (unord_optab, HFmode, NULL);
1413 break;
1414
1415 default:
1416 break;
1417 }
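  /* Editorial note: because the HFmode arithmetic and comparison libfuncs
     above are left NULL, an expression on __fp16 operands such as

       __fp16 c = a + b;

     is expanded by widening a and b to SFmode (via __gnu_h2f_ieee or
     __gnu_h2f_alternative, as registered above), performing the addition
     in SFmode, and narrowing the result back with the matching f2h
     helper.  */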
1418
1419 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1420 {
1421 const arm_fixed_mode_set fixed_arith_modes[] =
1422 {
1423 { QQmode, "qq" },
1424 { UQQmode, "uqq" },
1425 { HQmode, "hq" },
1426 { UHQmode, "uhq" },
1427 { SQmode, "sq" },
1428 { USQmode, "usq" },
1429 { DQmode, "dq" },
1430 { UDQmode, "udq" },
1431 { TQmode, "tq" },
1432 { UTQmode, "utq" },
1433 { HAmode, "ha" },
1434 { UHAmode, "uha" },
1435 { SAmode, "sa" },
1436 { USAmode, "usa" },
1437 { DAmode, "da" },
1438 { UDAmode, "uda" },
1439 { TAmode, "ta" },
1440 { UTAmode, "uta" }
1441 };
1442 const arm_fixed_mode_set fixed_conv_modes[] =
1443 {
1444 { QQmode, "qq" },
1445 { UQQmode, "uqq" },
1446 { HQmode, "hq" },
1447 { UHQmode, "uhq" },
1448 { SQmode, "sq" },
1449 { USQmode, "usq" },
1450 { DQmode, "dq" },
1451 { UDQmode, "udq" },
1452 { TQmode, "tq" },
1453 { UTQmode, "utq" },
1454 { HAmode, "ha" },
1455 { UHAmode, "uha" },
1456 { SAmode, "sa" },
1457 { USAmode, "usa" },
1458 { DAmode, "da" },
1459 { UDAmode, "uda" },
1460 { TAmode, "ta" },
1461 { UTAmode, "uta" },
1462 { QImode, "qi" },
1463 { HImode, "hi" },
1464 { SImode, "si" },
1465 { DImode, "di" },
1466 { TImode, "ti" },
1467 { SFmode, "sf" },
1468 { DFmode, "df" }
1469 };
1470 unsigned int i, j;
1471
1472 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1473 {
1474 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1475 "add", fixed_arith_modes[i].name, 3);
1476 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1477 "ssadd", fixed_arith_modes[i].name, 3);
1478 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1479 "usadd", fixed_arith_modes[i].name, 3);
1480 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1481 "sub", fixed_arith_modes[i].name, 3);
1482 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1483 "sssub", fixed_arith_modes[i].name, 3);
1484 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1485 "ussub", fixed_arith_modes[i].name, 3);
1486 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1487 "mul", fixed_arith_modes[i].name, 3);
1488 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1489 "ssmul", fixed_arith_modes[i].name, 3);
1490 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1491 "usmul", fixed_arith_modes[i].name, 3);
1492 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1493 "div", fixed_arith_modes[i].name, 3);
1494 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1495 "udiv", fixed_arith_modes[i].name, 3);
1496 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1497 "ssdiv", fixed_arith_modes[i].name, 3);
1498 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1499 "usdiv", fixed_arith_modes[i].name, 3);
1500 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1501 "neg", fixed_arith_modes[i].name, 2);
1502 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1503 "ssneg", fixed_arith_modes[i].name, 2);
1504 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1505 "usneg", fixed_arith_modes[i].name, 2);
1506 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1507 "ashl", fixed_arith_modes[i].name, 3);
1508 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1509 "ashr", fixed_arith_modes[i].name, 3);
1510 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1511 "lshr", fixed_arith_modes[i].name, 3);
1512 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1513 "ssashl", fixed_arith_modes[i].name, 3);
1514 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1515 "usashl", fixed_arith_modes[i].name, 3);
1516 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1517 "cmp", fixed_arith_modes[i].name, 2);
1518 }
1519
1520 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1521 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1522 {
1523 if (i == j
1524 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1525 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1526 continue;
1527
1528 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1529 fixed_conv_modes[j].mode, "fract",
1530 fixed_conv_modes[i].name,
1531 fixed_conv_modes[j].name);
1532 arm_set_fixed_conv_libfunc (satfract_optab,
1533 fixed_conv_modes[i].mode,
1534 fixed_conv_modes[j].mode, "satfract",
1535 fixed_conv_modes[i].name,
1536 fixed_conv_modes[j].name);
1537 arm_set_fixed_conv_libfunc (fractuns_optab,
1538 fixed_conv_modes[i].mode,
1539 fixed_conv_modes[j].mode, "fractuns",
1540 fixed_conv_modes[i].name,
1541 fixed_conv_modes[j].name);
1542 arm_set_fixed_conv_libfunc (satfractuns_optab,
1543 fixed_conv_modes[i].mode,
1544 fixed_conv_modes[j].mode, "satfractuns",
1545 fixed_conv_modes[i].name,
1546 fixed_conv_modes[j].name);
1547 }
1548 }
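  /* For reference (an assumption from the __gnu_ naming scheme noted above,
     not a statement from the original sources): the loops above register
     libcall names of the form __gnu_<op><mode-suffix><arity>, e.g.
     __gnu_addqq3 for a QQmode addition or __gnu_fractqqsf for a QQmode to
     SFmode conversion.  */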
1549
1550 if (TARGET_AAPCS_BASED)
1551 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1552 }
1553
1554 /* On AAPCS systems, this is the "struct __va_list". */
1555 static GTY(()) tree va_list_type;
1556
1557 /* Return the type to use as __builtin_va_list. */
1558 static tree
1559 arm_build_builtin_va_list (void)
1560 {
1561 tree va_list_name;
1562 tree ap_field;
1563
1564 if (!TARGET_AAPCS_BASED)
1565 return std_build_builtin_va_list ();
1566
1567 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1568 defined as:
1569
1570 struct __va_list
1571 {
1572 void *__ap;
1573 };
1574
1575 The C Library ABI further reinforces this definition in \S
1576 4.1.
1577
1578 We must follow this definition exactly. The structure tag
1579 name is visible in C++ mangled names, and thus forms a part
1580 of the ABI. The field name may be used by people who
1581 #include <stdarg.h>. */
1582 /* Create the type. */
1583 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1584 /* Give it the required name. */
1585 va_list_name = build_decl (BUILTINS_LOCATION,
1586 TYPE_DECL,
1587 get_identifier ("__va_list"),
1588 va_list_type);
1589 DECL_ARTIFICIAL (va_list_name) = 1;
1590 TYPE_NAME (va_list_type) = va_list_name;
1591 TYPE_STUB_DECL (va_list_type) = va_list_name;
1592 /* Create the __ap field. */
1593 ap_field = build_decl (BUILTINS_LOCATION,
1594 FIELD_DECL,
1595 get_identifier ("__ap"),
1596 ptr_type_node);
1597 DECL_ARTIFICIAL (ap_field) = 1;
1598 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1599 TYPE_FIELDS (va_list_type) = ap_field;
1600 /* Compute its layout. */
1601 layout_type (va_list_type);
1602
1603 return va_list_type;
1604 }
1605
1606 /* Return an expression of type "void *" pointing to the next
1607 available argument in a variable-argument list. VALIST is the
1608 user-level va_list object, of type __builtin_va_list. */
1609 static tree
1610 arm_extract_valist_ptr (tree valist)
1611 {
1612 if (TREE_TYPE (valist) == error_mark_node)
1613 return error_mark_node;
1614
1615 /* On an AAPCS target, the pointer is stored within "struct
1616 va_list". */
1617 if (TARGET_AAPCS_BASED)
1618 {
1619 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1620 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1621 valist, ap_field, NULL_TREE);
1622 }
1623
1624 return valist;
1625 }
1626
1627 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1628 static void
1629 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1630 {
1631 valist = arm_extract_valist_ptr (valist);
1632 std_expand_builtin_va_start (valist, nextarg);
1633 }
1634
1635 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1636 static tree
1637 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1638 gimple_seq *post_p)
1639 {
1640 valist = arm_extract_valist_ptr (valist);
1641 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1642 }
1643
1644 /* Fix up any incompatible options that the user has specified. */
1645 static void
1646 arm_option_override (void)
1647 {
1648 if (global_options_set.x_arm_arch_option)
1649 arm_selected_arch = &all_architectures[arm_arch_option];
1650
1651 if (global_options_set.x_arm_cpu_option)
1652 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1653
1654 if (global_options_set.x_arm_tune_option)
1655 arm_selected_tune = &all_cores[(int) arm_tune_option];
1656
1657 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1658 SUBTARGET_OVERRIDE_OPTIONS;
1659 #endif
1660
1661 if (arm_selected_arch)
1662 {
1663 if (arm_selected_cpu)
1664 {
1665 /* Check for conflict between mcpu and march. */
1666 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1667 {
1668 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1669 arm_selected_cpu->name, arm_selected_arch->name);
1670 /* -march wins for code generation.
1671 -mcpu wins for default tuning. */
1672 if (!arm_selected_tune)
1673 arm_selected_tune = arm_selected_cpu;
1674
1675 arm_selected_cpu = arm_selected_arch;
1676 }
1677 else
1678 /* -mcpu wins. */
1679 arm_selected_arch = NULL;
1680 }
1681 else
1682 /* Pick a CPU based on the architecture. */
1683 arm_selected_cpu = arm_selected_arch;
1684 }
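  /* Illustrative example (not from the original sources): with both
     -mcpu=cortex-a8 and -march=armv5t on the command line, the flag sets
     differ, so the warning above is issued; armv5t then drives code
     generation while cortex-a8 remains the default tuning choice, as the
     comment above describes.  */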
1685
1686 /* If the user did not specify a processor, choose one for them. */
1687 if (!arm_selected_cpu)
1688 {
1689 const struct processors * sel;
1690 unsigned int sought;
1691
1692 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1693 if (!arm_selected_cpu->name)
1694 {
1695 #ifdef SUBTARGET_CPU_DEFAULT
1696 /* Use the subtarget default CPU if none was specified by
1697 configure. */
1698 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1699 #endif
1700 /* Default to ARM6. */
1701 if (!arm_selected_cpu->name)
1702 arm_selected_cpu = &all_cores[arm6];
1703 }
1704
1705 sel = arm_selected_cpu;
1706 insn_flags = sel->flags;
1707
1708 /* Now check to see if the user has specified some command line
1709 switches that require certain abilities from the cpu. */
1710 sought = 0;
1711
1712 if (TARGET_INTERWORK || TARGET_THUMB)
1713 {
1714 sought |= (FL_THUMB | FL_MODE32);
1715
1716 /* There are no ARM processors that support both APCS-26 and
1717 interworking. Therefore we force FL_MODE26 to be removed
1718 from insn_flags here (if it was set), so that the search
1719 below will always be able to find a compatible processor. */
1720 insn_flags &= ~FL_MODE26;
1721 }
1722
1723 if (sought != 0 && ((sought & insn_flags) != sought))
1724 {
1725 /* Try to locate a CPU type that supports all of the abilities
1726 of the default CPU, plus the extra abilities requested by
1727 the user. */
1728 for (sel = all_cores; sel->name != NULL; sel++)
1729 if ((sel->flags & sought) == (sought | insn_flags))
1730 break;
1731
1732 if (sel->name == NULL)
1733 {
1734 unsigned current_bit_count = 0;
1735 const struct processors * best_fit = NULL;
1736
1737 /* Ideally we would like to issue an error message here
1738 saying that it was not possible to find a CPU compatible
1739 with the default CPU, but which also supports the command
1740 line options specified by the programmer, and so they
1741 ought to use the -mcpu=<name> command line option to
1742 override the default CPU type.
1743
1744 If we cannot find a cpu that has both the
1745 characteristics of the default cpu and the given
1746 command line options, we scan the array again looking
1747 for a best match. */
1748 for (sel = all_cores; sel->name != NULL; sel++)
1749 if ((sel->flags & sought) == sought)
1750 {
1751 unsigned count;
1752
1753 count = bit_count (sel->flags & insn_flags);
1754
1755 if (count >= current_bit_count)
1756 {
1757 best_fit = sel;
1758 current_bit_count = count;
1759 }
1760 }
1761
1762 gcc_assert (best_fit);
1763 sel = best_fit;
1764 }
1765
1766 arm_selected_cpu = sel;
1767 }
1768 }
1769
1770 gcc_assert (arm_selected_cpu);
1771 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1772 if (!arm_selected_tune)
1773 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1774
1775 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1776 insn_flags = arm_selected_cpu->flags;
1777 arm_base_arch = arm_selected_cpu->base_arch;
1778
1779 arm_tune = arm_selected_tune->core;
1780 tune_flags = arm_selected_tune->flags;
1781 current_tune = arm_selected_tune->tune;
1782
1783 /* Make sure that the processor choice does not conflict with any of the
1784 other command line choices. */
1785 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1786 error ("target CPU does not support ARM mode");
1787
1788 /* BPABI targets use linker tricks to allow interworking on cores
1789 without thumb support. */
1790 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1791 {
1792 warning (0, "target CPU does not support interworking" );
1793 target_flags &= ~MASK_INTERWORK;
1794 }
1795
1796 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1797 {
1798 warning (0, "target CPU does not support THUMB instructions");
1799 target_flags &= ~MASK_THUMB;
1800 }
1801
1802 if (TARGET_APCS_FRAME && TARGET_THUMB)
1803 {
1804 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1805 target_flags &= ~MASK_APCS_FRAME;
1806 }
1807
1808 /* Callee super interworking implies thumb interworking. Adding
1809 this to the flags here simplifies the logic elsewhere. */
1810 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1811 target_flags |= MASK_INTERWORK;
1812
1813 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1814 from here where no function is being compiled currently. */
1815 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1816 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1817
1818 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1819 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1820
1821 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1822 {
1823 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1824 target_flags |= MASK_APCS_FRAME;
1825 }
1826
1827 if (TARGET_POKE_FUNCTION_NAME)
1828 target_flags |= MASK_APCS_FRAME;
1829
1830 if (TARGET_APCS_REENT && flag_pic)
1831 error ("-fpic and -mapcs-reent are incompatible");
1832
1833 if (TARGET_APCS_REENT)
1834 warning (0, "APCS reentrant code not supported. Ignored");
1835
1836 /* If this target is normally configured to use APCS frames, warn if they
1837 are turned off and debugging is turned on. */
1838 if (TARGET_ARM
1839 && write_symbols != NO_DEBUG
1840 && !TARGET_APCS_FRAME
1841 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1842 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1843
1844 if (TARGET_APCS_FLOAT)
1845 warning (0, "passing floating point arguments in fp regs not yet supported");
1846
1847 if (TARGET_LITTLE_WORDS)
1848 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1849 "will be removed in a future release");
1850
1851 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1852 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1853 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1854 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1855 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1856 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1857 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1858 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1859 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1860 arm_arch6m = arm_arch6 && !arm_arch_notm;
1861 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1862 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1863 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1864 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1865 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1866
1867 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1868 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1869 thumb_code = TARGET_ARM == 0;
1870 thumb1_code = TARGET_THUMB1 != 0;
1871 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1872 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1873 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1874 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1875 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1876 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1877 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1878 if (arm_restrict_it == 2)
1879 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
1880
1881 if (!TARGET_THUMB2)
1882 arm_restrict_it = 0;
1883
1884 /* If we are not using the default (ARM mode) section anchor offset
1885 ranges, then set the correct ranges now. */
1886 if (TARGET_THUMB1)
1887 {
1888 /* Thumb-1 LDR instructions cannot have negative offsets.
1889 Permissible positive offset ranges are 5-bit (for byte loads),
1890 6-bit (for halfword loads), or 7-bit (for word loads).
1891 Empirical results suggest a 7-bit anchor range gives the best
1892 overall code size. */
1893 targetm.min_anchor_offset = 0;
1894 targetm.max_anchor_offset = 127;
1895 }
1896 else if (TARGET_THUMB2)
1897 {
1898 /* The minimum is set such that the total size of the block
1899 for a particular anchor is 248 + 1 + 4095 bytes, which is
1900 divisible by eight, ensuring natural spacing of anchors. */
1901 targetm.min_anchor_offset = -248;
1902 targetm.max_anchor_offset = 4095;
1903 }
1904
1905 /* V5 code we generate is completely interworking capable, so we turn off
1906 TARGET_INTERWORK here to avoid many tests later on. */
1907
1908 /* XXX However, we must pass the right pre-processor defines to CPP
1909 or GLD can get confused. This is a hack. */
1910 if (TARGET_INTERWORK)
1911 arm_cpp_interwork = 1;
1912
1913 if (arm_arch5)
1914 target_flags &= ~MASK_INTERWORK;
1915
1916 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1917 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1918
1919 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1920 error ("iwmmxt abi requires an iwmmxt capable cpu");
1921
1922 if (!global_options_set.x_arm_fpu_index)
1923 {
1924 const char *target_fpu_name;
1925 bool ok;
1926
1927 #ifdef FPUTYPE_DEFAULT
1928 target_fpu_name = FPUTYPE_DEFAULT;
1929 #else
1930 target_fpu_name = "vfp";
1931 #endif
1932
1933 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1934 CL_TARGET);
1935 gcc_assert (ok);
1936 }
1937
1938 arm_fpu_desc = &all_fpus[arm_fpu_index];
1939
1940 switch (arm_fpu_desc->model)
1941 {
1942 case ARM_FP_MODEL_VFP:
1943 arm_fpu_attr = FPU_VFP;
1944 break;
1945
1946 default:
1947 gcc_unreachable();
1948 }
1949
1950 if (TARGET_AAPCS_BASED)
1951 {
1952 if (TARGET_CALLER_INTERWORKING)
1953 error ("AAPCS does not support -mcaller-super-interworking");
1954 else
1955 if (TARGET_CALLEE_INTERWORKING)
1956 error ("AAPCS does not support -mcallee-super-interworking");
1957 }
1958
1959 /* iWMMXt and NEON are incompatible. */
1960 if (TARGET_IWMMXT && TARGET_NEON)
1961 error ("iWMMXt and NEON are incompatible");
1962
1963 /* iWMMXt unsupported under Thumb mode. */
1964 if (TARGET_THUMB && TARGET_IWMMXT)
1965 error ("iWMMXt unsupported under Thumb mode");
1966
1967 /* __fp16 support currently assumes the core has ldrh. */
1968 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1969 sorry ("__fp16 and no ldrh");
1970
1971 /* If soft-float is specified then don't use FPU. */
1972 if (TARGET_SOFT_FLOAT)
1973 arm_fpu_attr = FPU_NONE;
1974
1975 if (TARGET_AAPCS_BASED)
1976 {
1977 if (arm_abi == ARM_ABI_IWMMXT)
1978 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1979 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1980 && TARGET_HARD_FLOAT
1981 && TARGET_VFP)
1982 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1983 else
1984 arm_pcs_default = ARM_PCS_AAPCS;
1985 }
1986 else
1987 {
1988 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1989 sorry ("-mfloat-abi=hard and VFP");
1990
1991 if (arm_abi == ARM_ABI_APCS)
1992 arm_pcs_default = ARM_PCS_APCS;
1993 else
1994 arm_pcs_default = ARM_PCS_ATPCS;
1995 }
1996
1997 /* For arm2/3 there is no need to do any scheduling if we are doing
1998 software floating-point. */
1999 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2000 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2001
2002 /* Use the cp15 method if it is available. */
2003 if (target_thread_pointer == TP_AUTO)
2004 {
2005 if (arm_arch6k && !TARGET_THUMB1)
2006 target_thread_pointer = TP_CP15;
2007 else
2008 target_thread_pointer = TP_SOFT;
2009 }
2010
2011 if (TARGET_HARD_TP && TARGET_THUMB1)
2012 error ("can not use -mtp=cp15 with 16-bit Thumb");
2013
2014 /* Override the default structure alignment for AAPCS ABI. */
2015 if (!global_options_set.x_arm_structure_size_boundary)
2016 {
2017 if (TARGET_AAPCS_BASED)
2018 arm_structure_size_boundary = 8;
2019 }
2020 else
2021 {
2022 if (arm_structure_size_boundary != 8
2023 && arm_structure_size_boundary != 32
2024 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2025 {
2026 if (ARM_DOUBLEWORD_ALIGN)
2027 warning (0,
2028 "structure size boundary can only be set to 8, 32 or 64");
2029 else
2030 warning (0, "structure size boundary can only be set to 8 or 32");
2031 arm_structure_size_boundary
2032 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2033 }
2034 }
2035
2036 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2037 {
2038 error ("RTP PIC is incompatible with Thumb");
2039 flag_pic = 0;
2040 }
2041
2042 /* If stack checking is disabled, we can use r10 as the PIC register,
2043 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2044 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2045 {
2046 if (TARGET_VXWORKS_RTP)
2047 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2048 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2049 }
2050
2051 if (flag_pic && TARGET_VXWORKS_RTP)
2052 arm_pic_register = 9;
2053
2054 if (arm_pic_register_string != NULL)
2055 {
2056 int pic_register = decode_reg_name (arm_pic_register_string);
2057
2058 if (!flag_pic)
2059 warning (0, "-mpic-register= is useless without -fpic");
2060
2061 /* Prevent the user from choosing an obviously stupid PIC register. */
2062 else if (pic_register < 0 || call_used_regs[pic_register]
2063 || pic_register == HARD_FRAME_POINTER_REGNUM
2064 || pic_register == STACK_POINTER_REGNUM
2065 || pic_register >= PC_REGNUM
2066 || (TARGET_VXWORKS_RTP
2067 && (unsigned int) pic_register != arm_pic_register))
2068 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2069 else
2070 arm_pic_register = pic_register;
2071 }
2072
2073 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2074 if (fix_cm3_ldrd == 2)
2075 {
2076 if (arm_selected_cpu->core == cortexm3)
2077 fix_cm3_ldrd = 1;
2078 else
2079 fix_cm3_ldrd = 0;
2080 }
2081
2082 /* Enable -munaligned-access by default for
2083 - all ARMv6 architecture-based processors
2084 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2085 - ARMv8 architecture-based processors.
2086
2087 Disable -munaligned-access by default for
2088 - all pre-ARMv6 architecture-based processors
2089 - ARMv6-M architecture-based processors. */
2090
2091 if (unaligned_access == 2)
2092 {
2093 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2094 unaligned_access = 1;
2095 else
2096 unaligned_access = 0;
2097 }
2098 else if (unaligned_access == 1
2099 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2100 {
2101 warning (0, "target CPU does not support unaligned accesses");
2102 unaligned_access = 0;
2103 }
2104
2105 if (TARGET_THUMB1 && flag_schedule_insns)
2106 {
2107 /* Don't warn since it's on by default in -O2. */
2108 flag_schedule_insns = 0;
2109 }
2110
2111 if (optimize_size)
2112 {
2113 /* If optimizing for size, bump the number of instructions that we
2114 are prepared to conditionally execute (even on a StrongARM). */
2115 max_insns_skipped = 6;
2116 }
2117 else
2118 max_insns_skipped = current_tune->max_insns_skipped;
2119
2120 /* Hot/Cold partitioning is not currently supported, since we can't
2121 handle literal pool placement in that case. */
2122 if (flag_reorder_blocks_and_partition)
2123 {
2124 inform (input_location,
2125 "-freorder-blocks-and-partition not supported on this architecture");
2126 flag_reorder_blocks_and_partition = 0;
2127 flag_reorder_blocks = 1;
2128 }
2129
2130 if (flag_pic)
2131 /* Hoisting PIC address calculations more aggressively provides a small,
2132 but measurable, size reduction for PIC code. Therefore, we decrease
2133 the bar for unrestricted expression hoisting to the cost of PIC address
2134 calculation, which is 2 instructions. */
2135 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2136 global_options.x_param_values,
2137 global_options_set.x_param_values);
2138
2139 /* ARM EABI defaults to strict volatile bitfields. */
2140 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2141 && abi_version_at_least(2))
2142 flag_strict_volatile_bitfields = 1;
2143
2144 /* Enable sw prefetching at -O3 for CPUs that have prefetch and for which we
2145 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2146 if (flag_prefetch_loop_arrays < 0
2147 && HAVE_prefetch
2148 && optimize >= 3
2149 && current_tune->num_prefetch_slots > 0)
2150 flag_prefetch_loop_arrays = 1;
2151
2152 /* Set up parameters to be used in prefetching algorithm. Do not override the
2153 defaults unless we are tuning for a core we have researched values for. */
2154 if (current_tune->num_prefetch_slots > 0)
2155 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2156 current_tune->num_prefetch_slots,
2157 global_options.x_param_values,
2158 global_options_set.x_param_values);
2159 if (current_tune->l1_cache_line_size >= 0)
2160 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2161 current_tune->l1_cache_line_size,
2162 global_options.x_param_values,
2163 global_options_set.x_param_values);
2164 if (current_tune->l1_cache_size >= 0)
2165 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2166 current_tune->l1_cache_size,
2167 global_options.x_param_values,
2168 global_options_set.x_param_values);
2169
2170 /* Use Neon rather than core registers to perform 64-bit
2171 operations. */
2172 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2173 if (use_neon_for_64bits == 1)
2174 prefer_neon_for_64bits = true;
2175
2176 /* Use the alternative scheduling-pressure algorithm by default. */
2177 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2178 global_options.x_param_values,
2179 global_options_set.x_param_values);
2180
2181 /* Disable shrink-wrap when optimizing function for size, since it tends to
2182 generate additional returns. */
2183 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2184 flag_shrink_wrap = false;
2185 /* TBD: Dwarf info for apcs frame is not handled yet. */
2186 if (TARGET_APCS_FRAME)
2187 flag_shrink_wrap = false;
2188
2189 /* Register global variables with the garbage collector. */
2190 arm_add_gc_roots ();
2191 }
2192
2193 static void
2194 arm_add_gc_roots (void)
2195 {
2196 gcc_obstack_init(&minipool_obstack);
2197 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2198 }
2199 \f
2200 /* A table of known ARM exception types.
2201 For use with the interrupt function attribute. */
2202
2203 typedef struct
2204 {
2205 const char *const arg;
2206 const unsigned long return_value;
2207 }
2208 isr_attribute_arg;
2209
2210 static const isr_attribute_arg isr_attribute_args [] =
2211 {
2212 { "IRQ", ARM_FT_ISR },
2213 { "irq", ARM_FT_ISR },
2214 { "FIQ", ARM_FT_FIQ },
2215 { "fiq", ARM_FT_FIQ },
2216 { "ABORT", ARM_FT_ISR },
2217 { "abort", ARM_FT_ISR },
2218 { "ABORT", ARM_FT_ISR },
2219 { "abort", ARM_FT_ISR },
2220 { "UNDEF", ARM_FT_EXCEPTION },
2221 { "undef", ARM_FT_EXCEPTION },
2222 { "SWI", ARM_FT_EXCEPTION },
2223 { "swi", ARM_FT_EXCEPTION },
2224 { NULL, ARM_FT_NORMAL }
2225 };
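
/* Illustrative usage (not from the original sources): user code selects one
   of the entries above via the attribute argument, e.g.

       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   which arm_isr_value below maps to ARM_FT_ISR.  */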
2226
2227 /* Returns the (interrupt) function type of the current
2228 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2229
2230 static unsigned long
2231 arm_isr_value (tree argument)
2232 {
2233 const isr_attribute_arg * ptr;
2234 const char * arg;
2235
2236 if (!arm_arch_notm)
2237 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2238
2239 /* No argument - default to IRQ. */
2240 if (argument == NULL_TREE)
2241 return ARM_FT_ISR;
2242
2243 /* Get the value of the argument. */
2244 if (TREE_VALUE (argument) == NULL_TREE
2245 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2246 return ARM_FT_UNKNOWN;
2247
2248 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2249
2250 /* Check it against the list of known arguments. */
2251 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2252 if (streq (arg, ptr->arg))
2253 return ptr->return_value;
2254
2255 /* An unrecognized interrupt type. */
2256 return ARM_FT_UNKNOWN;
2257 }
2258
2259 /* Computes the type of the current function. */
2260
2261 static unsigned long
2262 arm_compute_func_type (void)
2263 {
2264 unsigned long type = ARM_FT_UNKNOWN;
2265 tree a;
2266 tree attr;
2267
2268 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2269
2270 /* Decide if the current function is volatile. Such functions
2271 never return, and many memory cycles can be saved by not storing
2272 register values that will never be needed again. This optimization
2273 was added to speed up context switching in a kernel application. */
2274 if (optimize > 0
2275 && (TREE_NOTHROW (current_function_decl)
2276 || !(flag_unwind_tables
2277 || (flag_exceptions
2278 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2279 && TREE_THIS_VOLATILE (current_function_decl))
2280 type |= ARM_FT_VOLATILE;
2281
2282 if (cfun->static_chain_decl != NULL)
2283 type |= ARM_FT_NESTED;
2284
2285 attr = DECL_ATTRIBUTES (current_function_decl);
2286
2287 a = lookup_attribute ("naked", attr);
2288 if (a != NULL_TREE)
2289 type |= ARM_FT_NAKED;
2290
2291 a = lookup_attribute ("isr", attr);
2292 if (a == NULL_TREE)
2293 a = lookup_attribute ("interrupt", attr);
2294
2295 if (a == NULL_TREE)
2296 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2297 else
2298 type |= arm_isr_value (TREE_VALUE (a));
2299
2300 return type;
2301 }
2302
2303 /* Returns the type of the current function. */
2304
2305 unsigned long
2306 arm_current_func_type (void)
2307 {
2308 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2309 cfun->machine->func_type = arm_compute_func_type ();
2310
2311 return cfun->machine->func_type;
2312 }
2313
2314 bool
2315 arm_allocate_stack_slots_for_args (void)
2316 {
2317 /* Naked functions should not allocate stack slots for arguments. */
2318 return !IS_NAKED (arm_current_func_type ());
2319 }
2320
2321 static bool
2322 arm_warn_func_return (tree decl)
2323 {
2324 /* Naked functions are implemented entirely in assembly, including the
2325 return sequence, so suppress warnings about this. */
2326 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2327 }
2328
2329 \f
2330 /* Output assembler code for a block containing the constant parts
2331 of a trampoline, leaving space for the variable parts.
2332
2333 On the ARM, (if r8 is the static chain regnum, and remembering that
2334 referencing pc adds an offset of 8) the trampoline looks like:
2335 ldr r8, [pc, #0]
2336 ldr pc, [pc]
2337 .word static chain value
2338 .word function's address
2339 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2340
2341 static void
2342 arm_asm_trampoline_template (FILE *f)
2343 {
2344 if (TARGET_ARM)
2345 {
2346 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2347 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2348 }
2349 else if (TARGET_THUMB2)
2350 {
2351 /* The Thumb-2 trampoline is similar to the arm implementation.
2352 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2353 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2354 STATIC_CHAIN_REGNUM, PC_REGNUM);
2355 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2356 }
2357 else
2358 {
2359 ASM_OUTPUT_ALIGN (f, 2);
2360 fprintf (f, "\t.code\t16\n");
2361 fprintf (f, ".Ltrampoline_start:\n");
2362 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2363 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2364 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2365 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2366 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2367 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2368 }
2369 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2370 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2371 }
2372
2373 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2374
2375 static void
2376 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2377 {
2378 rtx fnaddr, mem, a_tramp;
2379
2380 emit_block_move (m_tramp, assemble_trampoline_template (),
2381 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2382
2383 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2384 emit_move_insn (mem, chain_value);
2385
2386 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2387 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2388 emit_move_insn (mem, fnaddr);
2389
2390 a_tramp = XEXP (m_tramp, 0);
2391 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2392 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2393 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2394 }
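
/* For illustration (an assumed picture of the 32-bit case, not from the
   original sources): once arm_trampoline_init has run, the trampoline block
   looks like

       offset  0:  first template instruction (loads the static chain reg)
       offset  4:  second template instruction (loads pc)
       offset  8:  static chain value
       offset 12:  target function address

   matching the offsets 8 and 12 patched above for TARGET_32BIT.  */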
2395
2396 /* Thumb trampolines should be entered in thumb mode, so set
2397 the bottom bit of the address. */
2398
2399 static rtx
2400 arm_trampoline_adjust_address (rtx addr)
2401 {
2402 if (TARGET_THUMB)
2403 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2404 NULL, 0, OPTAB_LIB_WIDEN);
2405 return addr;
2406 }
2407 \f
2408 /* Return 1 if it is possible to return using a single instruction.
2409 If SIBLING is non-null, this is a test for a return before a sibling
2410 call. SIBLING is the call insn, so we can examine its register usage. */
2411
2412 int
2413 use_return_insn (int iscond, rtx sibling)
2414 {
2415 int regno;
2416 unsigned int func_type;
2417 unsigned long saved_int_regs;
2418 unsigned HOST_WIDE_INT stack_adjust;
2419 arm_stack_offsets *offsets;
2420
2421 /* Never use a return instruction before reload has run. */
2422 if (!reload_completed)
2423 return 0;
2424
2425 func_type = arm_current_func_type ();
2426
2427 /* Naked, volatile and stack alignment functions need special
2428 consideration. */
2429 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2430 return 0;
2431
2432 /* So do interrupt functions that use the frame pointer and Thumb
2433 interrupt functions. */
2434 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2435 return 0;
2436
2437 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
2438 && !optimize_function_for_size_p (cfun))
2439 return 0;
2440
2441 offsets = arm_get_frame_offsets ();
2442 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2443
2444 /* As do variadic functions. */
2445 if (crtl->args.pretend_args_size
2446 || cfun->machine->uses_anonymous_args
2447 /* Or if the function calls __builtin_eh_return () */
2448 || crtl->calls_eh_return
2449 /* Or if the function calls alloca */
2450 || cfun->calls_alloca
2451 /* Or if there is a stack adjustment. However, if the stack pointer
2452 is saved on the stack, we can use a pre-incrementing stack load. */
2453 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2454 && stack_adjust == 4)))
2455 return 0;
2456
2457 saved_int_regs = offsets->saved_regs_mask;
2458
2459 /* Unfortunately, the insn
2460
2461 ldmib sp, {..., sp, ...}
2462
2463 triggers a bug on most SA-110 based devices, such that the stack
2464 pointer won't be correctly restored if the instruction takes a
2465 page fault. We work around this problem by popping r3 along with
2466 the other registers, since that is never slower than executing
2467 another instruction.
2468
2469 We test for !arm_arch5 here, because code for any architecture
2470 less than this could potentially be run on one of the buggy
2471 chips. */
2472 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2473 {
2474 /* Validate that r3 is a call-clobbered register (always true in
2475 the default abi) ... */
2476 if (!call_used_regs[3])
2477 return 0;
2478
2479 /* ... that it isn't being used for a return value ... */
2480 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2481 return 0;
2482
2483 /* ... or for a tail-call argument ... */
2484 if (sibling)
2485 {
2486 gcc_assert (CALL_P (sibling));
2487
2488 if (find_regno_fusage (sibling, USE, 3))
2489 return 0;
2490 }
2491
2492 /* ... and that there are no call-saved registers in r0-r2
2493 (always true in the default ABI). */
2494 if (saved_int_regs & 0x7)
2495 return 0;
2496 }
2497
2498 /* Can't be done if interworking with Thumb, and any registers have been
2499 stacked. */
2500 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2501 return 0;
2502
2503 /* On StrongARM, conditional returns are expensive if they aren't
2504 taken and multiple registers have been stacked. */
2505 if (iscond && arm_tune_strongarm)
2506 {
2507 /* Conditional return when just the LR is stored is a simple
2508 conditional-load instruction, that's not expensive. */
2509 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2510 return 0;
2511
2512 if (flag_pic
2513 && arm_pic_register != INVALID_REGNUM
2514 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2515 return 0;
2516 }
2517
2518 /* If there are saved registers but the LR isn't saved, then we need
2519 two instructions for the return. */
2520 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2521 return 0;
2522
2523 /* Can't be done if any of the VFP regs are pushed,
2524 since this also requires an insn. */
2525 if (TARGET_HARD_FLOAT && TARGET_VFP)
2526 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2527 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2528 return 0;
2529
2530 if (TARGET_REALLY_IWMMXT)
2531 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2532 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2533 return 0;
2534
2535 return 1;
2536 }
2537
2538 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2539 shrink-wrapping if possible. This is the case if we need to emit a
2540 prologue, which we can test by looking at the offsets. */
2541 bool
2542 use_simple_return_p (void)
2543 {
2544 arm_stack_offsets *offsets;
2545
2546 offsets = arm_get_frame_offsets ();
2547 return offsets->outgoing_args != 0;
2548 }
2549
2550 /* Return TRUE if int I is a valid immediate ARM constant. */
2551
2552 int
2553 const_ok_for_arm (HOST_WIDE_INT i)
2554 {
2555 int lowbit;
2556
2557 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2558 be all zero, or all one. */
2559 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2560 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2561 != ((~(unsigned HOST_WIDE_INT) 0)
2562 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2563 return FALSE;
2564
2565 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2566
2567 /* Fast return for 0 and small values. We must do this for zero, since
2568 the code below can't handle that one case. */
2569 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2570 return TRUE;
2571
2572 /* Get the number of trailing zeros. */
2573 lowbit = ffs((int) i) - 1;
2574
2575 /* Only even shifts are allowed in ARM mode so round down to the
2576 nearest even number. */
2577 if (TARGET_ARM)
2578 lowbit &= ~1;
2579
2580 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2581 return TRUE;
2582
2583 if (TARGET_ARM)
2584 {
2585 /* Allow rotated constants in ARM mode. */
2586 if (lowbit <= 4
2587 && ((i & ~0xc000003f) == 0
2588 || (i & ~0xf000000f) == 0
2589 || (i & ~0xfc000003) == 0))
2590 return TRUE;
2591 }
2592 else
2593 {
2594 HOST_WIDE_INT v;
2595
2596 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2597 v = i & 0xff;
2598 v |= v << 16;
2599 if (i == v || i == (v | (v << 8)))
2600 return TRUE;
2601
2602 /* Allow repeated pattern 0xXY00XY00. */
2603 v = i & 0xff00;
2604 v |= v << 16;
2605 if (i == v)
2606 return TRUE;
2607 }
2608
2609 return FALSE;
2610 }
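
/* Illustrative examples (not from the original sources) of the rules above:
   0x000000ff and 0x0000ff00 are valid in both ARM and Thumb-2 state (an
   8-bit value at an even bit position); 0x00ff00ff is valid only for
   Thumb-2 (a replicated 0x00XY00XY pattern); 0x00000101 is valid for
   neither.  */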
2611
2612 /* Return true if I is a valid constant for the operation CODE. */
2613 int
2614 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2615 {
2616 if (const_ok_for_arm (i))
2617 return 1;
2618
2619 switch (code)
2620 {
2621 case SET:
2622 /* See if we can use movw. */
2623 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2624 return 1;
2625 else
2626 /* Otherwise, try mvn. */
2627 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2628
2629 case PLUS:
2630 /* See if we can use addw or subw. */
2631 if (TARGET_THUMB2
2632 && ((i & 0xfffff000) == 0
2633 || ((-i) & 0xfffff000) == 0))
2634 return 1;
2635 /* else fall through. */
2636
2637 case COMPARE:
2638 case EQ:
2639 case NE:
2640 case GT:
2641 case LE:
2642 case LT:
2643 case GE:
2644 case GEU:
2645 case LTU:
2646 case GTU:
2647 case LEU:
2648 case UNORDERED:
2649 case ORDERED:
2650 case UNEQ:
2651 case UNGE:
2652 case UNLT:
2653 case UNGT:
2654 case UNLE:
2655 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2656
2657 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2658 case XOR:
2659 return 0;
2660
2661 case IOR:
2662 if (TARGET_THUMB2)
2663 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2664 return 0;
2665
2666 case AND:
2667 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2668
2669 default:
2670 gcc_unreachable ();
2671 }
2672 }
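
/* Illustrative example (not from the original sources): for PLUS, adding
   -256 (0xffffff00) is accepted even though that value is not itself a
   valid immediate, because its negation (0x100) is; the operation can then
   be emitted as a subtraction of 0x100.  */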
2673
2674 /* Return true if I is a valid DImode constant for the operation CODE. */
2675 int
2676 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2677 {
2678 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2679 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2680 rtx hi = GEN_INT (hi_val);
2681 rtx lo = GEN_INT (lo_val);
2682
2683 if (TARGET_THUMB1)
2684 return 0;
2685
2686 switch (code)
2687 {
2688 case AND:
2689 case IOR:
2690 case XOR:
2691 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
2692 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
2693 case PLUS:
2694 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2695
2696 default:
2697 return 0;
2698 }
2699 }
2700
2701 /* Emit a sequence of insns to handle a large constant.
2702 CODE is the code of the operation required, it can be any of SET, PLUS,
2703 IOR, AND, XOR, MINUS;
2704 MODE is the mode in which the operation is being performed;
2705 VAL is the integer to operate on;
2706 SOURCE is the other operand (a register, or a null-pointer for SET);
2707 SUBTARGETS means it is safe to create scratch registers if that will
2708 either produce a simpler sequence, or we will want to cse the values.
2709 Return value is the number of insns emitted. */
2710
2711 /* ??? Tweak this for thumb2. */
2712 int
2713 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2714 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2715 {
2716 rtx cond;
2717
2718 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2719 cond = COND_EXEC_TEST (PATTERN (insn));
2720 else
2721 cond = NULL_RTX;
2722
2723 if (subtargets || code == SET
2724 || (REG_P (target) && REG_P (source)
2725 && REGNO (target) != REGNO (source)))
2726 {
2727 /* After arm_reorg has been called, we can't fix up expensive
2728 constants by pushing them into memory so we must synthesize
2729 them in-line, regardless of the cost. This is only likely to
2730 be more costly on chips that have load delay slots and we are
2731 compiling without running the scheduler (so no splitting
2732 occurred before the final instruction emission).
2733
2734 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2735 */
2736 if (!after_arm_reorg
2737 && !cond
2738 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2739 1, 0)
2740 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2741 + (code != SET))))
2742 {
2743 if (code == SET)
2744 {
2745 /* Currently SET is the only monadic value for CODE; all
2746 the rest are dyadic. */
2747 if (TARGET_USE_MOVT)
2748 arm_emit_movpair (target, GEN_INT (val));
2749 else
2750 emit_set_insn (target, GEN_INT (val));
2751
2752 return 1;
2753 }
2754 else
2755 {
2756 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2757
2758 if (TARGET_USE_MOVT)
2759 arm_emit_movpair (temp, GEN_INT (val));
2760 else
2761 emit_set_insn (temp, GEN_INT (val));
2762
2763 /* For MINUS, the value is subtracted from, since we never
2764 have subtraction of a constant. */
2765 if (code == MINUS)
2766 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2767 else
2768 emit_set_insn (target,
2769 gen_rtx_fmt_ee (code, mode, source, temp));
2770 return 2;
2771 }
2772 }
2773 }
2774
2775 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2776 1);
2777 }
2778
2779 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2780 ARM/THUMB2 immediates and add up to VAL.
2781 The function return value gives the number of insns required. */
2782 static int
2783 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2784 struct four_ints *return_sequence)
2785 {
2786 int best_consecutive_zeros = 0;
2787 int i;
2788 int best_start = 0;
2789 int insns1, insns2;
2790 struct four_ints tmp_sequence;
2791
2792 /* If we aren't targeting ARM, the best place to start is always at
2793 the bottom, otherwise look more closely. */
2794 if (TARGET_ARM)
2795 {
2796 for (i = 0; i < 32; i += 2)
2797 {
2798 int consecutive_zeros = 0;
2799
2800 if (!(val & (3 << i)))
2801 {
2802 while ((i < 32) && !(val & (3 << i)))
2803 {
2804 consecutive_zeros += 2;
2805 i += 2;
2806 }
2807 if (consecutive_zeros > best_consecutive_zeros)
2808 {
2809 best_consecutive_zeros = consecutive_zeros;
2810 best_start = i - consecutive_zeros;
2811 }
2812 i -= 2;
2813 }
2814 }
2815 }
2816
2817 /* So long as it won't require any more insns to do so, it's
2818 desirable to emit a small constant (in bits 0...9) in the last
2819 insn. This way there is more chance that it can be combined with
2820 a later addressing insn to form a pre-indexed load or store
2821 operation. Consider:
2822
2823 *((volatile int *)0xe0000100) = 1;
2824 *((volatile int *)0xe0000110) = 2;
2825
2826 We want this to wind up as:
2827
2828 mov rA, #0xe0000000
2829 mov rB, #1
2830 str rB, [rA, #0x100]
2831 mov rB, #2
2832 str rB, [rA, #0x110]
2833
2834 rather than having to synthesize both large constants from scratch.
2835
2836 Therefore, we calculate how many insns would be required to emit
2837 the constant starting from `best_start', and also starting from
2838 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2839 yield a shorter sequence, we may as well use zero. */
2840 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2841 if (best_start != 0
2842 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2843 {
2844 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2845 if (insns2 <= insns1)
2846 {
2847 *return_sequence = tmp_sequence;
2848 insns1 = insns2;
2849 }
2850 }
2851
2852 return insns1;
2853 }
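
/* Worked example (not from the original sources): in ARM state 0x0000ffff
   cannot be encoded as a single immediate, so for a SET this routine returns
   the two-element sequence { 0xff00, 0xff }; the caller then materializes
   the constant with a mov of 0xff00 followed by one more instruction that
   adds in 0xff.  */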
2854
2855 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2856 static int
2857 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2858 struct four_ints *return_sequence, int i)
2859 {
2860 int remainder = val & 0xffffffff;
2861 int insns = 0;
2862
2863 /* Try and find a way of doing the job in either two or three
2864 instructions.
2865
2866 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2867 location. We start at position I. This may be the MSB, or
2868 optimal_immediate_sequence may have positioned it at the largest block
2869 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2870 wrapping around to the top of the word when we drop off the bottom.
2871 In the worst case this code should produce no more than four insns.
2872
2873 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2874 constants, shifted to any arbitrary location. We should always start
2875 at the MSB. */
2876 do
2877 {
2878 int end;
2879 unsigned int b1, b2, b3, b4;
2880 unsigned HOST_WIDE_INT result;
2881 int loc;
2882
2883 gcc_assert (insns < 4);
2884
2885 if (i <= 0)
2886 i += 32;
2887
2888 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2889 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2890 {
2891 loc = i;
2892 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2893 /* We can use addw/subw for the last 12 bits. */
2894 result = remainder;
2895 else
2896 {
2897 /* Use an 8-bit shifted/rotated immediate. */
2898 end = i - 8;
2899 if (end < 0)
2900 end += 32;
2901 result = remainder & ((0x0ff << end)
2902 | ((i < end) ? (0xff >> (32 - end))
2903 : 0));
2904 i -= 8;
2905 }
2906 }
2907 else
2908 {
2909 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2910 arbitrary shifts. */
2911 i -= TARGET_ARM ? 2 : 1;
2912 continue;
2913 }
2914
2915 /* Next, see if we can do a better job with a thumb2 replicated
2916 constant.
2917
2918 We do it this way around to catch the cases like 0x01F001E0 where
2919 two 8-bit immediates would work, but a replicated constant would
2920 make it worse.
2921
2922 TODO: 16-bit constants that don't clear all the bits, but still win.
2923 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2924 if (TARGET_THUMB2)
2925 {
2926 b1 = (remainder & 0xff000000) >> 24;
2927 b2 = (remainder & 0x00ff0000) >> 16;
2928 b3 = (remainder & 0x0000ff00) >> 8;
2929 b4 = remainder & 0xff;
2930
2931 if (loc > 24)
2932 {
2933 /* The 8-bit immediate already found clears b1 (and maybe b2),
2934 but must leave b3 and b4 alone. */
2935
2936 /* First try to find a 32-bit replicated constant that clears
2937 almost everything. We can assume that we can't do it in one,
2938 or else we wouldn't be here. */
2939 unsigned int tmp = b1 & b2 & b3 & b4;
2940 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2941 + (tmp << 24);
2942 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2943 + (tmp == b3) + (tmp == b4);
2944 if (tmp
2945 && (matching_bytes >= 3
2946 || (matching_bytes == 2
2947 && const_ok_for_op (remainder & ~tmp2, code))))
2948 {
2949 /* At least 3 of the bytes match, and the fourth has at
2950 least as many bits set, or two of the bytes match
2951 and it will only require one more insn to finish. */
2952 result = tmp2;
2953 i = tmp != b1 ? 32
2954 : tmp != b2 ? 24
2955 : tmp != b3 ? 16
2956 : 8;
2957 }
2958
2959 /* Second, try to find a 16-bit replicated constant that can
2960 leave three of the bytes clear. If b2 or b4 is already
2961 zero, then we can. If the 8-bit from above would not
2962 clear b2 anyway, then we still win. */
2963 else if (b1 == b3 && (!b2 || !b4
2964 || (remainder & 0x00ff0000 & ~result)))
2965 {
2966 result = remainder & 0xff00ff00;
2967 i = 24;
2968 }
2969 }
2970 else if (loc > 16)
2971 {
2972 /* The 8-bit immediate already found clears b2 (and maybe b3)
2973 and we don't get here unless b1 is already clear, but it will
2974 leave b4 unchanged. */
2975
2976 /* If we can clear b2 and b4 at once, then we win, since the
2977 8-bits couldn't possibly reach that far. */
2978 if (b2 == b4)
2979 {
2980 result = remainder & 0x00ff00ff;
2981 i = 16;
2982 }
2983 }
2984 }
2985
2986 return_sequence->i[insns++] = result;
2987 remainder &= ~result;
2988
2989 if (code == SET || code == MINUS)
2990 code = PLUS;
2991 }
2992 while (remainder);
2993
2994 return insns;
2995 }
2996
2997 /* Emit an instruction with the indicated PATTERN. If COND is
2998 non-NULL, conditionalize the execution of the instruction on COND
2999 being true. */
3000
3001 static void
3002 emit_constant_insn (rtx cond, rtx pattern)
3003 {
3004 if (cond)
3005 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3006 emit_insn (pattern);
3007 }
3008
3009 /* As above, but extra parameter GENERATE which, if clear, suppresses
3010 RTL generation. */
3011
3012 static int
3013 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3014 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3015 int generate)
3016 {
3017 int can_invert = 0;
3018 int can_negate = 0;
3019 int final_invert = 0;
3020 int i;
3021 int set_sign_bit_copies = 0;
3022 int clear_sign_bit_copies = 0;
3023 int clear_zero_bit_copies = 0;
3024 int set_zero_bit_copies = 0;
3025 int insns = 0, neg_insns, inv_insns;
3026 unsigned HOST_WIDE_INT temp1, temp2;
3027 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3028 struct four_ints *immediates;
3029 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3030
3031 /* Find out which operations are safe for a given CODE. Also do a quick
3032 check for degenerate cases; these can occur when DImode operations
3033 are split. */
3034 switch (code)
3035 {
3036 case SET:
3037 can_invert = 1;
3038 break;
3039
3040 case PLUS:
3041 can_negate = 1;
3042 break;
3043
3044 case IOR:
3045 if (remainder == 0xffffffff)
3046 {
3047 if (generate)
3048 emit_constant_insn (cond,
3049 gen_rtx_SET (VOIDmode, target,
3050 GEN_INT (ARM_SIGN_EXTEND (val))));
3051 return 1;
3052 }
3053
3054 if (remainder == 0)
3055 {
3056 if (reload_completed && rtx_equal_p (target, source))
3057 return 0;
3058
3059 if (generate)
3060 emit_constant_insn (cond,
3061 gen_rtx_SET (VOIDmode, target, source));
3062 return 1;
3063 }
3064 break;
3065
3066 case AND:
3067 if (remainder == 0)
3068 {
3069 if (generate)
3070 emit_constant_insn (cond,
3071 gen_rtx_SET (VOIDmode, target, const0_rtx));
3072 return 1;
3073 }
3074 if (remainder == 0xffffffff)
3075 {
3076 if (reload_completed && rtx_equal_p (target, source))
3077 return 0;
3078 if (generate)
3079 emit_constant_insn (cond,
3080 gen_rtx_SET (VOIDmode, target, source));
3081 return 1;
3082 }
3083 can_invert = 1;
3084 break;
3085
3086 case XOR:
3087 if (remainder == 0)
3088 {
3089 if (reload_completed && rtx_equal_p (target, source))
3090 return 0;
3091 if (generate)
3092 emit_constant_insn (cond,
3093 gen_rtx_SET (VOIDmode, target, source));
3094 return 1;
3095 }
3096
3097 if (remainder == 0xffffffff)
3098 {
3099 if (generate)
3100 emit_constant_insn (cond,
3101 gen_rtx_SET (VOIDmode, target,
3102 gen_rtx_NOT (mode, source)));
3103 return 1;
3104 }
3105 final_invert = 1;
3106 break;
3107
3108 case MINUS:
3109 /* We treat MINUS as (val - source), since (source - val) is always
3110 passed as (source + (-val)). */
3111 if (remainder == 0)
3112 {
3113 if (generate)
3114 emit_constant_insn (cond,
3115 gen_rtx_SET (VOIDmode, target,
3116 gen_rtx_NEG (mode, source)));
3117 return 1;
3118 }
3119 if (const_ok_for_arm (val))
3120 {
3121 if (generate)
3122 emit_constant_insn (cond,
3123 gen_rtx_SET (VOIDmode, target,
3124 gen_rtx_MINUS (mode, GEN_INT (val),
3125 source)));
3126 return 1;
3127 }
3128
3129 break;
3130
3131 default:
3132 gcc_unreachable ();
3133 }
3134
3135 /* If we can do it in one insn get out quickly. */
3136 if (const_ok_for_op (val, code))
3137 {
3138 if (generate)
3139 emit_constant_insn (cond,
3140 gen_rtx_SET (VOIDmode, target,
3141 (source
3142 ? gen_rtx_fmt_ee (code, mode, source,
3143 GEN_INT (val))
3144 : GEN_INT (val))));
3145 return 1;
3146 }
3147
3148 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3149 insn. */
3150 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3151 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3152 {
3153 if (generate)
3154 {
3155 if (mode == SImode && i == 16)
3156 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3157 smaller insn. */
3158 emit_constant_insn (cond,
3159 gen_zero_extendhisi2
3160 (target, gen_lowpart (HImode, source)));
3161 else
3162 /* Extz only supports SImode, but we can coerce the operands
3163 into that mode. */
3164 emit_constant_insn (cond,
3165 gen_extzv_t2 (gen_lowpart (SImode, target),
3166 gen_lowpart (SImode, source),
3167 GEN_INT (i), const0_rtx));
3168 }
3169
3170 return 1;
3171 }
3172
3173 /* Calculate a few attributes that may be useful for specific
3174 optimizations. */
3175 /* Count number of leading zeros. */
3176 for (i = 31; i >= 0; i--)
3177 {
3178 if ((remainder & (1 << i)) == 0)
3179 clear_sign_bit_copies++;
3180 else
3181 break;
3182 }
3183
3184 /* Count number of leading 1's. */
3185 for (i = 31; i >= 0; i--)
3186 {
3187 if ((remainder & (1 << i)) != 0)
3188 set_sign_bit_copies++;
3189 else
3190 break;
3191 }
3192
3193 /* Count number of trailing zeros. */
3194 for (i = 0; i <= 31; i++)
3195 {
3196 if ((remainder & (1 << i)) == 0)
3197 clear_zero_bit_copies++;
3198 else
3199 break;
3200 }
3201
3202 /* Count number of trailing 1's. */
3203 for (i = 0; i <= 31; i++)
3204 {
3205 if ((remainder & (1 << i)) != 0)
3206 set_zero_bit_copies++;
3207 else
3208 break;
3209 }
3210
3211 switch (code)
3212 {
3213 case SET:
3214 /* See if we can do this by sign_extending a constant that is known
3215 to be negative. This is a good way of doing it, since the shift
3216 may well merge into a subsequent insn. */
3217 if (set_sign_bit_copies > 1)
3218 {
3219 if (const_ok_for_arm
3220 (temp1 = ARM_SIGN_EXTEND (remainder
3221 << (set_sign_bit_copies - 1))))
3222 {
3223 if (generate)
3224 {
3225 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3226 emit_constant_insn (cond,
3227 gen_rtx_SET (VOIDmode, new_src,
3228 GEN_INT (temp1)));
3229 emit_constant_insn (cond,
3230 gen_ashrsi3 (target, new_src,
3231 GEN_INT (set_sign_bit_copies - 1)));
3232 }
3233 return 2;
3234 }
3235 /* For an inverted constant, we will need to set the low bits,
3236 these will be shifted out of harm's way. */
3237 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3238 if (const_ok_for_arm (~temp1))
3239 {
3240 if (generate)
3241 {
3242 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3243 emit_constant_insn (cond,
3244 gen_rtx_SET (VOIDmode, new_src,
3245 GEN_INT (temp1)));
3246 emit_constant_insn (cond,
3247 gen_ashrsi3 (target, new_src,
3248 GEN_INT (set_sign_bit_copies - 1)));
3249 }
3250 return 2;
3251 }
3252 }
3253
3254 /* See if we can calculate the value as the difference between two
3255 valid immediates. */
3256 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3257 {
3258 int topshift = clear_sign_bit_copies & ~1;
3259
3260 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3261 & (0xff000000 >> topshift));
3262
3263 /* If temp1 is zero, then that means the 9 most significant
3264 bits of remainder were 1 and we've caused it to overflow.
3265 When topshift is 0 we don't need to do anything since we
3266 can borrow from 'bit 32'. */
3267 if (temp1 == 0 && topshift != 0)
3268 temp1 = 0x80000000 >> (topshift - 1);
3269
3270 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3271
3272 if (const_ok_for_arm (temp2))
3273 {
3274 if (generate)
3275 {
3276 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3277 emit_constant_insn (cond,
3278 gen_rtx_SET (VOIDmode, new_src,
3279 GEN_INT (temp1)));
3280 emit_constant_insn (cond,
3281 gen_addsi3 (target, new_src,
3282 GEN_INT (-temp2)));
3283 }
3284
3285 return 2;
3286 }
3287 }
3288
3289 /* See if we can generate this by setting the bottom (or the top)
3290 16 bits, and then shifting these into the other half of the
3291 word. We only look for the simplest cases; doing more would cost
3292 too much. Be careful, however, not to generate this when the
3293 alternative would take fewer insns. */
3294 if (val & 0xffff0000)
3295 {
3296 temp1 = remainder & 0xffff0000;
3297 temp2 = remainder & 0x0000ffff;
3298
3299 /* Overlaps outside this range are best done using other methods. */
3300 for (i = 9; i < 24; i++)
3301 {
3302 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3303 && !const_ok_for_arm (temp2))
3304 {
3305 rtx new_src = (subtargets
3306 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3307 : target);
3308 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3309 source, subtargets, generate);
3310 source = new_src;
3311 if (generate)
3312 emit_constant_insn
3313 (cond,
3314 gen_rtx_SET
3315 (VOIDmode, target,
3316 gen_rtx_IOR (mode,
3317 gen_rtx_ASHIFT (mode, source,
3318 GEN_INT (i)),
3319 source)));
3320 return insns + 1;
3321 }
3322 }
3323
3324 /* Don't duplicate cases already considered. */
3325 for (i = 17; i < 24; i++)
3326 {
3327 if (((temp1 | (temp1 >> i)) == remainder)
3328 && !const_ok_for_arm (temp1))
3329 {
3330 rtx new_src = (subtargets
3331 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3332 : target);
3333 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3334 source, subtargets, generate);
3335 source = new_src;
3336 if (generate)
3337 emit_constant_insn
3338 (cond,
3339 gen_rtx_SET (VOIDmode, target,
3340 gen_rtx_IOR
3341 (mode,
3342 gen_rtx_LSHIFTRT (mode, source,
3343 GEN_INT (i)),
3344 source)));
3345 return insns + 1;
3346 }
3347 }
3348 }
3349 break;
3350
3351 case IOR:
3352 case XOR:
3353 /* If we have IOR or XOR, and the constant can be loaded in a
3354 single instruction, and we can find a temporary to put it in,
3355 then this can be done in two instructions instead of 3-4. */
3356 if (subtargets
3357 /* TARGET can't be NULL if SUBTARGETS is 0. */
3358 || (reload_completed && !reg_mentioned_p (target, source)))
3359 {
3360 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3361 {
3362 if (generate)
3363 {
3364 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3365
3366 emit_constant_insn (cond,
3367 gen_rtx_SET (VOIDmode, sub,
3368 GEN_INT (val)));
3369 emit_constant_insn (cond,
3370 gen_rtx_SET (VOIDmode, target,
3371 gen_rtx_fmt_ee (code, mode,
3372 source, sub)));
3373 }
3374 return 2;
3375 }
3376 }
3377
3378 if (code == XOR)
3379 break;
3380
3381 /* Convert
3382 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3383 and the remaining bits 0, e.g. 0xfff00000) to
3384 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3385
3386 This can be done in 2 instructions by using shifts with mov or mvn.
3387 e.g. for
3388 x = x | 0xfff00000;
3389 we generate:
3390 mvn r0, r0, asl #12
3391 mvn r0, r0, lsr #12 */
3392 if (set_sign_bit_copies > 8
3393 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3394 {
3395 if (generate)
3396 {
3397 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3398 rtx shift = GEN_INT (set_sign_bit_copies);
3399
3400 emit_constant_insn
3401 (cond,
3402 gen_rtx_SET (VOIDmode, sub,
3403 gen_rtx_NOT (mode,
3404 gen_rtx_ASHIFT (mode,
3405 source,
3406 shift))));
3407 emit_constant_insn
3408 (cond,
3409 gen_rtx_SET (VOIDmode, target,
3410 gen_rtx_NOT (mode,
3411 gen_rtx_LSHIFTRT (mode, sub,
3412 shift))));
3413 }
3414 return 2;
3415 }
3416
3417 /* Convert
3418 x = y | constant (which has set_zero_bit_copies number of trailing 1s)
3419 to
3420 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3421
3422 E.g. for r0 = r0 | 0xfff we generate
3423 mvn r0, r0, lsr #12
3424 mvn r0, r0, asl #12 */
3427 if (set_zero_bit_copies > 8
3428 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3429 {
3430 if (generate)
3431 {
3432 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3433 rtx shift = GEN_INT (set_zero_bit_copies);
3434
3435 emit_constant_insn
3436 (cond,
3437 gen_rtx_SET (VOIDmode, sub,
3438 gen_rtx_NOT (mode,
3439 gen_rtx_LSHIFTRT (mode,
3440 source,
3441 shift))));
3442 emit_constant_insn
3443 (cond,
3444 gen_rtx_SET (VOIDmode, target,
3445 gen_rtx_NOT (mode,
3446 gen_rtx_ASHIFT (mode, sub,
3447 shift))));
3448 }
3449 return 2;
3450 }
3451
3452 /* This will never be reached for Thumb2 because orn is a valid
3453 instruction. This is for Thumb1 and the ARM 32-bit cases.
3454
3455 x = y | constant (such that ~constant is a valid constant)
3456 Transform this to
3457 x = ~(~y & ~constant).
3458 */
3459 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3460 {
3461 if (generate)
3462 {
3463 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3464 emit_constant_insn (cond,
3465 gen_rtx_SET (VOIDmode, sub,
3466 gen_rtx_NOT (mode, source)));
3467 source = sub;
3468 if (subtargets)
3469 sub = gen_reg_rtx (mode);
3470 emit_constant_insn (cond,
3471 gen_rtx_SET (VOIDmode, sub,
3472 gen_rtx_AND (mode, source,
3473 GEN_INT (temp1))));
3474 emit_constant_insn (cond,
3475 gen_rtx_SET (VOIDmode, target,
3476 gen_rtx_NOT (mode, sub)));
3477 }
3478 return 3;
3479 }
3480 break;
3481
3482 case AND:
3483 /* See if two shifts will do two or more insns' worth of work. */
3484 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3485 {
3486 HOST_WIDE_INT shift_mask = ((0xffffffff
3487 << (32 - clear_sign_bit_copies))
3488 & 0xffffffff);
3489
3490 if ((remainder | shift_mask) != 0xffffffff)
3491 {
3492 if (generate)
3493 {
3494 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3495 insns = arm_gen_constant (AND, mode, cond,
3496 remainder | shift_mask,
3497 new_src, source, subtargets, 1);
3498 source = new_src;
3499 }
3500 else
3501 {
3502 rtx targ = subtargets ? NULL_RTX : target;
3503 insns = arm_gen_constant (AND, mode, cond,
3504 remainder | shift_mask,
3505 targ, source, subtargets, 0);
3506 }
3507 }
3508
3509 if (generate)
3510 {
3511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3512 rtx shift = GEN_INT (clear_sign_bit_copies);
3513
3514 emit_insn (gen_ashlsi3 (new_src, source, shift));
3515 emit_insn (gen_lshrsi3 (target, new_src, shift));
3516 }
3517
3518 return insns + 2;
3519 }
3520
3521 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3522 {
3523 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3524
3525 if ((remainder | shift_mask) != 0xffffffff)
3526 {
3527 if (generate)
3528 {
3529 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3530
3531 insns = arm_gen_constant (AND, mode, cond,
3532 remainder | shift_mask,
3533 new_src, source, subtargets, 1);
3534 source = new_src;
3535 }
3536 else
3537 {
3538 rtx targ = subtargets ? NULL_RTX : target;
3539
3540 insns = arm_gen_constant (AND, mode, cond,
3541 remainder | shift_mask,
3542 targ, source, subtargets, 0);
3543 }
3544 }
3545
3546 if (generate)
3547 {
3548 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3549 rtx shift = GEN_INT (clear_zero_bit_copies);
3550
3551 emit_insn (gen_lshrsi3 (new_src, source, shift));
3552 emit_insn (gen_ashlsi3 (target, new_src, shift));
3553 }
3554
3555 return insns + 2;
3556 }
3557
3558 break;
3559
3560 default:
3561 break;
3562 }
3563
3564 /* Calculate what the instruction sequences would be if we generated it
3565 normally, negated, or inverted. */
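/* For example, when negation is permitted (as it is for PLUS), a constant
   such as 0xffffff00 is cheaper in its negated form: 0x00000100 is a
   single valid immediate, so the addition becomes a subtraction of 0x100.  */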
3566 if (code == AND)
3567 /* AND cannot be split into multiple insns, so invert and use BIC. */
3568 insns = 99;
3569 else
3570 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3571
3572 if (can_negate)
3573 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3574 &neg_immediates);
3575 else
3576 neg_insns = 99;
3577
3578 if (can_invert || final_invert)
3579 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3580 &inv_immediates);
3581 else
3582 inv_insns = 99;
3583
3584 immediates = &pos_immediates;
3585
3586 /* Is the negated immediate sequence more efficient? */
3587 if (neg_insns < insns && neg_insns <= inv_insns)
3588 {
3589 insns = neg_insns;
3590 immediates = &neg_immediates;
3591 }
3592 else
3593 can_negate = 0;
3594
3595 /* Is the inverted immediate sequence more efficient?
3596 We must allow for an extra NOT instruction for XOR operations, although
3597 there is some chance that the final 'mvn' will get optimized later. */
3598 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3599 {
3600 insns = inv_insns;
3601 immediates = &inv_immediates;
3602 }
3603 else
3604 {
3605 can_invert = 0;
3606 final_invert = 0;
3607 }
3608
3609 /* Now output the chosen sequence as instructions. */
3610 if (generate)
3611 {
3612 for (i = 0; i < insns; i++)
3613 {
3614 rtx new_src, temp1_rtx;
3615
3616 temp1 = immediates->i[i];
3617
3618 if (code == SET || code == MINUS)
3619 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3620 else if ((final_invert || i < (insns - 1)) && subtargets)
3621 new_src = gen_reg_rtx (mode);
3622 else
3623 new_src = target;
3624
3625 if (can_invert)
3626 temp1 = ~temp1;
3627 else if (can_negate)
3628 temp1 = -temp1;
3629
3630 temp1 = trunc_int_for_mode (temp1, mode);
3631 temp1_rtx = GEN_INT (temp1);
3632
3633 if (code == SET)
3634 ;
3635 else if (code == MINUS)
3636 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3637 else
3638 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3639
3640 emit_constant_insn (cond,
3641 gen_rtx_SET (VOIDmode, new_src,
3642 temp1_rtx));
3643 source = new_src;
3644
3645 if (code == SET)
3646 {
3647 can_negate = can_invert;
3648 can_invert = 0;
3649 code = PLUS;
3650 }
3651 else if (code == MINUS)
3652 code = PLUS;
3653 }
3654 }
3655
3656 if (final_invert)
3657 {
3658 if (generate)
3659 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3660 gen_rtx_NOT (mode, source)));
3661 insns++;
3662 }
3663
3664 return insns;
3665 }
3666
3667 /* Canonicalize a comparison so that we are more likely to recognize it.
3668 This can be done for a few constant compares, where we can make the
3669 immediate value easier to load. */
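/* For example, (gt reg 0x1fffffff) uses a constant that is not a valid
   immediate, but it is equivalent to (ge reg 0x20000000), whose constant
   is valid; the code below makes that adjustment.  */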
3670
3671 static void
3672 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3673 bool op0_preserve_value)
3674 {
3675 enum machine_mode mode;
3676 unsigned HOST_WIDE_INT i, maxval;
3677
3678 mode = GET_MODE (*op0);
3679 if (mode == VOIDmode)
3680 mode = GET_MODE (*op1);
3681
3682 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3683
3684 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3685 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3686 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3687 for GTU/LEU in Thumb mode. */
3688 if (mode == DImode)
3689 {
3690 rtx tem;
3691
3692 if (*code == GT || *code == LE
3693 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
3694 {
3695 /* Missing comparison. First try to use an available
3696 comparison. */
3697 if (CONST_INT_P (*op1))
3698 {
3699 i = INTVAL (*op1);
3700 switch (*code)
3701 {
3702 case GT:
3703 case LE:
3704 if (i != maxval
3705 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3706 {
3707 *op1 = GEN_INT (i + 1);
3708 *code = *code == GT ? GE : LT;
3709 return;
3710 }
3711 break;
3712 case GTU:
3713 case LEU:
3714 if (i != ~((unsigned HOST_WIDE_INT) 0)
3715 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3716 {
3717 *op1 = GEN_INT (i + 1);
3718 *code = *code == GTU ? GEU : LTU;
3719 return;
3720 }
3721 break;
3722 default:
3723 gcc_unreachable ();
3724 }
3725 }
3726
3727 /* If that did not work, reverse the condition. */
3728 if (!op0_preserve_value)
3729 {
3730 tem = *op0;
3731 *op0 = *op1;
3732 *op1 = tem;
3733 *code = (int)swap_condition ((enum rtx_code)*code);
3734 }
3735 }
3736 return;
3737 }
3738
3739 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3740 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3741 to facilitate possible combining with a cmp into 'ands'. */
3742 if (mode == SImode
3743 && GET_CODE (*op0) == ZERO_EXTEND
3744 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3745 && GET_MODE (XEXP (*op0, 0)) == QImode
3746 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3747 && subreg_lowpart_p (XEXP (*op0, 0))
3748 && *op1 == const0_rtx)
3749 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3750 GEN_INT (255));
3751
3752 /* Comparisons smaller than DImode. Only adjust comparisons against
3753 an out-of-range constant. */
3754 if (!CONST_INT_P (*op1)
3755 || const_ok_for_arm (INTVAL (*op1))
3756 || const_ok_for_arm (- INTVAL (*op1)))
3757 return;
3758
3759 i = INTVAL (*op1);
3760
3761 switch (*code)
3762 {
3763 case EQ:
3764 case NE:
3765 return;
3766
3767 case GT:
3768 case LE:
3769 if (i != maxval
3770 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3771 {
3772 *op1 = GEN_INT (i + 1);
3773 *code = *code == GT ? GE : LT;
3774 return;
3775 }
3776 break;
3777
3778 case GE:
3779 case LT:
3780 if (i != ~maxval
3781 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3782 {
3783 *op1 = GEN_INT (i - 1);
3784 *code = *code == GE ? GT : LE;
3785 return;
3786 }
3787 break;
3788
3789 case GTU:
3790 case LEU:
3791 if (i != ~((unsigned HOST_WIDE_INT) 0)
3792 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3793 {
3794 *op1 = GEN_INT (i + 1);
3795 *code = *code == GTU ? GEU : LTU;
3796 return;
3797 }
3798 break;
3799
3800 case GEU:
3801 case LTU:
3802 if (i != 0
3803 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3804 {
3805 *op1 = GEN_INT (i - 1);
3806 *code = *code == GEU ? GTU : LEU;
3807 return;
3808 }
3809 break;
3810
3811 default:
3812 gcc_unreachable ();
3813 }
3814 }
3815
3816
3817 /* Define how to find the value returned by a function. */
3818
3819 static rtx
3820 arm_function_value(const_tree type, const_tree func,
3821 bool outgoing ATTRIBUTE_UNUSED)
3822 {
3823 enum machine_mode mode;
3824 int unsignedp ATTRIBUTE_UNUSED;
3825 rtx r ATTRIBUTE_UNUSED;
3826
3827 mode = TYPE_MODE (type);
3828
3829 if (TARGET_AAPCS_BASED)
3830 return aapcs_allocate_return_reg (mode, type, func);
3831
3832 /* Promote integer types. */
3833 if (INTEGRAL_TYPE_P (type))
3834 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3835
3836 /* Promote small structs returned in a register to full-word size
3837 for big-endian AAPCS. */
3838 if (arm_return_in_msb (type))
3839 {
3840 HOST_WIDE_INT size = int_size_in_bytes (type);
3841 if (size % UNITS_PER_WORD != 0)
3842 {
3843 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3844 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3845 }
3846 }
3847
3848 return arm_libcall_value_1 (mode);
3849 }
3850
3851 /* libcall hashtable helpers. */
3852
3853 struct libcall_hasher : typed_noop_remove <rtx_def>
3854 {
3855 typedef rtx_def value_type;
3856 typedef rtx_def compare_type;
3857 static inline hashval_t hash (const value_type *);
3858 static inline bool equal (const value_type *, const compare_type *);
3859 static inline void remove (value_type *);
3860 };
3861
3862 inline bool
3863 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
3864 {
3865 return rtx_equal_p (p1, p2);
3866 }
3867
3868 inline hashval_t
3869 libcall_hasher::hash (const value_type *p1)
3870 {
3871 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
3872 }
3873
3874 typedef hash_table <libcall_hasher> libcall_table_type;
3875
3876 static void
3877 add_libcall (libcall_table_type htab, rtx libcall)
3878 {
3879 *htab.find_slot (libcall, INSERT) = libcall;
3880 }
3881
3882 static bool
3883 arm_libcall_uses_aapcs_base (const_rtx libcall)
3884 {
3885 static bool init_done = false;
3886 static libcall_table_type libcall_htab;
3887
3888 if (!init_done)
3889 {
3890 init_done = true;
3891
3892 libcall_htab.create (31);
3893 add_libcall (libcall_htab,
3894 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3895 add_libcall (libcall_htab,
3896 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3897 add_libcall (libcall_htab,
3898 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3899 add_libcall (libcall_htab,
3900 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3901
3902 add_libcall (libcall_htab,
3903 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3904 add_libcall (libcall_htab,
3905 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3906 add_libcall (libcall_htab,
3907 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3908 add_libcall (libcall_htab,
3909 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3910
3911 add_libcall (libcall_htab,
3912 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3913 add_libcall (libcall_htab,
3914 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3915 add_libcall (libcall_htab,
3916 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3917 add_libcall (libcall_htab,
3918 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3919 add_libcall (libcall_htab,
3920 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3921 add_libcall (libcall_htab,
3922 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3923 add_libcall (libcall_htab,
3924 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3925 add_libcall (libcall_htab,
3926 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3927
3928 /* Values from double-precision helper functions are returned in core
3929 registers if the selected core only supports single-precision
3930 arithmetic, even if we are using the hard-float ABI. The same is
3931 true for single-precision helpers, but we will never be using the
3932 hard-float ABI on a CPU which doesn't support single-precision
3933 operations in hardware. */
3934 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3935 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3936 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3937 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3938 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3939 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3940 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3941 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3942 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3943 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3944 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3945 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3946 SFmode));
3947 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3948 DFmode));
3949 }
3950
3951 return libcall && libcall_htab.find (libcall) != NULL;
3952 }
3953
3954 static rtx
3955 arm_libcall_value_1 (enum machine_mode mode)
3956 {
3957 if (TARGET_AAPCS_BASED)
3958 return aapcs_libcall_value (mode);
3959 else if (TARGET_IWMMXT_ABI
3960 && arm_vector_mode_supported_p (mode))
3961 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3962 else
3963 return gen_rtx_REG (mode, ARG_REGISTER (1));
3964 }
3965
3966 /* Define how to find the value returned by a library function
3967 assuming the value has mode MODE. */
3968
3969 static rtx
3970 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3971 {
3972 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3973 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3974 {
3975 /* The following libcalls return their result in integer registers,
3976 even though they return a floating point value. */
3977 if (arm_libcall_uses_aapcs_base (libcall))
3978 return gen_rtx_REG (mode, ARG_REGISTER(1));
3979
3980 }
3981
3982 return arm_libcall_value_1 (mode);
3983 }
3984
3985 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3986
3987 static bool
3988 arm_function_value_regno_p (const unsigned int regno)
3989 {
3990 if (regno == ARG_REGISTER (1)
3991 || (TARGET_32BIT
3992 && TARGET_AAPCS_BASED
3993 && TARGET_VFP
3994 && TARGET_HARD_FLOAT
3995 && regno == FIRST_VFP_REGNUM)
3996 || (TARGET_IWMMXT_ABI
3997 && regno == FIRST_IWMMXT_REGNUM))
3998 return true;
3999
4000 return false;
4001 }
4002
4003 /* Determine the amount of memory needed to store the possible return
4004 registers of an untyped call. */
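/* The base 16 bytes covers the four core registers r0-r3; a hard-float
   VFP ABI adds a further 32 bytes for VFP return registers and the
   iWMMXt ABI another 8 bytes for an iWMMXt register.  */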
4005 int
4006 arm_apply_result_size (void)
4007 {
4008 int size = 16;
4009
4010 if (TARGET_32BIT)
4011 {
4012 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4013 size += 32;
4014 if (TARGET_IWMMXT_ABI)
4015 size += 8;
4016 }
4017
4018 return size;
4019 }
4020
4021 /* Decide whether TYPE should be returned in memory (true)
4022 or in a register (false). FNTYPE is the type of the function making
4023 the call. */
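/* For example, under AAPCS a 4-byte struct comes back in r0 (false),
   while an 8-byte struct of two ints goes in memory (true); an 8-byte
   struct of two floats may still be returned in VFP registers (false)
   when the hard-float variant applies.  */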
4024 static bool
4025 arm_return_in_memory (const_tree type, const_tree fntype)
4026 {
4027 HOST_WIDE_INT size;
4028
4029 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4030
4031 if (TARGET_AAPCS_BASED)
4032 {
4033 /* Simple, non-aggregate types (i.e. not including vectors and
4034 complex) are always returned in a register (or registers).
4035 We don't care about which register here, so we can short-cut
4036 some of the detail. */
4037 if (!AGGREGATE_TYPE_P (type)
4038 && TREE_CODE (type) != VECTOR_TYPE
4039 && TREE_CODE (type) != COMPLEX_TYPE)
4040 return false;
4041
4042 /* Any return value that is no larger than one word can be
4043 returned in r0. */
4044 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4045 return false;
4046
4047 /* Check any available co-processors to see if they accept the
4048 type as a register candidate (VFP, for example, can return
4049 some aggregates in consecutive registers). These aren't
4050 available if the call is variadic. */
4051 if (aapcs_select_return_coproc (type, fntype) >= 0)
4052 return false;
4053
4054 /* Vector values should be returned using ARM registers, not
4055 memory (unless they're over 16 bytes, which will break since
4056 we only have four call-clobbered registers to play with). */
4057 if (TREE_CODE (type) == VECTOR_TYPE)
4058 return (size < 0 || size > (4 * UNITS_PER_WORD));
4059
4060 /* The rest go in memory. */
4061 return true;
4062 }
4063
4064 if (TREE_CODE (type) == VECTOR_TYPE)
4065 return (size < 0 || size > (4 * UNITS_PER_WORD));
4066
4067 if (!AGGREGATE_TYPE_P (type)
4068 && (TREE_CODE (type) != VECTOR_TYPE))
4069 /* All simple types are returned in registers. */
4070 return false;
4071
4072 if (arm_abi != ARM_ABI_APCS)
4073 {
4074 /* ATPCS and later return aggregate types in memory only if they are
4075 larger than a word (or are variable size). */
4076 return (size < 0 || size > UNITS_PER_WORD);
4077 }
4078
4079 /* For the arm-wince targets we choose to be compatible with Microsoft's
4080 ARM and Thumb compilers, which always return aggregates in memory. */
4081 #ifndef ARM_WINCE
4082 /* All structures/unions bigger than one word are returned in memory.
4083 Also catch the case where int_size_in_bytes returns -1. In this case
4084 the aggregate is either huge or of variable size, and in either case
4085 we will want to return it via memory and not in a register. */
4086 if (size < 0 || size > UNITS_PER_WORD)
4087 return true;
4088
4089 if (TREE_CODE (type) == RECORD_TYPE)
4090 {
4091 tree field;
4092
4093 /* For a struct the APCS says that we only return in a register
4094 if the type is 'integer like' and every addressable element
4095 has an offset of zero. For practical purposes this means
4096 that the structure can have at most one non bit-field element
4097 and that this element must be the first one in the structure. */
4098
4099 /* Find the first field, ignoring non FIELD_DECL things which will
4100 have been created by C++. */
4101 for (field = TYPE_FIELDS (type);
4102 field && TREE_CODE (field) != FIELD_DECL;
4103 field = DECL_CHAIN (field))
4104 continue;
4105
4106 if (field == NULL)
4107 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4108
4109 /* Check that the first field is valid for returning in a register. */
4110
4111 /* ... Floats are not allowed */
4112 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4113 return true;
4114
4115 /* ... Aggregates that are not themselves valid for returning in
4116 a register are not allowed. */
4117 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4118 return true;
4119
4120 /* Now check the remaining fields, if any. Only bitfields are allowed,
4121 since they are not addressable. */
4122 for (field = DECL_CHAIN (field);
4123 field;
4124 field = DECL_CHAIN (field))
4125 {
4126 if (TREE_CODE (field) != FIELD_DECL)
4127 continue;
4128
4129 if (!DECL_BIT_FIELD_TYPE (field))
4130 return true;
4131 }
4132
4133 return false;
4134 }
4135
4136 if (TREE_CODE (type) == UNION_TYPE)
4137 {
4138 tree field;
4139
4140 /* Unions can be returned in registers if every element is
4141 integral, or can be returned in an integer register. */
4142 for (field = TYPE_FIELDS (type);
4143 field;
4144 field = DECL_CHAIN (field))
4145 {
4146 if (TREE_CODE (field) != FIELD_DECL)
4147 continue;
4148
4149 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4150 return true;
4151
4152 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4153 return true;
4154 }
4155
4156 return false;
4157 }
4158 #endif /* not ARM_WINCE */
4159
4160 /* Return all other types in memory. */
4161 return true;
4162 }
4163
4164 const struct pcs_attribute_arg
4165 {
4166 const char *arg;
4167 enum arm_pcs value;
4168 } pcs_attribute_args[] =
4169 {
4170 {"aapcs", ARM_PCS_AAPCS},
4171 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4172 #if 0
4173 /* We could recognize these, but changes would be needed elsewhere
4174 * to implement them. */
4175 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4176 {"atpcs", ARM_PCS_ATPCS},
4177 {"apcs", ARM_PCS_APCS},
4178 #endif
4179 {NULL, ARM_PCS_UNKNOWN}
4180 };
4181
4182 static enum arm_pcs
4183 arm_pcs_from_attribute (tree attr)
4184 {
4185 const struct pcs_attribute_arg *ptr;
4186 const char *arg;
4187
4188 /* Get the value of the argument. */
4189 if (TREE_VALUE (attr) == NULL_TREE
4190 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4191 return ARM_PCS_UNKNOWN;
4192
4193 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4194
4195 /* Check it against the list of known arguments. */
4196 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4197 if (streq (arg, ptr->arg))
4198 return ptr->value;
4199
4200 /* An unrecognized PCS variant. */
4201 return ARM_PCS_UNKNOWN;
4202 }
4203
4204 /* Get the PCS variant to use for this call. TYPE is the function's type
4205 specification, DECL is the specific declaration. DECL may be null if
4206 the call could be indirect or if this is a library call. */
4207 static enum arm_pcs
4208 arm_get_pcs_model (const_tree type, const_tree decl)
4209 {
4210 bool user_convention = false;
4211 enum arm_pcs user_pcs = arm_pcs_default;
4212 tree attr;
4213
4214 gcc_assert (type);
4215
4216 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4217 if (attr)
4218 {
4219 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4220 user_convention = true;
4221 }
4222
4223 if (TARGET_AAPCS_BASED)
4224 {
4225 /* Detect varargs functions. These always use the base rules
4226 (no argument is ever a candidate for a co-processor
4227 register). */
4228 bool base_rules = stdarg_p (type);
4229
4230 if (user_convention)
4231 {
4232 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4233 sorry ("non-AAPCS derived PCS variant");
4234 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4235 error ("variadic functions must use the base AAPCS variant");
4236 }
4237
4238 if (base_rules)
4239 return ARM_PCS_AAPCS;
4240 else if (user_convention)
4241 return user_pcs;
4242 else if (decl && flag_unit_at_a_time)
4243 {
4244 /* Local functions never leak outside this compilation unit,
4245 so we are free to use whatever conventions are
4246 appropriate. */
4247 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4248 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4249 if (i && i->local)
4250 return ARM_PCS_AAPCS_LOCAL;
4251 }
4252 }
4253 else if (user_convention && user_pcs != arm_pcs_default)
4254 sorry ("PCS variant");
4255
4256 /* For everything else we use the target's default. */
4257 return arm_pcs_default;
4258 }
4259
4260
4261 static void
4262 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4263 const_tree fntype ATTRIBUTE_UNUSED,
4264 rtx libcall ATTRIBUTE_UNUSED,
4265 const_tree fndecl ATTRIBUTE_UNUSED)
4266 {
4267 /* Record the unallocated VFP registers. */
4268 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4269 pcum->aapcs_vfp_reg_alloc = 0;
4270 }
4271
4272 /* Walk down the type tree of TYPE counting consecutive base elements.
4273 If *MODEP is VOIDmode, then set it to the first valid floating point
4274 type. If a non-floating point type is found, or if a floating point
4275 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4276 otherwise return the count in the sub-tree. */
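/* For example, for 'struct { double x, y, z; }' this returns 3 with
   *MODEP set to DFmode, whereas 'struct { double d; float f; }' returns
   -1 because its element modes differ.  */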
4277 static int
4278 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4279 {
4280 enum machine_mode mode;
4281 HOST_WIDE_INT size;
4282
4283 switch (TREE_CODE (type))
4284 {
4285 case REAL_TYPE:
4286 mode = TYPE_MODE (type);
4287 if (mode != DFmode && mode != SFmode)
4288 return -1;
4289
4290 if (*modep == VOIDmode)
4291 *modep = mode;
4292
4293 if (*modep == mode)
4294 return 1;
4295
4296 break;
4297
4298 case COMPLEX_TYPE:
4299 mode = TYPE_MODE (TREE_TYPE (type));
4300 if (mode != DFmode && mode != SFmode)
4301 return -1;
4302
4303 if (*modep == VOIDmode)
4304 *modep = mode;
4305
4306 if (*modep == mode)
4307 return 2;
4308
4309 break;
4310
4311 case VECTOR_TYPE:
4312 /* Use V2SImode and V4SImode as representatives of all 64-bit
4313 and 128-bit vector types, whether or not those modes are
4314 supported with the present options. */
4315 size = int_size_in_bytes (type);
4316 switch (size)
4317 {
4318 case 8:
4319 mode = V2SImode;
4320 break;
4321 case 16:
4322 mode = V4SImode;
4323 break;
4324 default:
4325 return -1;
4326 }
4327
4328 if (*modep == VOIDmode)
4329 *modep = mode;
4330
4331 /* Vector modes are considered to be opaque: two vectors are
4332 equivalent for the purposes of being homogeneous aggregates
4333 if they are the same size. */
4334 if (*modep == mode)
4335 return 1;
4336
4337 break;
4338
4339 case ARRAY_TYPE:
4340 {
4341 int count;
4342 tree index = TYPE_DOMAIN (type);
4343
4344 /* Can't handle incomplete types. */
4345 if (!COMPLETE_TYPE_P (type))
4346 return -1;
4347
4348 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4349 if (count == -1
4350 || !index
4351 || !TYPE_MAX_VALUE (index)
4352 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4353 || !TYPE_MIN_VALUE (index)
4354 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4355 || count < 0)
4356 return -1;
4357
4358 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4359 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4360
4361 /* There must be no padding. */
4362 if (!host_integerp (TYPE_SIZE (type), 1)
4363 || (tree_low_cst (TYPE_SIZE (type), 1)
4364 != count * GET_MODE_BITSIZE (*modep)))
4365 return -1;
4366
4367 return count;
4368 }
4369
4370 case RECORD_TYPE:
4371 {
4372 int count = 0;
4373 int sub_count;
4374 tree field;
4375
4376 /* Can't handle incomplete types. */
4377 if (!COMPLETE_TYPE_P (type))
4378 return -1;
4379
4380 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4381 {
4382 if (TREE_CODE (field) != FIELD_DECL)
4383 continue;
4384
4385 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4386 if (sub_count < 0)
4387 return -1;
4388 count += sub_count;
4389 }
4390
4391 /* There must be no padding. */
4392 if (!host_integerp (TYPE_SIZE (type), 1)
4393 || (tree_low_cst (TYPE_SIZE (type), 1)
4394 != count * GET_MODE_BITSIZE (*modep)))
4395 return -1;
4396
4397 return count;
4398 }
4399
4400 case UNION_TYPE:
4401 case QUAL_UNION_TYPE:
4402 {
4403 /* These aren't very interesting except in a degenerate case. */
4404 int count = 0;
4405 int sub_count;
4406 tree field;
4407
4408 /* Can't handle incomplete types. */
4409 if (!COMPLETE_TYPE_P (type))
4410 return -1;
4411
4412 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4413 {
4414 if (TREE_CODE (field) != FIELD_DECL)
4415 continue;
4416
4417 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4418 if (sub_count < 0)
4419 return -1;
4420 count = count > sub_count ? count : sub_count;
4421 }
4422
4423 /* There must be no padding. */
4424 if (!host_integerp (TYPE_SIZE (type), 1)
4425 || (tree_low_cst (TYPE_SIZE (type), 1)
4426 != count * GET_MODE_BITSIZE (*modep)))
4427 return -1;
4428
4429 return count;
4430 }
4431
4432 default:
4433 break;
4434 }
4435
4436 return -1;
4437 }
4438
4439 /* Return true if PCS_VARIANT should use VFP registers. */
4440 static bool
4441 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4442 {
4443 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4444 {
4445 static bool seen_thumb1_vfp = false;
4446
4447 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4448 {
4449 sorry ("Thumb-1 hard-float VFP ABI");
4450 /* sorry() is not immediately fatal, so only display this once. */
4451 seen_thumb1_vfp = true;
4452 }
4453
4454 return true;
4455 }
4456
4457 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4458 return false;
4459
4460 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4461 && (TARGET_VFP_DOUBLE || !is_double));
4462 }
4463
4464 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4465 suitable for passing or returning in VFP registers for the PCS
4466 variant selected. If it is, then *BASE_MODE is updated to contain
4467 a machine mode describing each element of the argument's type and
4468 *COUNT to hold the number of such elements. */
4469 static bool
4470 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4471 enum machine_mode mode, const_tree type,
4472 enum machine_mode *base_mode, int *count)
4473 {
4474 enum machine_mode new_mode = VOIDmode;
4475
4476 /* If we have the type information, prefer that to working things
4477 out from the mode. */
4478 if (type)
4479 {
4480 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4481
4482 if (ag_count > 0 && ag_count <= 4)
4483 *count = ag_count;
4484 else
4485 return false;
4486 }
4487 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4488 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4489 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4490 {
4491 *count = 1;
4492 new_mode = mode;
4493 }
4494 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4495 {
4496 *count = 2;
4497 new_mode = (mode == DCmode ? DFmode : SFmode);
4498 }
4499 else
4500 return false;
4501
4502
4503 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4504 return false;
4505
4506 *base_mode = new_mode;
4507 return true;
4508 }
4509
4510 static bool
4511 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4512 enum machine_mode mode, const_tree type)
4513 {
4514 int count ATTRIBUTE_UNUSED;
4515 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4516
4517 if (!use_vfp_abi (pcs_variant, false))
4518 return false;
4519 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4520 &ag_mode, &count);
4521 }
4522
4523 static bool
4524 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4525 const_tree type)
4526 {
4527 if (!use_vfp_abi (pcum->pcs_variant, false))
4528 return false;
4529
4530 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4531 &pcum->aapcs_vfp_rmode,
4532 &pcum->aapcs_vfp_rcount);
4533 }
4534
4535 static bool
4536 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4537 const_tree type ATTRIBUTE_UNUSED)
4538 {
4539 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4540 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4541 int regno;
4542
4543 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4544 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4545 {
4546 pcum->aapcs_vfp_reg_alloc = mask << regno;
4547 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4548 {
4549 int i;
4550 int rcount = pcum->aapcs_vfp_rcount;
4551 int rshift = shift;
4552 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4553 rtx par;
4554 if (!TARGET_NEON)
4555 {
4556 /* Avoid using unsupported vector modes. */
4557 if (rmode == V2SImode)
4558 rmode = DImode;
4559 else if (rmode == V4SImode)
4560 {
4561 rmode = DImode;
4562 rcount *= 2;
4563 rshift /= 2;
4564 }
4565 }
4566 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4567 for (i = 0; i < rcount; i++)
4568 {
4569 rtx tmp = gen_rtx_REG (rmode,
4570 FIRST_VFP_REGNUM + regno + i * rshift);
4571 tmp = gen_rtx_EXPR_LIST
4572 (VOIDmode, tmp,
4573 GEN_INT (i * GET_MODE_SIZE (rmode)));
4574 XVECEXP (par, 0, i) = tmp;
4575 }
4576
4577 pcum->aapcs_reg = par;
4578 }
4579 else
4580 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4581 return true;
4582 }
4583 return false;
4584 }
4585
4586 static rtx
4587 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4588 enum machine_mode mode,
4589 const_tree type ATTRIBUTE_UNUSED)
4590 {
4591 if (!use_vfp_abi (pcs_variant, false))
4592 return NULL;
4593
4594 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4595 {
4596 int count;
4597 enum machine_mode ag_mode;
4598 int i;
4599 rtx par;
4600 int shift;
4601
4602 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4603 &ag_mode, &count);
4604
4605 if (!TARGET_NEON)
4606 {
4607 if (ag_mode == V2SImode)
4608 ag_mode = DImode;
4609 else if (ag_mode == V4SImode)
4610 {
4611 ag_mode = DImode;
4612 count *= 2;
4613 }
4614 }
4615 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4616 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4617 for (i = 0; i < count; i++)
4618 {
4619 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4620 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4621 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4622 XVECEXP (par, 0, i) = tmp;
4623 }
4624
4625 return par;
4626 }
4627
4628 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4629 }
4630
4631 static void
4632 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4633 enum machine_mode mode ATTRIBUTE_UNUSED,
4634 const_tree type ATTRIBUTE_UNUSED)
4635 {
4636 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4637 pcum->aapcs_vfp_reg_alloc = 0;
4638 return;
4639 }
4640
4641 #define AAPCS_CP(X) \
4642 { \
4643 aapcs_ ## X ## _cum_init, \
4644 aapcs_ ## X ## _is_call_candidate, \
4645 aapcs_ ## X ## _allocate, \
4646 aapcs_ ## X ## _is_return_candidate, \
4647 aapcs_ ## X ## _allocate_return_reg, \
4648 aapcs_ ## X ## _advance \
4649 }
4650
4651 /* Table of co-processors that can be used to pass arguments in
4652 registers. Ideally no argument should be a candidate for more than
4653 one co-processor table entry, but the table is processed in order
4654 and stops after the first match. If that entry then fails to put
4655 the argument into a co-processor register, the argument will go on
4656 the stack. */
4657 static struct
4658 {
4659 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4660 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4661
4662 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4663 BLKmode) is a candidate for this co-processor's registers; this
4664 function should ignore any position-dependent state in
4665 CUMULATIVE_ARGS and only use call-type dependent information. */
4666 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4667
4668 /* Return true if the argument does get a co-processor register; it
4669 should set aapcs_reg to an RTX of the register allocated as is
4670 required for a return from FUNCTION_ARG. */
4671 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4672
4673 /* Return true if a result of mode MODE (or type TYPE if MODE is
4674 BLKmode) can be returned in this co-processor's registers. */
4675 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4676
4677 /* Allocate and return an RTX element to hold the return value of a
4678 call; this routine must not fail and will only be called if
4679 is_return_candidate returned true with the same parameters. */
4680 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4681
4682 /* Finish processing this argument and prepare to start processing
4683 the next one. */
4684 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4685 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4686 {
4687 AAPCS_CP(vfp)
4688 };
4689
4690 #undef AAPCS_CP
4691
4692 static int
4693 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4694 const_tree type)
4695 {
4696 int i;
4697
4698 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4699 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4700 return i;
4701
4702 return -1;
4703 }
4704
4705 static int
4706 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4707 {
4708 /* We aren't passed a decl, so we can't check that a call is local.
4709 However, it isn't clear that that would be a win anyway, since it
4710 might limit some tail-calling opportunities. */
4711 enum arm_pcs pcs_variant;
4712
4713 if (fntype)
4714 {
4715 const_tree fndecl = NULL_TREE;
4716
4717 if (TREE_CODE (fntype) == FUNCTION_DECL)
4718 {
4719 fndecl = fntype;
4720 fntype = TREE_TYPE (fntype);
4721 }
4722
4723 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4724 }
4725 else
4726 pcs_variant = arm_pcs_default;
4727
4728 if (pcs_variant != ARM_PCS_AAPCS)
4729 {
4730 int i;
4731
4732 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4733 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4734 TYPE_MODE (type),
4735 type))
4736 return i;
4737 }
4738 return -1;
4739 }
4740
4741 static rtx
4742 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4743 const_tree fntype)
4744 {
4745 /* We aren't passed a decl, so we can't check that a call is local.
4746 However, it isn't clear that that would be a win anyway, since it
4747 might limit some tail-calling opportunities. */
4748 enum arm_pcs pcs_variant;
4749 int unsignedp ATTRIBUTE_UNUSED;
4750
4751 if (fntype)
4752 {
4753 const_tree fndecl = NULL_TREE;
4754
4755 if (TREE_CODE (fntype) == FUNCTION_DECL)
4756 {
4757 fndecl = fntype;
4758 fntype = TREE_TYPE (fntype);
4759 }
4760
4761 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4762 }
4763 else
4764 pcs_variant = arm_pcs_default;
4765
4766 /* Promote integer types. */
4767 if (type && INTEGRAL_TYPE_P (type))
4768 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4769
4770 if (pcs_variant != ARM_PCS_AAPCS)
4771 {
4772 int i;
4773
4774 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4775 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4776 type))
4777 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4778 mode, type);
4779 }
4780
4781 /* Promote small structs returned in a register to full-word size
4782 for big-endian AAPCS. */
4783 if (type && arm_return_in_msb (type))
4784 {
4785 HOST_WIDE_INT size = int_size_in_bytes (type);
4786 if (size % UNITS_PER_WORD != 0)
4787 {
4788 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4789 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4790 }
4791 }
4792
4793 return gen_rtx_REG (mode, R0_REGNUM);
4794 }
4795
4796 static rtx
4797 aapcs_libcall_value (enum machine_mode mode)
4798 {
4799 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4800 && GET_MODE_SIZE (mode) <= 4)
4801 mode = SImode;
4802
4803 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4804 }
4805
4806 /* Lay out a function argument using the AAPCS rules. The rule
4807 numbers referred to here are those in the AAPCS. */
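/* For example, for 'void f (int a, double b)' under the base (soft-float)
   AAPCS rules, a is allocated to r0; b needs doubleword alignment, so
   rule C3 rounds the NCRN up from 1 to 2 and rule C4 places b in r2-r3,
   leaving r1 unused.  */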
4808 static void
4809 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4810 const_tree type, bool named)
4811 {
4812 int nregs, nregs2;
4813 int ncrn;
4814
4815 /* We only need to do this once per argument. */
4816 if (pcum->aapcs_arg_processed)
4817 return;
4818
4819 pcum->aapcs_arg_processed = true;
4820
4821 /* Special case: if named is false then we are handling an incoming
4822 anonymous argument which is on the stack. */
4823 if (!named)
4824 return;
4825
4826 /* Is this a potential co-processor register candidate? */
4827 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4828 {
4829 int slot = aapcs_select_call_coproc (pcum, mode, type);
4830 pcum->aapcs_cprc_slot = slot;
4831
4832 /* We don't have to apply any of the rules from part B of the
4833 preparation phase, these are handled elsewhere in the
4834 compiler. */
4835
4836 if (slot >= 0)
4837 {
4838 /* A Co-processor register candidate goes either in its own
4839 class of registers or on the stack. */
4840 if (!pcum->aapcs_cprc_failed[slot])
4841 {
4842 /* C1.cp - Try to allocate the argument to co-processor
4843 registers. */
4844 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4845 return;
4846
4847 /* C2.cp - Put the argument on the stack and note that we
4848 can't assign any more candidates in this slot. We also
4849 need to note that we have allocated stack space, so that
4850 we won't later try to split a non-cprc candidate between
4851 core registers and the stack. */
4852 pcum->aapcs_cprc_failed[slot] = true;
4853 pcum->can_split = false;
4854 }
4855
4856 /* We didn't get a register, so this argument goes on the
4857 stack. */
4858 gcc_assert (pcum->can_split == false);
4859 return;
4860 }
4861 }
4862
4863 /* C3 - For double-word aligned arguments, round the NCRN up to the
4864 next even number. */
4865 ncrn = pcum->aapcs_ncrn;
4866 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4867 ncrn++;
4868
4869 nregs = ARM_NUM_REGS2(mode, type);
4870
4871 /* Sigh, this test should really assert that nregs > 0, but a GCC
4872 extension allows empty structs and then gives them empty size; it
4873 then allows such a structure to be passed by value. For some of
4874 the code below we have to pretend that such an argument has
4875 non-zero size so that we 'locate' it correctly either in
4876 registers or on the stack. */
4877 gcc_assert (nregs >= 0);
4878
4879 nregs2 = nregs ? nregs : 1;
4880
4881 /* C4 - Argument fits entirely in core registers. */
4882 if (ncrn + nregs2 <= NUM_ARG_REGS)
4883 {
4884 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4885 pcum->aapcs_next_ncrn = ncrn + nregs;
4886 return;
4887 }
4888
4889 /* C5 - Some core registers left and there are no arguments already
4890 on the stack: split this argument between the remaining core
4891 registers and the stack. */
4892 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4893 {
4894 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4895 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4896 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4897 return;
4898 }
4899
4900 /* C6 - NCRN is set to 4. */
4901 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4902
4903 /* C7, C8 - the argument goes on the stack. We have nothing to do here. */
4904 return;
4905 }
4906
4907 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4908 for a call to a function whose data type is FNTYPE.
4909 For a library call, FNTYPE is NULL. */
4910 void
4911 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4912 rtx libname,
4913 tree fndecl ATTRIBUTE_UNUSED)
4914 {
4915 /* Long call handling. */
4916 if (fntype)
4917 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4918 else
4919 pcum->pcs_variant = arm_pcs_default;
4920
4921 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4922 {
4923 if (arm_libcall_uses_aapcs_base (libname))
4924 pcum->pcs_variant = ARM_PCS_AAPCS;
4925
4926 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4927 pcum->aapcs_reg = NULL_RTX;
4928 pcum->aapcs_partial = 0;
4929 pcum->aapcs_arg_processed = false;
4930 pcum->aapcs_cprc_slot = -1;
4931 pcum->can_split = true;
4932
4933 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4934 {
4935 int i;
4936
4937 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4938 {
4939 pcum->aapcs_cprc_failed[i] = false;
4940 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4941 }
4942 }
4943 return;
4944 }
4945
4946 /* Legacy ABIs */
4947
4948 /* On the ARM, the offset starts at 0. */
4949 pcum->nregs = 0;
4950 pcum->iwmmxt_nregs = 0;
4951 pcum->can_split = true;
4952
4953 /* Varargs vectors are treated the same as long long.
4954 named_count avoids having to change the way arm handles 'named'. */
4955 pcum->named_count = 0;
4956 pcum->nargs = 0;
4957
4958 if (TARGET_REALLY_IWMMXT && fntype)
4959 {
4960 tree fn_arg;
4961
4962 for (fn_arg = TYPE_ARG_TYPES (fntype);
4963 fn_arg;
4964 fn_arg = TREE_CHAIN (fn_arg))
4965 pcum->named_count += 1;
4966
4967 if (! pcum->named_count)
4968 pcum->named_count = INT_MAX;
4969 }
4970 }
4971
4972
4973 /* Return true if mode/type need doubleword alignment. */
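/* For example, on AAPCS targets DImode (8-byte alignment) and a type
   declared with __attribute__ ((aligned (8))) need doubleword alignment,
   while SImode does not.  */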
4974 static bool
4975 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4976 {
4977 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4978 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4979 }
4980
4981
4982 /* Determine where to put an argument to a function.
4983 Value is zero to push the argument on the stack,
4984 or a hard register in which to store the argument.
4985
4986 MODE is the argument's machine mode.
4987 TYPE is the data type of the argument (as a tree).
4988 This is null for libcalls where that information may
4989 not be available.
4990 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4991 the preceding args and about the function being called.
4992 NAMED is nonzero if this argument is a named parameter
4993 (otherwise it is an extra parameter matching an ellipsis).
4994
4995 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4996 other arguments are passed on the stack. If (NAMED == 0) (which happens
4997 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4998 defined), say it is passed in the stack (function_prologue will
4999 indeed make it pass in the stack if necessary). */
5000
5001 static rtx
5002 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5003 const_tree type, bool named)
5004 {
5005 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5006 int nregs;
5007
5008 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5009 a call insn (op3 of a call_value insn). */
5010 if (mode == VOIDmode)
5011 return const0_rtx;
5012
5013 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5014 {
5015 aapcs_layout_arg (pcum, mode, type, named);
5016 return pcum->aapcs_reg;
5017 }
5018
5019 /* Varargs vectors are treated the same as long long.
5020 named_count avoids having to change the way arm handles 'named'. */
5021 if (TARGET_IWMMXT_ABI
5022 && arm_vector_mode_supported_p (mode)
5023 && pcum->named_count > pcum->nargs + 1)
5024 {
5025 if (pcum->iwmmxt_nregs <= 9)
5026 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5027 else
5028 {
5029 pcum->can_split = false;
5030 return NULL_RTX;
5031 }
5032 }
5033
5034 /* Put doubleword aligned quantities in even register pairs. */
5035 if (pcum->nregs & 1
5036 && ARM_DOUBLEWORD_ALIGN
5037 && arm_needs_doubleword_align (mode, type))
5038 pcum->nregs++;
5039
5040 /* Only allow splitting an arg between regs and memory if all preceding
5041 args were allocated to regs. For args passed by reference we only count
5042 the reference pointer. */
5043 if (pcum->can_split)
5044 nregs = 1;
5045 else
5046 nregs = ARM_NUM_REGS2 (mode, type);
5047
5048 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5049 return NULL_RTX;
5050
5051 return gen_rtx_REG (mode, pcum->nregs);
5052 }
5053
5054 static unsigned int
5055 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5056 {
5057 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5058 ? DOUBLEWORD_ALIGNMENT
5059 : PARM_BOUNDARY);
5060 }
5061
5062 static int
5063 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5064 tree type, bool named)
5065 {
5066 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5067 int nregs = pcum->nregs;
5068
5069 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5070 {
5071 aapcs_layout_arg (pcum, mode, type, named);
5072 return pcum->aapcs_partial;
5073 }
5074
5075 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5076 return 0;
5077
5078 if (NUM_ARG_REGS > nregs
5079 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5080 && pcum->can_split)
5081 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5082
5083 return 0;
5084 }
5085
5086 /* Update the data in PCUM to advance over an argument
5087 of mode MODE and data type TYPE.
5088 (TYPE is null for libcalls where that information may not be available.) */
5089
5090 static void
5091 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5092 const_tree type, bool named)
5093 {
5094 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5095
5096 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5097 {
5098 aapcs_layout_arg (pcum, mode, type, named);
5099
5100 if (pcum->aapcs_cprc_slot >= 0)
5101 {
5102 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5103 type);
5104 pcum->aapcs_cprc_slot = -1;
5105 }
5106
5107 /* Generic stuff. */
5108 pcum->aapcs_arg_processed = false;
5109 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5110 pcum->aapcs_reg = NULL_RTX;
5111 pcum->aapcs_partial = 0;
5112 }
5113 else
5114 {
5115 pcum->nargs += 1;
5116 if (arm_vector_mode_supported_p (mode)
5117 && pcum->named_count > pcum->nargs
5118 && TARGET_IWMMXT_ABI)
5119 pcum->iwmmxt_nregs += 1;
5120 else
5121 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5122 }
5123 }
5124
5125 /* Variable sized types are passed by reference. This is a GCC
5126 extension to the ARM ABI. */
5127
5128 static bool
5129 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5130 enum machine_mode mode ATTRIBUTE_UNUSED,
5131 const_tree type, bool named ATTRIBUTE_UNUSED)
5132 {
5133 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5134 }
5135 \f
5136 /* Encode the current state of the #pragma [no_]long_calls. */
5137 typedef enum
5138 {
5139 OFF, /* No #pragma [no_]long_calls is in effect. */
5140 LONG, /* #pragma long_calls is in effect. */
5141 SHORT /* #pragma no_long_calls is in effect. */
5142 } arm_pragma_enum;
5143
5144 static arm_pragma_enum arm_pragma_long_calls = OFF;
5145
5146 void
5147 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5148 {
5149 arm_pragma_long_calls = LONG;
5150 }
5151
5152 void
5153 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5154 {
5155 arm_pragma_long_calls = SHORT;
5156 }
5157
5158 void
5159 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5160 {
5161 arm_pragma_long_calls = OFF;
5162 }
5163 \f
5164 /* Handle an attribute requiring a FUNCTION_DECL;
5165 arguments as in struct attribute_spec.handler. */
5166 static tree
5167 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5168 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5169 {
5170 if (TREE_CODE (*node) != FUNCTION_DECL)
5171 {
5172 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5173 name);
5174 *no_add_attrs = true;
5175 }
5176
5177 return NULL_TREE;
5178 }
5179
5180 /* Handle an "interrupt" or "isr" attribute;
5181 arguments as in struct attribute_spec.handler. */
5182 static tree
5183 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5184 bool *no_add_attrs)
5185 {
5186 if (DECL_P (*node))
5187 {
5188 if (TREE_CODE (*node) != FUNCTION_DECL)
5189 {
5190 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5191 name);
5192 *no_add_attrs = true;
5193 }
5194 /* FIXME: the argument if any is checked for type attributes;
5195 should it be checked for decl ones? */
5196 }
5197 else
5198 {
5199 if (TREE_CODE (*node) == FUNCTION_TYPE
5200 || TREE_CODE (*node) == METHOD_TYPE)
5201 {
5202 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5203 {
5204 warning (OPT_Wattributes, "%qE attribute ignored",
5205 name);
5206 *no_add_attrs = true;
5207 }
5208 }
5209 else if (TREE_CODE (*node) == POINTER_TYPE
5210 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5211 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5212 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5213 {
5214 *node = build_variant_type_copy (*node);
5215 TREE_TYPE (*node) = build_type_attribute_variant
5216 (TREE_TYPE (*node),
5217 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5218 *no_add_attrs = true;
5219 }
5220 else
5221 {
5222 /* Possibly pass this attribute on from the type to a decl. */
5223 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5224 | (int) ATTR_FLAG_FUNCTION_NEXT
5225 | (int) ATTR_FLAG_ARRAY_NEXT))
5226 {
5227 *no_add_attrs = true;
5228 return tree_cons (name, args, NULL_TREE);
5229 }
5230 else
5231 {
5232 warning (OPT_Wattributes, "%qE attribute ignored",
5233 name);
5234 }
5235 }
5236 }
5237
5238 return NULL_TREE;
5239 }
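/* For illustration only (user-level source, not part of the compiler;
   the function name is made up): the attribute handled above is
   typically written as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   where "IRQ" must be one of the kinds recognized by arm_isr_value; an
   unrecognized argument makes the attribute be ignored with the warning
   emitted above.  */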
5240
5241 /* Handle a "pcs" attribute; arguments as in struct
5242 attribute_spec.handler. */
5243 static tree
5244 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5245 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5246 {
5247 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5248 {
5249 warning (OPT_Wattributes, "%qE attribute ignored", name);
5250 *no_add_attrs = true;
5251 }
5252 return NULL_TREE;
5253 }
5254
5255 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5256 /* Handle the "notshared" attribute. This attribute is another way of
5257 requesting hidden visibility. ARM's compiler supports
5258 "__declspec(notshared)"; we support the same thing via an
5259 attribute. */
5260
5261 static tree
5262 arm_handle_notshared_attribute (tree *node,
5263 tree name ATTRIBUTE_UNUSED,
5264 tree args ATTRIBUTE_UNUSED,
5265 int flags ATTRIBUTE_UNUSED,
5266 bool *no_add_attrs)
5267 {
5268 tree decl = TYPE_NAME (*node);
5269
5270 if (decl)
5271 {
5272 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5273 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5274 *no_add_attrs = false;
5275 }
5276 return NULL_TREE;
5277 }
5278 #endif
5279
5280 /* Return 0 if the attributes for two types are incompatible, 1 if they
5281 are compatible, and 2 if they are nearly compatible (which causes a
5282 warning to be generated). */
5283 static int
5284 arm_comp_type_attributes (const_tree type1, const_tree type2)
5285 {
5286 int l1, l2, s1, s2;
5287
5288 /* Check for mismatch of non-default calling convention. */
5289 if (TREE_CODE (type1) != FUNCTION_TYPE)
5290 return 1;
5291
5292 /* Check for mismatched call attributes. */
5293 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5294 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5295 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5296 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5297
5298 /* Only bother to check if an attribute is defined. */
5299 if (l1 | l2 | s1 | s2)
5300 {
5301 /* If one type has an attribute, the other must have the same attribute. */
5302 if ((l1 != l2) || (s1 != s2))
5303 return 0;
5304
5305 /* Disallow mixed attributes. */
5306 if ((l1 & s2) || (l2 & s1))
5307 return 0;
5308 }
5309
5310 /* Check for mismatched ISR attribute. */
5311 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5312 if (! l1)
5313 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5314 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5315 if (! l2)
5316 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5317 if (l1 != l2)
5318 return 0;
5319
5320 return 1;
5321 }
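/* For illustration only (user-level source, not part of the compiler;
   the names are made up): given

     extern void f (void) __attribute__ ((long_call));
     void (*p) (void) = f;

   the comparison above treats the two function types as incompatible,
   since only one of them carries "long_call", so the assignment can be
   expected to draw an incompatible-pointer-type diagnostic.  */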
5322
5323 /* Assigns default attributes to a newly defined type. This is used to
5324 set short_call/long_call attributes for function types of
5325 functions defined inside corresponding #pragma scopes. */
5326 static void
5327 arm_set_default_type_attributes (tree type)
5328 {
5329 /* Add __attribute__ ((long_call)) to all functions when inside
5330 #pragma long_calls, or __attribute__ ((short_call)) when inside
5331 #pragma no_long_calls. */
5332 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5333 {
5334 tree type_attr_list, attr_name;
5335 type_attr_list = TYPE_ATTRIBUTES (type);
5336
5337 if (arm_pragma_long_calls == LONG)
5338 attr_name = get_identifier ("long_call");
5339 else if (arm_pragma_long_calls == SHORT)
5340 attr_name = get_identifier ("short_call");
5341 else
5342 return;
5343
5344 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5345 TYPE_ATTRIBUTES (type) = type_attr_list;
5346 }
5347 }
5348 \f
5349 /* Return true if DECL is known to be linked into section SECTION. */
5350
5351 static bool
5352 arm_function_in_section_p (tree decl, section *section)
5353 {
5354 /* We can only be certain about functions defined in the same
5355 compilation unit. */
5356 if (!TREE_STATIC (decl))
5357 return false;
5358
5359 /* Make sure that SYMBOL always binds to the definition in this
5360 compilation unit. */
5361 if (!targetm.binds_local_p (decl))
5362 return false;
5363
5364 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5365 if (!DECL_SECTION_NAME (decl))
5366 {
5367 /* Make sure that we will not create a unique section for DECL. */
5368 if (flag_function_sections || DECL_ONE_ONLY (decl))
5369 return false;
5370 }
5371
5372 return function_section (decl) == section;
5373 }
5374
5375 /* Return nonzero if a 32-bit "long_call" should be generated for
5376 a call from the current function to DECL. We generate a long_call
5377 if the function:
5378
5379 a. has an __attribute__ ((long_call))
5380 or b. is within the scope of a #pragma long_calls
5381 or c. the -mlong-calls command line switch has been specified
5382
5383 However we do not generate a long call if the function:
5384
5385 d. has an __attribute__ ((short_call))
5386 or e. is inside the scope of a #pragma no_long_calls
5387 or f. is defined in the same section as the current function. */
5388
5389 bool
5390 arm_is_long_call_p (tree decl)
5391 {
5392 tree attrs;
5393
5394 if (!decl)
5395 return TARGET_LONG_CALLS;
5396
5397 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5398 if (lookup_attribute ("short_call", attrs))
5399 return false;
5400
5401 /* For "f", be conservative, and only cater for cases in which the
5402 whole of the current function is placed in the same section. */
5403 if (!flag_reorder_blocks_and_partition
5404 && TREE_CODE (decl) == FUNCTION_DECL
5405 && arm_function_in_section_p (decl, current_function_section ()))
5406 return false;
5407
5408 if (lookup_attribute ("long_call", attrs))
5409 return true;
5410
5411 return TARGET_LONG_CALLS;
5412 }
5413
5414 /* Return nonzero if it is ok to make a tail-call to DECL. */
5415 static bool
5416 arm_function_ok_for_sibcall (tree decl, tree exp)
5417 {
5418 unsigned long func_type;
5419
5420 if (cfun->machine->sibcall_blocked)
5421 return false;
5422
5423 /* Never tailcall something if we are generating code for Thumb-1. */
5424 if (TARGET_THUMB1)
5425 return false;
5426
5427 /* The PIC register is live on entry to VxWorks PLT entries, so we
5428 must make the call before restoring the PIC register. */
5429 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5430 return false;
5431
5432 /* Cannot tail-call to long calls, since these are out of range of
5433 a branch instruction. */
5434 if (decl && arm_is_long_call_p (decl))
5435 return false;
5436
5437 /* If we are interworking and the function is not declared static
5438 then we can't tail-call it unless we know that it exists in this
5439 compilation unit (since it might be a Thumb routine). */
5440 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
5441 && !TREE_ASM_WRITTEN (decl))
5442 return false;
5443
5444 func_type = arm_current_func_type ();
5445 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5446 if (IS_INTERRUPT (func_type))
5447 return false;
5448
5449 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5450 {
5451 /* Check that the return value locations are the same. For
5452 example that we aren't returning a value from the sibling in
5453 a VFP register but then need to transfer it to a core
5454 register. */
5455 rtx a, b;
5456
5457 a = arm_function_value (TREE_TYPE (exp), decl, false);
5458 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5459 cfun->decl, false);
5460 if (!rtx_equal_p (a, b))
5461 return false;
5462 }
5463
5464 /* Never tailcall if function may be called with a misaligned SP. */
5465 if (IS_STACKALIGN (func_type))
5466 return false;
5467
5468 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5469 references should become a NOP. Don't convert such calls into
5470 sibling calls. */
5471 if (TARGET_AAPCS_BASED
5472 && arm_abi == ARM_ABI_AAPCS
5473 && decl
5474 && DECL_WEAK (decl))
5475 return false;
5476
5477 /* Everything else is ok. */
5478 return true;
5479 }
5480
5481 \f
5482 /* Addressing mode support functions. */
5483
5484 /* Return nonzero if X is a legitimate immediate operand when compiling
5485 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5486 int
5487 legitimate_pic_operand_p (rtx x)
5488 {
5489 if (GET_CODE (x) == SYMBOL_REF
5490 || (GET_CODE (x) == CONST
5491 && GET_CODE (XEXP (x, 0)) == PLUS
5492 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5493 return 0;
5494
5495 return 1;
5496 }
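/* For example (illustrative RTL): under -fPIC, (const_int 42) remains a
   legitimate immediate, whereas (symbol_ref "foo") or
   (const (plus (symbol_ref "foo") (const_int 4))) is rejected by the
   test above and is instead handled through legitimize_pic_address
   below.  */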
5497
5498 /* Record that the current function needs a PIC register. Initialize
5499 cfun->machine->pic_reg if we have not already done so. */
5500
5501 static void
5502 require_pic_register (void)
5503 {
5504 /* A lot of the logic here is made obscure by the fact that this
5505 routine gets called as part of the rtx cost estimation process.
5506 We don't want those calls to affect any assumptions about the real
5507 function; and further, we can't call entry_of_function() until we
5508 start the real expansion process. */
5509 if (!crtl->uses_pic_offset_table)
5510 {
5511 gcc_assert (can_create_pseudo_p ());
5512 if (arm_pic_register != INVALID_REGNUM)
5513 {
5514 if (!cfun->machine->pic_reg)
5515 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5516
5517 /* Play games to avoid marking the function as needing pic
5518 if we are being called as part of the cost-estimation
5519 process. */
5520 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5521 crtl->uses_pic_offset_table = 1;
5522 }
5523 else
5524 {
5525 rtx seq, insn;
5526
5527 if (!cfun->machine->pic_reg)
5528 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5529
5530 /* Play games to avoid marking the function as needing pic
5531 if we are being called as part of the cost-estimation
5532 process. */
5533 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5534 {
5535 crtl->uses_pic_offset_table = 1;
5536 start_sequence ();
5537
5538 arm_load_pic_register (0UL);
5539
5540 seq = get_insns ();
5541 end_sequence ();
5542
5543 for (insn = seq; insn; insn = NEXT_INSN (insn))
5544 if (INSN_P (insn))
5545 INSN_LOCATION (insn) = prologue_location;
5546
5547 /* We can be called during expansion of PHI nodes, where
5548 we can't yet emit instructions directly in the final
5549 insn stream. Queue the insns on the entry edge, they will
5550 be committed after everything else is expanded. */
5551 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5552 }
5553 }
5554 }
5555 }
5556
5557 rtx
5558 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5559 {
5560 if (GET_CODE (orig) == SYMBOL_REF
5561 || GET_CODE (orig) == LABEL_REF)
5562 {
5563 rtx insn;
5564
5565 if (reg == 0)
5566 {
5567 gcc_assert (can_create_pseudo_p ());
5568 reg = gen_reg_rtx (Pmode);
5569 }
5570
5571 /* VxWorks does not impose a fixed gap between segments; the run-time
5572 gap can be different from the object-file gap. We therefore can't
5573 use GOTOFF unless we are absolutely sure that the symbol is in the
5574 same segment as the GOT. Unfortunately, the flexibility of linker
5575 scripts means that we can't be sure of that in general, so assume
5576 that GOTOFF is never valid on VxWorks. */
5577 if ((GET_CODE (orig) == LABEL_REF
5578 || (GET_CODE (orig) == SYMBOL_REF &&
5579 SYMBOL_REF_LOCAL_P (orig)))
5580 && NEED_GOT_RELOC
5581 && !TARGET_VXWORKS_RTP)
5582 insn = arm_pic_static_addr (orig, reg);
5583 else
5584 {
5585 rtx pat;
5586 rtx mem;
5587
5588 /* If this function doesn't have a pic register, create one now. */
5589 require_pic_register ();
5590
5591 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5592
5593 /* Make the MEM as close to a constant as possible. */
5594 mem = SET_SRC (pat);
5595 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5596 MEM_READONLY_P (mem) = 1;
5597 MEM_NOTRAP_P (mem) = 1;
5598
5599 insn = emit_insn (pat);
5600 }
5601
5602 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5603 by loop. */
5604 set_unique_reg_note (insn, REG_EQUAL, orig);
5605
5606 return reg;
5607 }
5608 else if (GET_CODE (orig) == CONST)
5609 {
5610 rtx base, offset;
5611
5612 if (GET_CODE (XEXP (orig, 0)) == PLUS
5613 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5614 return orig;
5615
5616 /* Handle the case where we have: const (UNSPEC_TLS). */
5617 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5618 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5619 return orig;
5620
5621 /* Handle the case where we have:
5622 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5623 CONST_INT. */
5624 if (GET_CODE (XEXP (orig, 0)) == PLUS
5625 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5626 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5627 {
5628 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5629 return orig;
5630 }
5631
5632 if (reg == 0)
5633 {
5634 gcc_assert (can_create_pseudo_p ());
5635 reg = gen_reg_rtx (Pmode);
5636 }
5637
5638 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5639
5640 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5641 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5642 base == reg ? 0 : reg);
5643
5644 if (CONST_INT_P (offset))
5645 {
5646 /* The base register doesn't really matter, we only want to
5647 test the index for the appropriate mode. */
5648 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5649 {
5650 gcc_assert (can_create_pseudo_p ());
5651 offset = force_reg (Pmode, offset);
5652 }
5653
5654 if (CONST_INT_P (offset))
5655 return plus_constant (Pmode, base, INTVAL (offset));
5656 }
5657
5658 if (GET_MODE_SIZE (mode) > 4
5659 && (GET_MODE_CLASS (mode) == MODE_INT
5660 || TARGET_SOFT_FLOAT))
5661 {
5662 emit_insn (gen_addsi3 (reg, base, offset));
5663 return reg;
5664 }
5665
5666 return gen_rtx_PLUS (Pmode, base, offset);
5667 }
5668
5669 return orig;
5670 }
5671
5672
5673 /* Find a spare register to use during the prolog of a function. */
5674
5675 static int
5676 thumb_find_work_register (unsigned long pushed_regs_mask)
5677 {
5678 int reg;
5679
5680 /* Check the argument registers first as these are call-used. The
5681 register allocation order means that sometimes r3 might be used
5682 but earlier argument registers might not, so check them all. */
5683 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5684 if (!df_regs_ever_live_p (reg))
5685 return reg;
5686
5687 /* Before going on to check the call-saved registers we can try a couple
5688 more ways of deducing that r3 is available. The first is when we are
5689 pushing anonymous arguments onto the stack and we have less than 4
5690 registers worth of fixed arguments(*). In this case r3 will be part of
5691 the variable argument list and so we can be sure that it will be
5692 pushed right at the start of the function. Hence it will be available
5693 for the rest of the prologue.
5694 (*): ie crtl->args.pretend_args_size is greater than 0. */
5695 if (cfun->machine->uses_anonymous_args
5696 && crtl->args.pretend_args_size > 0)
5697 return LAST_ARG_REGNUM;
5698
5699 /* The other case is when we have fixed arguments but less than 4 registers
5700 worth. In this case r3 might be used in the body of the function, but
5701 it is not being used to convey an argument into the function. In theory
5702 we could just check crtl->args.size to see how many bytes are
5703 being passed in argument registers, but it seems that it is unreliable.
5704 Sometimes it will have the value 0 when in fact arguments are being
5705 passed. (See testcase execute/20021111-1.c for an example). So we also
5706 check the args_info.nregs field as well. The problem with this field is
5707 that it makes no allowances for arguments that are passed to the
5708 function but which are not used. Hence we could miss an opportunity
5709 when a function has an unused argument in r3. But it is better to be
5710 safe than to be sorry. */
5711 if (! cfun->machine->uses_anonymous_args
5712 && crtl->args.size >= 0
5713 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5714 && (TARGET_AAPCS_BASED
5715 ? crtl->args.info.aapcs_ncrn < 4
5716 : crtl->args.info.nregs < 4))
5717 return LAST_ARG_REGNUM;
5718
5719 /* Otherwise look for a call-saved register that is going to be pushed. */
5720 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5721 if (pushed_regs_mask & (1 << reg))
5722 return reg;
5723
5724 if (TARGET_THUMB2)
5725 {
5726 /* Thumb-2 can use high regs. */
5727 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5728 if (pushed_regs_mask & (1 << reg))
5729 return reg;
5730 }
5731 /* Something went wrong - thumb_compute_save_reg_mask()
5732 should have arranged for a suitable register to be pushed. */
5733 gcc_unreachable ();
5734 }
5735
5736 static GTY(()) int pic_labelno;
5737
5738 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5739 low register. */
5740
5741 void
5742 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5743 {
5744 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5745
5746 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5747 return;
5748
5749 gcc_assert (flag_pic);
5750
5751 pic_reg = cfun->machine->pic_reg;
5752 if (TARGET_VXWORKS_RTP)
5753 {
5754 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5755 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5756 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5757
5758 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5759
5760 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5761 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5762 }
5763 else
5764 {
5765 /* We use an UNSPEC rather than a LABEL_REF because this label
5766 never appears in the code stream. */
5767
5768 labelno = GEN_INT (pic_labelno++);
5769 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5770 l1 = gen_rtx_CONST (VOIDmode, l1);
5771
5772 /* On the ARM the PC register contains 'dot + 8' at the time of the
5773 addition, on the Thumb it is 'dot + 4'. */
5774 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5775 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5776 UNSPEC_GOTSYM_OFF);
5777 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5778
5779 if (TARGET_32BIT)
5780 {
5781 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5782 }
5783 else /* TARGET_THUMB1 */
5784 {
5785 if (arm_pic_register != INVALID_REGNUM
5786 && REGNO (pic_reg) > LAST_LO_REGNUM)
5787 {
5788 /* We will have pushed the pic register, so we should always be
5789 able to find a work register. */
5790 pic_tmp = gen_rtx_REG (SImode,
5791 thumb_find_work_register (saved_regs));
5792 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5793 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5794 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5795 }
5796 else
5797 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5798 }
5799 }
5800
5801 /* Need to emit this whether or not we obey regdecls,
5802 since setjmp/longjmp can cause life info to screw up. */
5803 emit_use (pic_reg);
5804 }
5805
5806 /* Generate code to load the address of a static var when flag_pic is set. */
5807 static rtx
5808 arm_pic_static_addr (rtx orig, rtx reg)
5809 {
5810 rtx l1, labelno, offset_rtx, insn;
5811
5812 gcc_assert (flag_pic);
5813
5814 /* We use an UNSPEC rather than a LABEL_REF because this label
5815 never appears in the code stream. */
5816 labelno = GEN_INT (pic_labelno++);
5817 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5818 l1 = gen_rtx_CONST (VOIDmode, l1);
5819
5820 /* On the ARM the PC register contains 'dot + 8' at the time of the
5821 addition, on the Thumb it is 'dot + 4'. */
5822 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5823 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5824 UNSPEC_SYMBOL_OFFSET);
5825 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5826
5827 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5828 return insn;
5829 }
5830
5831 /* Return nonzero if X is valid as an ARM state addressing register. */
5832 static int
5833 arm_address_register_rtx_p (rtx x, int strict_p)
5834 {
5835 int regno;
5836
5837 if (!REG_P (x))
5838 return 0;
5839
5840 regno = REGNO (x);
5841
5842 if (strict_p)
5843 return ARM_REGNO_OK_FOR_BASE_P (regno);
5844
5845 return (regno <= LAST_ARM_REGNUM
5846 || regno >= FIRST_PSEUDO_REGISTER
5847 || regno == FRAME_POINTER_REGNUM
5848 || regno == ARG_POINTER_REGNUM);
5849 }
5850
5851 /* Return TRUE if this rtx is the difference of a symbol and a label,
5852 and will reduce to a PC-relative relocation in the object file.
5853 Expressions like this can be left alone when generating PIC, rather
5854 than forced through the GOT. */
5855 static int
5856 pcrel_constant_p (rtx x)
5857 {
5858 if (GET_CODE (x) == MINUS)
5859 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5860
5861 return FALSE;
5862 }
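/* For example (illustrative RTL): (minus (symbol_ref "x") (label_ref 7))
   satisfies this test, since the difference between a symbol and a label
   in the same object file reduces to a PC-relative relocation.  */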
5863
5864 /* Return true if X will surely end up in an index register after next
5865 splitting pass. */
5866 static bool
5867 will_be_in_index_register (const_rtx x)
5868 {
5869 /* arm.md: calculate_pic_address will split this into a register. */
5870 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5871 }
5872
5873 /* Return nonzero if X is a valid ARM state address operand. */
5874 int
5875 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5876 int strict_p)
5877 {
5878 bool use_ldrd;
5879 enum rtx_code code = GET_CODE (x);
5880
5881 if (arm_address_register_rtx_p (x, strict_p))
5882 return 1;
5883
5884 use_ldrd = (TARGET_LDRD
5885 && (mode == DImode
5886 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5887
5888 if (code == POST_INC || code == PRE_DEC
5889 || ((code == PRE_INC || code == POST_DEC)
5890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5892
5893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5895 && GET_CODE (XEXP (x, 1)) == PLUS
5896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5897 {
5898 rtx addend = XEXP (XEXP (x, 1), 1);
5899
5900 /* Don't allow ldrd post increment by register because it's hard
5901 to fixup invalid register choices. */
5902 if (use_ldrd
5903 && GET_CODE (x) == POST_MODIFY
5904 && REG_P (addend))
5905 return 0;
5906
5907 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5908 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5909 }
5910
5911 /* After reload constants split into minipools will have addresses
5912 from a LABEL_REF. */
5913 else if (reload_completed
5914 && (code == LABEL_REF
5915 || (code == CONST
5916 && GET_CODE (XEXP (x, 0)) == PLUS
5917 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5918 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5919 return 1;
5920
5921 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5922 return 0;
5923
5924 else if (code == PLUS)
5925 {
5926 rtx xop0 = XEXP (x, 0);
5927 rtx xop1 = XEXP (x, 1);
5928
5929 return ((arm_address_register_rtx_p (xop0, strict_p)
5930 && ((CONST_INT_P (xop1)
5931 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5932 || (!strict_p && will_be_in_index_register (xop1))))
5933 || (arm_address_register_rtx_p (xop1, strict_p)
5934 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5935 }
5936
5937 #if 0
5938 /* Reload currently can't handle MINUS, so disable this for now */
5939 else if (GET_CODE (x) == MINUS)
5940 {
5941 rtx xop0 = XEXP (x, 0);
5942 rtx xop1 = XEXP (x, 1);
5943
5944 return (arm_address_register_rtx_p (xop0, strict_p)
5945 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5946 }
5947 #endif
5948
5949 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5950 && code == SYMBOL_REF
5951 && CONSTANT_POOL_ADDRESS_P (x)
5952 && ! (flag_pic
5953 && symbol_mentioned_p (get_pool_constant (x))
5954 && ! pcrel_constant_p (get_pool_constant (x))))
5955 return 1;
5956
5957 return 0;
5958 }
5959
5960 /* Return nonzero if X is a valid Thumb-2 address operand. */
5961 static int
5962 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5963 {
5964 bool use_ldrd;
5965 enum rtx_code code = GET_CODE (x);
5966
5967 if (arm_address_register_rtx_p (x, strict_p))
5968 return 1;
5969
5970 use_ldrd = (TARGET_LDRD
5971 && (mode == DImode
5972 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5973
5974 if (code == POST_INC || code == PRE_DEC
5975 || ((code == PRE_INC || code == POST_DEC)
5976 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5977 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5978
5979 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5980 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5981 && GET_CODE (XEXP (x, 1)) == PLUS
5982 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5983 {
5984 /* Thumb-2 only has autoincrement by constant. */
5985 rtx addend = XEXP (XEXP (x, 1), 1);
5986 HOST_WIDE_INT offset;
5987
5988 if (!CONST_INT_P (addend))
5989 return 0;
5990
5991 offset = INTVAL(addend);
5992 if (GET_MODE_SIZE (mode) <= 4)
5993 return (offset > -256 && offset < 256);
5994
5995 return (use_ldrd && offset > -1024 && offset < 1024
5996 && (offset & 3) == 0);
5997 }
5998
5999 /* After reload constants split into minipools will have addresses
6000 from a LABEL_REF. */
6001 else if (reload_completed
6002 && (code == LABEL_REF
6003 || (code == CONST
6004 && GET_CODE (XEXP (x, 0)) == PLUS
6005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6006 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6007 return 1;
6008
6009 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6010 return 0;
6011
6012 else if (code == PLUS)
6013 {
6014 rtx xop0 = XEXP (x, 0);
6015 rtx xop1 = XEXP (x, 1);
6016
6017 return ((arm_address_register_rtx_p (xop0, strict_p)
6018 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6019 || (!strict_p && will_be_in_index_register (xop1))))
6020 || (arm_address_register_rtx_p (xop1, strict_p)
6021 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6022 }
6023
6024 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6025 && code == SYMBOL_REF
6026 && CONSTANT_POOL_ADDRESS_P (x)
6027 && ! (flag_pic
6028 && symbol_mentioned_p (get_pool_constant (x))
6029 && ! pcrel_constant_p (get_pool_constant (x))))
6030 return 1;
6031
6032 return 0;
6033 }
6034
6035 /* Return nonzero if INDEX is valid for an address index operand in
6036 ARM state. */
6037 static int
6038 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6039 int strict_p)
6040 {
6041 HOST_WIDE_INT range;
6042 enum rtx_code code = GET_CODE (index);
6043
6044 /* Standard coprocessor addressing modes. */
6045 if (TARGET_HARD_FLOAT
6046 && TARGET_VFP
6047 && (mode == SFmode || mode == DFmode))
6048 return (code == CONST_INT && INTVAL (index) < 1024
6049 && INTVAL (index) > -1024
6050 && (INTVAL (index) & 3) == 0);
6051
6052 /* For quad modes, we restrict the constant offset to be slightly less
6053 than what the instruction format permits. We do this because for
6054 quad mode moves, we will actually decompose them into two separate
6055 double-mode reads or writes. INDEX must therefore be a valid
6056 (double-mode) offset and so should INDEX+8. */
6057 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6058 return (code == CONST_INT
6059 && INTVAL (index) < 1016
6060 && INTVAL (index) > -1024
6061 && (INTVAL (index) & 3) == 0);
6062
6063 /* We have no such constraint on double mode offsets, so we permit the
6064 full range of the instruction format. */
6065 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6066 return (code == CONST_INT
6067 && INTVAL (index) < 1024
6068 && INTVAL (index) > -1024
6069 && (INTVAL (index) & 3) == 0);
6070
6071 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6072 return (code == CONST_INT
6073 && INTVAL (index) < 1024
6074 && INTVAL (index) > -1024
6075 && (INTVAL (index) & 3) == 0);
6076
6077 if (arm_address_register_rtx_p (index, strict_p)
6078 && (GET_MODE_SIZE (mode) <= 4))
6079 return 1;
6080
6081 if (mode == DImode || mode == DFmode)
6082 {
6083 if (code == CONST_INT)
6084 {
6085 HOST_WIDE_INT val = INTVAL (index);
6086
6087 if (TARGET_LDRD)
6088 return val > -256 && val < 256;
6089 else
6090 return val > -4096 && val < 4092;
6091 }
6092
6093 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6094 }
6095
6096 if (GET_MODE_SIZE (mode) <= 4
6097 && ! (arm_arch4
6098 && (mode == HImode
6099 || mode == HFmode
6100 || (mode == QImode && outer == SIGN_EXTEND))))
6101 {
6102 if (code == MULT)
6103 {
6104 rtx xiop0 = XEXP (index, 0);
6105 rtx xiop1 = XEXP (index, 1);
6106
6107 return ((arm_address_register_rtx_p (xiop0, strict_p)
6108 && power_of_two_operand (xiop1, SImode))
6109 || (arm_address_register_rtx_p (xiop1, strict_p)
6110 && power_of_two_operand (xiop0, SImode)));
6111 }
6112 else if (code == LSHIFTRT || code == ASHIFTRT
6113 || code == ASHIFT || code == ROTATERT)
6114 {
6115 rtx op = XEXP (index, 1);
6116
6117 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6118 && CONST_INT_P (op)
6119 && INTVAL (op) > 0
6120 && INTVAL (op) <= 31);
6121 }
6122 }
6123
6124 /* For ARM v4 we may be doing a sign-extend operation during the
6125 load. */
6126 if (arm_arch4)
6127 {
6128 if (mode == HImode
6129 || mode == HFmode
6130 || (outer == SIGN_EXTEND && mode == QImode))
6131 range = 256;
6132 else
6133 range = 4096;
6134 }
6135 else
6136 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6137
6138 return (code == CONST_INT
6139 && INTVAL (index) < range
6140 && INTVAL (index) > -range);
6141 }
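/* For illustration only (assuming ARMv4 or later and an SImode access,
   i.e. the 4096-byte range case above), index forms such as

     (reg Rm)                                -- [Rn, Rm]
     (mult (reg Rm) (const_int 4))           -- [Rn, Rm, lsl #2]
     (const_int 4095), (const_int -4095)     -- [Rn, #4095], [Rn, #-4095]

   are accepted, while (const_int 4096) falls outside the range and is
   rejected.  */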
6142
6143 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6144 index operand. i.e. 1, 2, 4 or 8. */
6145 static bool
6146 thumb2_index_mul_operand (rtx op)
6147 {
6148 HOST_WIDE_INT val;
6149
6150 if (!CONST_INT_P (op))
6151 return false;
6152
6153 val = INTVAL(op);
6154 return (val == 1 || val == 2 || val == 4 || val == 8);
6155 }
6156
6157 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6158 static int
6159 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6160 {
6161 enum rtx_code code = GET_CODE (index);
6162
6163 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6164 /* Standard coprocessor addressing modes. */
6165 if (TARGET_HARD_FLOAT
6166 && TARGET_VFP
6167 && (mode == SFmode || mode == DFmode))
6168 return (code == CONST_INT && INTVAL (index) < 1024
6169 /* Thumb-2 allows only a > -256 index range for its core register
6170 load/stores. Since we allow SF/DF in core registers, we have
6171 to use the intersection between -256~4096 (core) and -1024~1024
6172 (coprocessor). */
6173 && INTVAL (index) > -256
6174 && (INTVAL (index) & 3) == 0);
6175
6176 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6177 {
6178 /* For DImode assume values will usually live in core regs
6179 and only allow LDRD addressing modes. */
6180 if (!TARGET_LDRD || mode != DImode)
6181 return (code == CONST_INT
6182 && INTVAL (index) < 1024
6183 && INTVAL (index) > -1024
6184 && (INTVAL (index) & 3) == 0);
6185 }
6186
6187 /* For quad modes, we restrict the constant offset to be slightly less
6188 than what the instruction format permits. We do this because for
6189 quad mode moves, we will actually decompose them into two separate
6190 double-mode reads or writes. INDEX must therefore be a valid
6191 (double-mode) offset and so should INDEX+8. */
6192 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6193 return (code == CONST_INT
6194 && INTVAL (index) < 1016
6195 && INTVAL (index) > -1024
6196 && (INTVAL (index) & 3) == 0);
6197
6198 /* We have no such constraint on double mode offsets, so we permit the
6199 full range of the instruction format. */
6200 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6201 return (code == CONST_INT
6202 && INTVAL (index) < 1024
6203 && INTVAL (index) > -1024
6204 && (INTVAL (index) & 3) == 0);
6205
6206 if (arm_address_register_rtx_p (index, strict_p)
6207 && (GET_MODE_SIZE (mode) <= 4))
6208 return 1;
6209
6210 if (mode == DImode || mode == DFmode)
6211 {
6212 if (code == CONST_INT)
6213 {
6214 HOST_WIDE_INT val = INTVAL (index);
6215 /* ??? Can we assume ldrd for thumb2? */
6216 /* Thumb-2 ldrd only has reg+const addressing modes. */
6217 /* ldrd supports offsets of +-1020.
6218 However the ldr fallback does not. */
6219 return val > -256 && val < 256 && (val & 3) == 0;
6220 }
6221 else
6222 return 0;
6223 }
6224
6225 if (code == MULT)
6226 {
6227 rtx xiop0 = XEXP (index, 0);
6228 rtx xiop1 = XEXP (index, 1);
6229
6230 return ((arm_address_register_rtx_p (xiop0, strict_p)
6231 && thumb2_index_mul_operand (xiop1))
6232 || (arm_address_register_rtx_p (xiop1, strict_p)
6233 && thumb2_index_mul_operand (xiop0)));
6234 }
6235 else if (code == ASHIFT)
6236 {
6237 rtx op = XEXP (index, 1);
6238
6239 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6240 && CONST_INT_P (op)
6241 && INTVAL (op) > 0
6242 && INTVAL (op) <= 3);
6243 }
6244
6245 return (code == CONST_INT
6246 && INTVAL (index) < 4096
6247 && INTVAL (index) > -256);
6248 }
6249
6250 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6251 static int
6252 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6253 {
6254 int regno;
6255
6256 if (!REG_P (x))
6257 return 0;
6258
6259 regno = REGNO (x);
6260
6261 if (strict_p)
6262 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6263
6264 return (regno <= LAST_LO_REGNUM
6265 || regno > LAST_VIRTUAL_REGISTER
6266 || regno == FRAME_POINTER_REGNUM
6267 || (GET_MODE_SIZE (mode) >= 4
6268 && (regno == STACK_POINTER_REGNUM
6269 || regno >= FIRST_PSEUDO_REGISTER
6270 || x == hard_frame_pointer_rtx
6271 || x == arg_pointer_rtx)));
6272 }
6273
6274 /* Return nonzero if x is a legitimate index register. This is the case
6275 for any base register that can access a QImode object. */
6276 inline static int
6277 thumb1_index_register_rtx_p (rtx x, int strict_p)
6278 {
6279 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6280 }
6281
6282 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6283
6284 The AP may be eliminated to either the SP or the FP, so we use the
6285 least common denominator, e.g. SImode, and offsets from 0 to 64.
6286
6287 ??? Verify whether the above is the right approach.
6288
6289 ??? Also, the FP may be eliminated to the SP, so perhaps that
6290 needs special handling also.
6291
6292 ??? Look at how the mips16 port solves this problem. It probably uses
6293 better ways to solve some of these problems.
6294
6295 Although it is not incorrect, we don't accept QImode and HImode
6296 addresses based on the frame pointer or arg pointer until the
6297 reload pass starts. This is so that eliminating such addresses
6298 into stack based ones won't produce impossible code. */
6299 int
6300 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6301 {
6302 /* ??? Not clear if this is right. Experiment. */
6303 if (GET_MODE_SIZE (mode) < 4
6304 && !(reload_in_progress || reload_completed)
6305 && (reg_mentioned_p (frame_pointer_rtx, x)
6306 || reg_mentioned_p (arg_pointer_rtx, x)
6307 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6308 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6309 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6310 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6311 return 0;
6312
6313 /* Accept any base register. SP only in SImode or larger. */
6314 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6315 return 1;
6316
6317 /* This is PC relative data before arm_reorg runs. */
6318 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6319 && GET_CODE (x) == SYMBOL_REF
6320 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6321 return 1;
6322
6323 /* This is PC relative data after arm_reorg runs. */
6324 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6325 && reload_completed
6326 && (GET_CODE (x) == LABEL_REF
6327 || (GET_CODE (x) == CONST
6328 && GET_CODE (XEXP (x, 0)) == PLUS
6329 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6330 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6331 return 1;
6332
6333 /* Post-inc indexing only supported for SImode and larger. */
6334 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6335 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6336 return 1;
6337
6338 else if (GET_CODE (x) == PLUS)
6339 {
6340 /* REG+REG address can be any two index registers. */
6341 /* We disallow FRAME+REG addressing since we know that FRAME
6342 will be replaced with STACK, and SP relative addressing only
6343 permits SP+OFFSET. */
6344 if (GET_MODE_SIZE (mode) <= 4
6345 && XEXP (x, 0) != frame_pointer_rtx
6346 && XEXP (x, 1) != frame_pointer_rtx
6347 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6348 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6349 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6350 return 1;
6351
6352 /* REG+const has 5-7 bit offset for non-SP registers. */
6353 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6354 || XEXP (x, 0) == arg_pointer_rtx)
6355 && CONST_INT_P (XEXP (x, 1))
6356 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6357 return 1;
6358
6359 /* REG+const has 10-bit offset for SP, but only SImode and
6360 larger is supported. */
6361 /* ??? Should probably check for DI/DFmode overflow here
6362 just like GO_IF_LEGITIMATE_OFFSET does. */
6363 else if (REG_P (XEXP (x, 0))
6364 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6365 && GET_MODE_SIZE (mode) >= 4
6366 && CONST_INT_P (XEXP (x, 1))
6367 && INTVAL (XEXP (x, 1)) >= 0
6368 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6369 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6370 return 1;
6371
6372 else if (REG_P (XEXP (x, 0))
6373 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6374 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6375 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6376 && REGNO (XEXP (x, 0))
6377 <= LAST_VIRTUAL_POINTER_REGISTER))
6378 && GET_MODE_SIZE (mode) >= 4
6379 && CONST_INT_P (XEXP (x, 1))
6380 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6381 return 1;
6382 }
6383
6384 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6385 && GET_MODE_SIZE (mode) == 4
6386 && GET_CODE (x) == SYMBOL_REF
6387 && CONSTANT_POOL_ADDRESS_P (x)
6388 && ! (flag_pic
6389 && symbol_mentioned_p (get_pool_constant (x))
6390 && ! pcrel_constant_p (get_pool_constant (x))))
6391 return 1;
6392
6393 return 0;
6394 }
6395
6396 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6397 instruction of mode MODE. */
6398 int
6399 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6400 {
6401 switch (GET_MODE_SIZE (mode))
6402 {
6403 case 1:
6404 return val >= 0 && val < 32;
6405
6406 case 2:
6407 return val >= 0 && val < 64 && (val & 1) == 0;
6408
6409 default:
6410 return (val >= 0
6411 && (val + GET_MODE_SIZE (mode)) <= 128
6412 && (val & 3) == 0);
6413 }
6414 }
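/* In concrete terms (an illustrative summary of the cases above): byte
   accesses allow offsets 0..31, halfword accesses 0..62 in steps of 2,
   and word accesses 0..124 in steps of 4 (wider modes correspondingly
   less).  So thumb_legitimate_offset_p (SImode, 124) holds, while
   thumb_legitimate_offset_p (SImode, 128) does not.  */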
6415
6416 bool
6417 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6418 {
6419 if (TARGET_ARM)
6420 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6421 else if (TARGET_THUMB2)
6422 return thumb2_legitimate_address_p (mode, x, strict_p);
6423 else /* if (TARGET_THUMB1) */
6424 return thumb1_legitimate_address_p (mode, x, strict_p);
6425 }
6426
6427 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6428
6429 Given an rtx X being reloaded into a reg required to be
6430 in class CLASS, return the class of reg to actually use.
6431 In general this is just CLASS, but for the Thumb core registers and
6432 immediate constants we prefer a LO_REGS class or a subset. */
6433
6434 static reg_class_t
6435 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6436 {
6437 if (TARGET_32BIT)
6438 return rclass;
6439 else
6440 {
6441 if (rclass == GENERAL_REGS
6442 || rclass == HI_REGS
6443 || rclass == NO_REGS
6444 || rclass == STACK_REG)
6445 return LO_REGS;
6446 else
6447 return rclass;
6448 }
6449 }
6450
6451 /* Build the SYMBOL_REF for __tls_get_addr. */
6452
6453 static GTY(()) rtx tls_get_addr_libfunc;
6454
6455 static rtx
6456 get_tls_get_addr (void)
6457 {
6458 if (!tls_get_addr_libfunc)
6459 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6460 return tls_get_addr_libfunc;
6461 }
6462
6463 rtx
6464 arm_load_tp (rtx target)
6465 {
6466 if (!target)
6467 target = gen_reg_rtx (SImode);
6468
6469 if (TARGET_HARD_TP)
6470 {
6471 /* Can return in any reg. */
6472 emit_insn (gen_load_tp_hard (target));
6473 }
6474 else
6475 {
6476 /* Always returned in r0. Immediately copy the result into a pseudo,
6477 otherwise other uses of r0 (e.g. setting up function arguments) may
6478 clobber the value. */
6479
6480 rtx tmp;
6481
6482 emit_insn (gen_load_tp_soft ());
6483
6484 tmp = gen_rtx_REG (SImode, 0);
6485 emit_move_insn (target, tmp);
6486 }
6487 return target;
6488 }
6489
6490 static rtx
6491 load_tls_operand (rtx x, rtx reg)
6492 {
6493 rtx tmp;
6494
6495 if (reg == NULL_RTX)
6496 reg = gen_reg_rtx (SImode);
6497
6498 tmp = gen_rtx_CONST (SImode, x);
6499
6500 emit_move_insn (reg, tmp);
6501
6502 return reg;
6503 }
6504
6505 static rtx
6506 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6507 {
6508 rtx insns, label, labelno, sum;
6509
6510 gcc_assert (reloc != TLS_DESCSEQ);
6511 start_sequence ();
6512
6513 labelno = GEN_INT (pic_labelno++);
6514 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6515 label = gen_rtx_CONST (VOIDmode, label);
6516
6517 sum = gen_rtx_UNSPEC (Pmode,
6518 gen_rtvec (4, x, GEN_INT (reloc), label,
6519 GEN_INT (TARGET_ARM ? 8 : 4)),
6520 UNSPEC_TLS);
6521 reg = load_tls_operand (sum, reg);
6522
6523 if (TARGET_ARM)
6524 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6525 else
6526 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6527
6528 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6529 LCT_PURE, /* LCT_CONST? */
6530 Pmode, 1, reg, Pmode);
6531
6532 insns = get_insns ();
6533 end_sequence ();
6534
6535 return insns;
6536 }
6537
6538 static rtx
6539 arm_tls_descseq_addr (rtx x, rtx reg)
6540 {
6541 rtx labelno = GEN_INT (pic_labelno++);
6542 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6543 rtx sum = gen_rtx_UNSPEC (Pmode,
6544 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6545 gen_rtx_CONST (VOIDmode, label),
6546 GEN_INT (!TARGET_ARM)),
6547 UNSPEC_TLS);
6548 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6549
6550 emit_insn (gen_tlscall (x, labelno));
6551 if (!reg)
6552 reg = gen_reg_rtx (SImode);
6553 else
6554 gcc_assert (REGNO (reg) != 0);
6555
6556 emit_move_insn (reg, reg0);
6557
6558 return reg;
6559 }
6560
6561 rtx
6562 legitimize_tls_address (rtx x, rtx reg)
6563 {
6564 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6565 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6566
6567 switch (model)
6568 {
6569 case TLS_MODEL_GLOBAL_DYNAMIC:
6570 if (TARGET_GNU2_TLS)
6571 {
6572 reg = arm_tls_descseq_addr (x, reg);
6573
6574 tp = arm_load_tp (NULL_RTX);
6575
6576 dest = gen_rtx_PLUS (Pmode, tp, reg);
6577 }
6578 else
6579 {
6580 /* Original scheme */
6581 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6582 dest = gen_reg_rtx (Pmode);
6583 emit_libcall_block (insns, dest, ret, x);
6584 }
6585 return dest;
6586
6587 case TLS_MODEL_LOCAL_DYNAMIC:
6588 if (TARGET_GNU2_TLS)
6589 {
6590 reg = arm_tls_descseq_addr (x, reg);
6591
6592 tp = arm_load_tp (NULL_RTX);
6593
6594 dest = gen_rtx_PLUS (Pmode, tp, reg);
6595 }
6596 else
6597 {
6598 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6599
6600 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6601 share the LDM result with other LD model accesses. */
6602 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6603 UNSPEC_TLS);
6604 dest = gen_reg_rtx (Pmode);
6605 emit_libcall_block (insns, dest, ret, eqv);
6606
6607 /* Load the addend. */
6608 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6609 GEN_INT (TLS_LDO32)),
6610 UNSPEC_TLS);
6611 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6612 dest = gen_rtx_PLUS (Pmode, dest, addend);
6613 }
6614 return dest;
6615
6616 case TLS_MODEL_INITIAL_EXEC:
6617 labelno = GEN_INT (pic_labelno++);
6618 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6619 label = gen_rtx_CONST (VOIDmode, label);
6620 sum = gen_rtx_UNSPEC (Pmode,
6621 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6622 GEN_INT (TARGET_ARM ? 8 : 4)),
6623 UNSPEC_TLS);
6624 reg = load_tls_operand (sum, reg);
6625
6626 if (TARGET_ARM)
6627 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6628 else if (TARGET_THUMB2)
6629 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6630 else
6631 {
6632 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6633 emit_move_insn (reg, gen_const_mem (SImode, reg));
6634 }
6635
6636 tp = arm_load_tp (NULL_RTX);
6637
6638 return gen_rtx_PLUS (Pmode, tp, reg);
6639
6640 case TLS_MODEL_LOCAL_EXEC:
6641 tp = arm_load_tp (NULL_RTX);
6642
6643 reg = gen_rtx_UNSPEC (Pmode,
6644 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6645 UNSPEC_TLS);
6646 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6647
6648 return gen_rtx_PLUS (Pmode, tp, reg);
6649
6650 default:
6651 abort ();
6652 }
6653 }
6654
6655 /* Try machine-dependent ways of modifying an illegitimate address
6656 to be legitimate. If we find one, return the new, valid address. */
6657 rtx
6658 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6659 {
6660 if (!TARGET_ARM)
6661 {
6662 /* TODO: legitimize_address for Thumb2. */
6663 if (TARGET_THUMB2)
6664 return x;
6665 return thumb_legitimize_address (x, orig_x, mode);
6666 }
6667
6668 if (arm_tls_symbol_p (x))
6669 return legitimize_tls_address (x, NULL_RTX);
6670
6671 if (GET_CODE (x) == PLUS)
6672 {
6673 rtx xop0 = XEXP (x, 0);
6674 rtx xop1 = XEXP (x, 1);
6675
6676 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6677 xop0 = force_reg (SImode, xop0);
6678
6679 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6680 xop1 = force_reg (SImode, xop1);
6681
6682 if (ARM_BASE_REGISTER_RTX_P (xop0)
6683 && CONST_INT_P (xop1))
6684 {
6685 HOST_WIDE_INT n, low_n;
6686 rtx base_reg, val;
6687 n = INTVAL (xop1);
6688
6689 /* VFP addressing modes actually allow greater offsets, but for
6690 now we just stick with the lowest common denominator. */
6691 if (mode == DImode
6692 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6693 {
6694 low_n = n & 0x0f;
6695 n &= ~0x0f;
6696 if (low_n > 4)
6697 {
6698 n += 16;
6699 low_n -= 16;
6700 }
6701 }
6702 else
6703 {
6704 low_n = ((mode) == TImode ? 0
6705 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6706 n -= low_n;
6707 }
6708
6709 base_reg = gen_reg_rtx (SImode);
6710 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6711 emit_move_insn (base_reg, val);
6712 x = plus_constant (Pmode, base_reg, low_n);
6713 }
6714 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6715 x = gen_rtx_PLUS (SImode, xop0, xop1);
6716 }
6717
6718 /* XXX We don't allow MINUS any more -- see comment in
6719 arm_legitimate_address_outer_p (). */
6720 else if (GET_CODE (x) == MINUS)
6721 {
6722 rtx xop0 = XEXP (x, 0);
6723 rtx xop1 = XEXP (x, 1);
6724
6725 if (CONSTANT_P (xop0))
6726 xop0 = force_reg (SImode, xop0);
6727
6728 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6729 xop1 = force_reg (SImode, xop1);
6730
6731 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6732 x = gen_rtx_MINUS (SImode, xop0, xop1);
6733 }
6734
6735 /* Make sure to take full advantage of the pre-indexed addressing mode
6736 with absolute addresses which often allows for the base register to
6737 be factorized for multiple adjacent memory references, and it might
6738 even allow for the minipool to be avoided entirely. */
6739 else if (CONST_INT_P (x) && optimize > 0)
6740 {
6741 unsigned int bits;
6742 HOST_WIDE_INT mask, base, index;
6743 rtx base_reg;
6744
6745 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6746 use an 8-bit index. So let's use a 12-bit index for SImode only and
6747 hope that arm_gen_constant will enable ldrb to use more bits. */
6748 bits = (mode == SImode) ? 12 : 8;
6749 mask = (1 << bits) - 1;
6750 base = INTVAL (x) & ~mask;
6751 index = INTVAL (x) & mask;
6752 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6753 {
6754 /* It'll most probably be more efficient to generate the base
6755 with more bits set and use a negative index instead. */
6756 base |= mask;
6757 index -= mask;
6758 }
6759 base_reg = force_reg (SImode, GEN_INT (base));
6760 x = plus_constant (Pmode, base_reg, index);
6761 }
6762
6763 if (flag_pic)
6764 {
6765 /* We need to find and carefully transform any SYMBOL and LABEL
6766 references; so go back to the original address expression. */
6767 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6768
6769 if (new_x != orig_x)
6770 x = new_x;
6771 }
6772
6773 return x;
6774 }
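/* As a worked example of the CONST_INT case above (illustrative values):
   for an SImode access to absolute address 0x3456 we get bits = 12 and
   mask = 0xfff, hence base = 0x3000 and index = 0x456; the base has few
   bits set, so no negative-index adjustment is made and the address is
   rewritten as base_reg + 0x456, letting neighbouring absolute
   references share the 0x3000 base register.  */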
6775
6776
6777 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6778 to be legitimate. If we find one, return the new, valid address. */
6779 rtx
6780 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6781 {
6782 if (arm_tls_symbol_p (x))
6783 return legitimize_tls_address (x, NULL_RTX);
6784
6785 if (GET_CODE (x) == PLUS
6786 && CONST_INT_P (XEXP (x, 1))
6787 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6788 || INTVAL (XEXP (x, 1)) < 0))
6789 {
6790 rtx xop0 = XEXP (x, 0);
6791 rtx xop1 = XEXP (x, 1);
6792 HOST_WIDE_INT offset = INTVAL (xop1);
6793
6794 /* Try and fold the offset into a biasing of the base register and
6795 then offsetting that. Don't do this when optimizing for space
6796 since it can cause too many CSEs. */
6797 if (optimize_size && offset >= 0
6798 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6799 {
6800 HOST_WIDE_INT delta;
6801
6802 if (offset >= 256)
6803 delta = offset - (256 - GET_MODE_SIZE (mode));
6804 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6805 delta = 31 * GET_MODE_SIZE (mode);
6806 else
6807 delta = offset & (~31 * GET_MODE_SIZE (mode));
6808
6809 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6810 NULL_RTX);
6811 x = plus_constant (Pmode, xop0, delta);
6812 }
6813 else if (offset < 0 && offset > -256)
6814 /* Small negative offsets are best done with a subtract before the
6815 dereference; forcing these into a register normally takes two
6816 instructions. */
6817 x = force_operand (x, NULL_RTX);
6818 else
6819 {
6820 /* For the remaining cases, force the constant into a register. */
6821 xop1 = force_reg (SImode, xop1);
6822 x = gen_rtx_PLUS (SImode, xop0, xop1);
6823 }
6824 }
6825 else if (GET_CODE (x) == PLUS
6826 && s_register_operand (XEXP (x, 1), SImode)
6827 && !s_register_operand (XEXP (x, 0), SImode))
6828 {
6829 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6830
6831 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6832 }
6833
6834 if (flag_pic)
6835 {
6836 /* We need to find and carefully transform any SYMBOL and LABEL
6837 references; so go back to the original address expression. */
6838 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6839
6840 if (new_x != orig_x)
6841 x = new_x;
6842 }
6843
6844 return x;
6845 }
6846
6847 bool
6848 arm_legitimize_reload_address (rtx *p,
6849 enum machine_mode mode,
6850 int opnum, int type,
6851 int ind_levels ATTRIBUTE_UNUSED)
6852 {
6853 /* We must recognize output that we have already generated ourselves. */
6854 if (GET_CODE (*p) == PLUS
6855 && GET_CODE (XEXP (*p, 0)) == PLUS
6856 && REG_P (XEXP (XEXP (*p, 0), 0))
6857 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6858 && CONST_INT_P (XEXP (*p, 1)))
6859 {
6860 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6861 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6862 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6863 return true;
6864 }
6865
6866 if (GET_CODE (*p) == PLUS
6867 && REG_P (XEXP (*p, 0))
6868 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6869 /* If the base register is equivalent to a constant, let the generic
6870 code handle it. Otherwise we will run into problems if a future
6871 reload pass decides to rematerialize the constant. */
6872 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6873 && CONST_INT_P (XEXP (*p, 1)))
6874 {
6875 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6876 HOST_WIDE_INT low, high;
6877
6878 /* Detect coprocessor load/stores. */
6879 bool coproc_p = ((TARGET_HARD_FLOAT
6880 && TARGET_VFP
6881 && (mode == SFmode || mode == DFmode))
6882 || (TARGET_REALLY_IWMMXT
6883 && VALID_IWMMXT_REG_MODE (mode))
6884 || (TARGET_NEON
6885 && (VALID_NEON_DREG_MODE (mode)
6886 || VALID_NEON_QREG_MODE (mode))));
6887
6888 /* For some conditions, bail out when lower two bits are unaligned. */
6889 if ((val & 0x3) != 0
6890 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6891 && (coproc_p
6892 /* For DI, and DF under soft-float: */
6893 || ((mode == DImode || mode == DFmode)
6894 /* Without ldrd, we use stm/ldm, which does not
6895 fare well with unaligned bits. */
6896 && (! TARGET_LDRD
6897 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6898 || TARGET_THUMB2))))
6899 return false;
6900
6901 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6902 of which the (reg+high) gets turned into a reload add insn,
6903 we try to decompose the index into high/low values that can often
6904 also lead to better reload CSE.
6905 For example:
6906 ldr r0, [r2, #4100] // Offset too large
6907 ldr r1, [r2, #4104] // Offset too large
6908
6909 is best reloaded as:
6910 add t1, r2, #4096
6911 ldr r0, [t1, #4]
6912 add t2, r2, #4096
6913 ldr r1, [t2, #8]
6914
6915 which post-reload CSE can simplify in most cases to eliminate the
6916 second add instruction:
6917 add t1, r2, #4096
6918 ldr r0, [t1, #4]
6919 ldr r1, [t1, #8]
6920
6921 The idea here is that we want to split out the bits of the constant
6922 as a mask, rather than as subtracting the maximum offset that the
6923 respective type of load/store used can handle.
6924
6925 When encountering negative offsets, we can still utilize it even if
6926 the overall offset is positive; sometimes this may lead to an immediate
6927 that can be constructed with fewer instructions.
6928 For example:
6929 ldr r0, [r2, #0x3FFFFC]
6930
6931 This is best reloaded as:
6932 add t1, r2, #0x400000
6933 ldr r0, [t1, #-4]
6934
6935 The trick for spotting this for a load insn with N bits of offset
6936 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6937 negative offset that is going to make bit N and all the bits below
6938 it become zero in the remainder part.
6939
6940 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6941 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6942 used in most cases of ARM load/store instructions. */
6943
6944 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6945 (((VAL) & ((1 << (N)) - 1)) \
6946 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6947 : 0)
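/* As a worked example (repeating the 0x3FFFFC case above):
   SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) is -4, so high becomes 0x400000
   and we reload "add t1, r2, #0x400000" + "ldr r0, [t1, #-4]".
   Likewise, for the #4100/#4104 pair above the macro yields lows of
   -4092 and -4088 against a common high of 8192, which is what lets
   post-reload CSE share the added base register.  */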
6948
6949 if (coproc_p)
6950 {
6951 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6952
6953 /* NEON quad-word load/stores are made of two double-word accesses,
6954 so the valid index range is reduced by 8. Treat as 9-bit range if
6955 we go over it. */
6956 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6957 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6958 }
6959 else if (GET_MODE_SIZE (mode) == 8)
6960 {
6961 if (TARGET_LDRD)
6962 low = (TARGET_THUMB2
6963 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6964 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6965 else
6966 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6967 to access doublewords. The supported load/store offsets are
6968 -8, -4, and 4, which we try to produce here. */
6969 low = ((val & 0xf) ^ 0x8) - 0x8;
6970 }
6971 else if (GET_MODE_SIZE (mode) < 8)
6972 {
6973 /* NEON element load/stores do not have an offset. */
6974 if (TARGET_NEON_FP16 && mode == HFmode)
6975 return false;
6976
6977 if (TARGET_THUMB2)
6978 {
6979 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6980 Try the wider 12-bit range first, and re-try if the result
6981 is out of range. */
6982 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6983 if (low < -255)
6984 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6985 }
6986 else
6987 {
6988 if (mode == HImode || mode == HFmode)
6989 {
6990 if (arm_arch4)
6991 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6992 else
6993 {
6994 /* The storehi/movhi_bytes fallbacks can use only
6995 [-4094,+4094] of the full ldrb/strb index range. */
6996 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6997 if (low == 4095 || low == -4095)
6998 return false;
6999 }
7000 }
7001 else
7002 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7003 }
7004 }
7005 else
7006 return false;
7007
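     /* Sign-extend the 32-bit remainder (val - low); it becomes the high
	part that is reloaded into a base register below.  */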
7008 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7009 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7010 - (unsigned HOST_WIDE_INT) 0x80000000);
7011 /* Check for overflow or zero */
7012 if (low == 0 || high == 0 || (high + low != val))
7013 return false;
7014
7015 /* Reload the high part into a base reg; leave the low part
7016 in the mem. */
7017 *p = gen_rtx_PLUS (GET_MODE (*p),
7018 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7019 GEN_INT (high)),
7020 GEN_INT (low));
7021 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7022 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7023 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7024 return true;
7025 }
7026
7027 return false;
7028 }
7029
7030 rtx
7031 thumb_legitimize_reload_address (rtx *x_p,
7032 enum machine_mode mode,
7033 int opnum, int type,
7034 int ind_levels ATTRIBUTE_UNUSED)
7035 {
7036 rtx x = *x_p;
7037
7038 if (GET_CODE (x) == PLUS
7039 && GET_MODE_SIZE (mode) < 4
7040 && REG_P (XEXP (x, 0))
7041 && XEXP (x, 0) == stack_pointer_rtx
7042 && CONST_INT_P (XEXP (x, 1))
7043 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7044 {
7045 rtx orig_x = x;
7046
7047 x = copy_rtx (x);
7048 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7049 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7050 return x;
7051 }
7052
7053 /* If both registers are hi-regs, then it's better to reload the
7054 entire expression rather than each register individually. That
7055 only requires one reload register rather than two. */
7056 if (GET_CODE (x) == PLUS
7057 && REG_P (XEXP (x, 0))
7058 && REG_P (XEXP (x, 1))
7059 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7060 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7061 {
7062 rtx orig_x = x;
7063
7064 x = copy_rtx (x);
7065 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7066 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7067 return x;
7068 }
7069
7070 return NULL;
7071 }
7072
7073 /* Test for various thread-local symbols. */
7074
7075 /* Return TRUE if X is a thread-local symbol. */
7076
7077 static bool
7078 arm_tls_symbol_p (rtx x)
7079 {
7080 if (! TARGET_HAVE_TLS)
7081 return false;
7082
7083 if (GET_CODE (x) != SYMBOL_REF)
7084 return false;
7085
7086 return SYMBOL_REF_TLS_MODEL (x) != 0;
7087 }
7088
7089 /* Helper for arm_tls_referenced_p. */
7090
7091 static int
7092 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7093 {
7094 if (GET_CODE (*x) == SYMBOL_REF)
7095 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7096
7097 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7098 TLS offsets, not real symbol references. */
7099 if (GET_CODE (*x) == UNSPEC
7100 && XINT (*x, 1) == UNSPEC_TLS)
7101 return -1;
7102
7103 return 0;
7104 }
7105
7106 /* Return TRUE if X contains any TLS symbol references. */
7107
7108 bool
7109 arm_tls_referenced_p (rtx x)
7110 {
7111 if (! TARGET_HAVE_TLS)
7112 return false;
7113
7114 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7115 }
7116
7117 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7118
7119 On the ARM, allow any integer (invalid ones are removed later by insn
7120 patterns), nice doubles and symbol_refs which refer to the function's
7121 constant pool XXX.
7122
7123 When generating pic allow anything. */
7124
7125 static bool
7126 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7127 {
7128 /* At present, we have no support for Neon structure constants, so forbid
7129 them here. It might be possible to handle simple cases like 0 and -1
7130 in future. */
7131 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7132 return false;
7133
7134 return flag_pic || !label_mentioned_p (x);
7135 }
7136
7137 static bool
7138 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7139 {
7140 return (CONST_INT_P (x)
7141 || CONST_DOUBLE_P (x)
7142 || CONSTANT_ADDRESS_P (x)
7143 || flag_pic);
7144 }
7145
7146 static bool
7147 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7148 {
7149 return (!arm_cannot_force_const_mem (mode, x)
7150 && (TARGET_32BIT
7151 ? arm_legitimate_constant_p_1 (mode, x)
7152 : thumb_legitimate_constant_p (mode, x)));
7153 }
7154
7155 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7156
7157 static bool
7158 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7159 {
7160 rtx base, offset;
7161
7162 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7163 {
7164 split_const (x, &base, &offset);
7165 if (GET_CODE (base) == SYMBOL_REF
7166 && !offset_within_block_p (base, INTVAL (offset)))
7167 return true;
7168 }
7169 return arm_tls_referenced_p (x);
7170 }
7171 \f
7172 #define REG_OR_SUBREG_REG(X) \
7173 (REG_P (X) \
7174 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7175
7176 #define REG_OR_SUBREG_RTX(X) \
7177 (REG_P (X) ? (X) : SUBREG_REG (X))
7178
7179 static inline int
7180 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7181 {
7182 enum machine_mode mode = GET_MODE (x);
7183 int total, words;
7184
7185 switch (code)
7186 {
7187 case ASHIFT:
7188 case ASHIFTRT:
7189 case LSHIFTRT:
7190 case ROTATERT:
7191 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7192
7193 case PLUS:
7194 case MINUS:
7195 case COMPARE:
7196 case NEG:
7197 case NOT:
7198 return COSTS_N_INSNS (1);
7199
7200 case MULT:
7201 if (CONST_INT_P (XEXP (x, 1)))
7202 {
7203 int cycles = 0;
7204 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7205
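     	    /* Count roughly one cycle for every two significant bits of
	       the constant, on top of the base cost returned below.  */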
7206 while (i)
7207 {
7208 i >>= 2;
7209 cycles++;
7210 }
7211 return COSTS_N_INSNS (2) + cycles;
7212 }
7213 return COSTS_N_INSNS (1) + 16;
7214
7215 case SET:
7216 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7217 the mode. */
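           For example, under this costing a store of an SImode register
	   to memory comes out as COSTS_N_INSNS (1) + 4.  */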
7218 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7219 return (COSTS_N_INSNS (words)
7220 + 4 * ((MEM_P (SET_SRC (x)))
7221 + MEM_P (SET_DEST (x))));
7222
7223 case CONST_INT:
7224 if (outer == SET)
7225 {
7226 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7227 return 0;
7228 if (thumb_shiftable_const (INTVAL (x)))
7229 return COSTS_N_INSNS (2);
7230 return COSTS_N_INSNS (3);
7231 }
7232 else if ((outer == PLUS || outer == COMPARE)
7233 && INTVAL (x) < 256 && INTVAL (x) > -256)
7234 return 0;
7235 else if ((outer == IOR || outer == XOR || outer == AND)
7236 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7237 return COSTS_N_INSNS (1);
7238 else if (outer == AND)
7239 {
7240 int i;
7241 /* This duplicates the tests in the andsi3 expander. */
7242 for (i = 9; i <= 31; i++)
7243 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7244 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7245 return COSTS_N_INSNS (2);
7246 }
7247 else if (outer == ASHIFT || outer == ASHIFTRT
7248 || outer == LSHIFTRT)
7249 return 0;
7250 return COSTS_N_INSNS (2);
7251
7252 case CONST:
7253 case CONST_DOUBLE:
7254 case LABEL_REF:
7255 case SYMBOL_REF:
7256 return COSTS_N_INSNS (3);
7257
7258 case UDIV:
7259 case UMOD:
7260 case DIV:
7261 case MOD:
7262 return 100;
7263
7264 case TRUNCATE:
7265 return 99;
7266
7267 case AND:
7268 case XOR:
7269 case IOR:
7270 /* XXX guess. */
7271 return 8;
7272
7273 case MEM:
7274 /* XXX another guess. */
7275 /* Memory costs quite a lot for the first word, but subsequent words
7276 load at the equivalent of a single insn each. */
7277 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7278 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7279 ? 4 : 0));
7280
7281 case IF_THEN_ELSE:
7282 /* XXX a guess. */
7283 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7284 return 14;
7285 return 2;
7286
7287 case SIGN_EXTEND:
7288 case ZERO_EXTEND:
7289 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7290 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7291
7292 if (mode == SImode)
7293 return total;
7294
7295 if (arm_arch6)
7296 return total + COSTS_N_INSNS (1);
7297
7298 /* Assume a two-shift sequence. Increase the cost slightly so
7299 we prefer actual shifts over an extend operation. */
7300 return total + 1 + COSTS_N_INSNS (2);
7301
7302 default:
7303 return 99;
7304 }
7305 }
7306
7307 static inline bool
7308 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7309 {
7310 enum machine_mode mode = GET_MODE (x);
7311 enum rtx_code subcode;
7312 rtx operand;
7313 enum rtx_code code = GET_CODE (x);
7314 *total = 0;
7315
7316 switch (code)
7317 {
7318 case MEM:
7319 /* Memory costs quite a lot for the first word, but subsequent words
7320 load at the equivalent of a single insn each. */
7321 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7322 return true;
7323
7324 case DIV:
7325 case MOD:
7326 case UDIV:
7327 case UMOD:
7328 if (TARGET_HARD_FLOAT && mode == SFmode)
7329 *total = COSTS_N_INSNS (2);
7330 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7331 *total = COSTS_N_INSNS (4);
7332 else
7333 *total = COSTS_N_INSNS (20);
7334 return false;
7335
7336 case ROTATE:
7337 if (REG_P (XEXP (x, 1)))
7338 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7339 else if (!CONST_INT_P (XEXP (x, 1)))
7340 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7341
7342 /* Fall through */
7343 case ROTATERT:
7344 if (mode != SImode)
7345 {
7346 *total += COSTS_N_INSNS (4);
7347 return true;
7348 }
7349
7350 /* Fall through */
7351 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7352 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7353 if (mode == DImode)
7354 {
7355 *total += COSTS_N_INSNS (3);
7356 return true;
7357 }
7358
7359 *total += COSTS_N_INSNS (1);
7360 /* Increase the cost of complex shifts because they aren't any faster,
7361 and reduce dual issue opportunities. */
7362 if (arm_tune_cortex_a9
7363 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7364 ++*total;
7365
7366 return true;
7367
7368 case MINUS:
7369 if (mode == DImode)
7370 {
7371 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7372 if (CONST_INT_P (XEXP (x, 0))
7373 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7374 {
7375 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7376 return true;
7377 }
7378
7379 if (CONST_INT_P (XEXP (x, 1))
7380 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7381 {
7382 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7383 return true;
7384 }
7385
7386 return false;
7387 }
7388
7389 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7390 {
7391 if (TARGET_HARD_FLOAT
7392 && (mode == SFmode
7393 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7394 {
7395 *total = COSTS_N_INSNS (1);
7396 if (CONST_DOUBLE_P (XEXP (x, 0))
7397 && arm_const_double_rtx (XEXP (x, 0)))
7398 {
7399 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7400 return true;
7401 }
7402
7403 if (CONST_DOUBLE_P (XEXP (x, 1))
7404 && arm_const_double_rtx (XEXP (x, 1)))
7405 {
7406 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7407 return true;
7408 }
7409
7410 return false;
7411 }
7412 *total = COSTS_N_INSNS (20);
7413 return false;
7414 }
7415
7416 *total = COSTS_N_INSNS (1);
7417 if (CONST_INT_P (XEXP (x, 0))
7418 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7419 {
7420 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7421 return true;
7422 }
7423
7424 subcode = GET_CODE (XEXP (x, 1));
7425 if (subcode == ASHIFT || subcode == ASHIFTRT
7426 || subcode == LSHIFTRT
7427 || subcode == ROTATE || subcode == ROTATERT)
7428 {
7429 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7430 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7431 return true;
7432 }
7433
7434 /* A shift as a part of RSB costs no more than RSB itself. */
7435 if (GET_CODE (XEXP (x, 0)) == MULT
7436 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7437 {
7438 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7439 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7440 return true;
7441 }
7442
7443 if (subcode == MULT
7444 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7445 {
7446 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7447 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7448 return true;
7449 }
7450
7451 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7452 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7453 {
7454 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7455 if (REG_P (XEXP (XEXP (x, 1), 0))
7456 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7457 *total += COSTS_N_INSNS (1);
7458
7459 return true;
7460 }
7461
7462 /* Fall through */
7463
7464 case PLUS:
7465 if (code == PLUS && arm_arch6 && mode == SImode
7466 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7467 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7468 {
7469 *total = COSTS_N_INSNS (1);
7470 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7471 0, speed);
7472 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7473 return true;
7474 }
7475
7476 /* MLA: All arguments must be registers. We filter out
7477 multiplication by a power of two, so that we fall down into
7478 the code below. */
7479 if (GET_CODE (XEXP (x, 0)) == MULT
7480 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7481 {
7482 /* The cost comes from the cost of the multiply. */
7483 return false;
7484 }
7485
7486 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7487 {
7488 if (TARGET_HARD_FLOAT
7489 && (mode == SFmode
7490 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7491 {
7492 *total = COSTS_N_INSNS (1);
7493 if (CONST_DOUBLE_P (XEXP (x, 1))
7494 && arm_const_double_rtx (XEXP (x, 1)))
7495 {
7496 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7497 return true;
7498 }
7499
7500 return false;
7501 }
7502
7503 *total = COSTS_N_INSNS (20);
7504 return false;
7505 }
7506
7507 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7508 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7509 {
7510 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7511 if (REG_P (XEXP (XEXP (x, 0), 0))
7512 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7513 *total += COSTS_N_INSNS (1);
7514 return true;
7515 }
7516
7517 /* Fall through */
7518
7519 case AND: case XOR: case IOR:
7520
7521       /* Normally the frame registers will be split into reg+const during
7522 reload, so it is a bad idea to combine them with other instructions,
7523 since then they might not be moved outside of loops. As a compromise
7524 we allow integration with ops that have a constant as their second
7525 operand. */
7526 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7527 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7528 && !CONST_INT_P (XEXP (x, 1)))
7529 *total = COSTS_N_INSNS (1);
7530
7531 if (mode == DImode)
7532 {
7533 *total += COSTS_N_INSNS (2);
7534 if (CONST_INT_P (XEXP (x, 1))
7535 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7536 {
7537 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7538 return true;
7539 }
7540
7541 return false;
7542 }
7543
7544 *total += COSTS_N_INSNS (1);
7545 if (CONST_INT_P (XEXP (x, 1))
7546 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7547 {
7548 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7549 return true;
7550 }
7551 subcode = GET_CODE (XEXP (x, 0));
7552 if (subcode == ASHIFT || subcode == ASHIFTRT
7553 || subcode == LSHIFTRT
7554 || subcode == ROTATE || subcode == ROTATERT)
7555 {
7556 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7557 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7558 return true;
7559 }
7560
7561 if (subcode == MULT
7562 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7563 {
7564 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7565 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7566 return true;
7567 }
7568
7569 if (subcode == UMIN || subcode == UMAX
7570 || subcode == SMIN || subcode == SMAX)
7571 {
7572 *total = COSTS_N_INSNS (3);
7573 return true;
7574 }
7575
7576 return false;
7577
7578 case MULT:
7579 /* This should have been handled by the CPU specific routines. */
7580 gcc_unreachable ();
7581
7582 case TRUNCATE:
7583 if (arm_arch3m && mode == SImode
7584 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7585 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7586 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7587 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7588 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7589 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7590 {
7591 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7592 return true;
7593 }
7594 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7595 return false;
7596
7597 case NEG:
7598 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7599 {
7600 if (TARGET_HARD_FLOAT
7601 && (mode == SFmode
7602 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7603 {
7604 *total = COSTS_N_INSNS (1);
7605 return false;
7606 }
7607 *total = COSTS_N_INSNS (2);
7608 return false;
7609 }
7610
7611 /* Fall through */
7612 case NOT:
7613 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7614 if (mode == SImode && code == NOT)
7615 {
7616 subcode = GET_CODE (XEXP (x, 0));
7617 if (subcode == ASHIFT || subcode == ASHIFTRT
7618 || subcode == LSHIFTRT
7619 || subcode == ROTATE || subcode == ROTATERT
7620 || (subcode == MULT
7621 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7622 {
7623 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7624 /* Register shifts cost an extra cycle. */
7625 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7626 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7627 subcode, 1, speed);
7628 return true;
7629 }
7630 }
7631
7632 return false;
7633
7634 case IF_THEN_ELSE:
7635 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7636 {
7637 *total = COSTS_N_INSNS (4);
7638 return true;
7639 }
7640
7641 operand = XEXP (x, 0);
7642
7643 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7644 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7645 && REG_P (XEXP (operand, 0))
7646 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7647 *total += COSTS_N_INSNS (1);
7648 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7649 + rtx_cost (XEXP (x, 2), code, 2, speed));
7650 return true;
7651
7652 case NE:
7653 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7654 {
7655 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7656 return true;
7657 }
7658 goto scc_insn;
7659
7660 case GE:
7661 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7662 && mode == SImode && XEXP (x, 1) == const0_rtx)
7663 {
7664 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7665 return true;
7666 }
7667 goto scc_insn;
7668
7669 case LT:
7670 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7671 && mode == SImode && XEXP (x, 1) == const0_rtx)
7672 {
7673 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7674 return true;
7675 }
7676 goto scc_insn;
7677
7678 case EQ:
7679 case GT:
7680 case LE:
7681 case GEU:
7682 case LTU:
7683 case GTU:
7684 case LEU:
7685 case UNORDERED:
7686 case ORDERED:
7687 case UNEQ:
7688 case UNGE:
7689 case UNLT:
7690 case UNGT:
7691 case UNLE:
7692 scc_insn:
7693       /* SCC insns.  If the comparison has already been performed, they
7694 	 cost 2 instructions.  Otherwise they need an additional comparison
7695 	 before them. */
7696 *total = COSTS_N_INSNS (2);
7697 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7698 {
7699 return true;
7700 }
7701
7702 /* Fall through */
7703 case COMPARE:
7704 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7705 {
7706 *total = 0;
7707 return true;
7708 }
7709
7710 *total += COSTS_N_INSNS (1);
7711 if (CONST_INT_P (XEXP (x, 1))
7712 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7713 {
7714 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7715 return true;
7716 }
7717
7718 subcode = GET_CODE (XEXP (x, 0));
7719 if (subcode == ASHIFT || subcode == ASHIFTRT
7720 || subcode == LSHIFTRT
7721 || subcode == ROTATE || subcode == ROTATERT)
7722 {
7723 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7724 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7725 return true;
7726 }
7727
7728 if (subcode == MULT
7729 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7730 {
7731 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7732 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7733 return true;
7734 }
7735
7736 return false;
7737
7738 case UMIN:
7739 case UMAX:
7740 case SMIN:
7741 case SMAX:
7742 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7743 if (!CONST_INT_P (XEXP (x, 1))
7744 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7745 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7746 return true;
7747
7748 case ABS:
7749 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7750 {
7751 if (TARGET_HARD_FLOAT
7752 && (mode == SFmode
7753 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7754 {
7755 *total = COSTS_N_INSNS (1);
7756 return false;
7757 }
7758 *total = COSTS_N_INSNS (20);
7759 return false;
7760 }
7761 *total = COSTS_N_INSNS (1);
7762 if (mode == DImode)
7763 *total += COSTS_N_INSNS (3);
7764 return false;
7765
7766 case SIGN_EXTEND:
7767 case ZERO_EXTEND:
7768 *total = 0;
7769 if (GET_MODE_CLASS (mode) == MODE_INT)
7770 {
7771 rtx op = XEXP (x, 0);
7772 enum machine_mode opmode = GET_MODE (op);
7773
7774 if (mode == DImode)
7775 *total += COSTS_N_INSNS (1);
7776
7777 if (opmode != SImode)
7778 {
7779 if (MEM_P (op))
7780 {
7781 /* If !arm_arch4, we use one of the extendhisi2_mem
7782 or movhi_bytes patterns for HImode. For a QImode
7783 sign extension, we first zero-extend from memory
7784 and then perform a shift sequence. */
7785 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7786 *total += COSTS_N_INSNS (2);
7787 }
7788 else if (arm_arch6)
7789 *total += COSTS_N_INSNS (1);
7790
7791 /* We don't have the necessary insn, so we need to perform some
7792 other operation. */
7793 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7794 /* An and with constant 255. */
7795 *total += COSTS_N_INSNS (1);
7796 else
7797 /* A shift sequence. Increase costs slightly to avoid
7798 combining two shifts into an extend operation. */
7799 *total += COSTS_N_INSNS (2) + 1;
7800 }
7801
7802 return false;
7803 }
7804
7805 switch (GET_MODE (XEXP (x, 0)))
7806 {
7807 case V8QImode:
7808 case V4HImode:
7809 case V2SImode:
7810 case V4QImode:
7811 case V2HImode:
7812 *total = COSTS_N_INSNS (1);
7813 return false;
7814
7815 default:
7816 gcc_unreachable ();
7817 }
7818 gcc_unreachable ();
7819
7820 case ZERO_EXTRACT:
7821 case SIGN_EXTRACT:
7822 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7823 return true;
7824
7825 case CONST_INT:
7826 if (const_ok_for_arm (INTVAL (x))
7827 || const_ok_for_arm (~INTVAL (x)))
7828 *total = COSTS_N_INSNS (1);
7829 else
7830 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7831 INTVAL (x), NULL_RTX,
7832 NULL_RTX, 0, 0));
7833 return true;
7834
7835 case CONST:
7836 case LABEL_REF:
7837 case SYMBOL_REF:
7838 *total = COSTS_N_INSNS (3);
7839 return true;
7840
7841 case HIGH:
7842 *total = COSTS_N_INSNS (1);
7843 return true;
7844
7845 case LO_SUM:
7846 *total = COSTS_N_INSNS (1);
7847 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7848 return true;
7849
7850 case CONST_DOUBLE:
7851 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7852 && (mode == SFmode || !TARGET_VFP_SINGLE))
7853 *total = COSTS_N_INSNS (1);
7854 else
7855 *total = COSTS_N_INSNS (4);
7856 return true;
7857
7858 case SET:
7859 /* The vec_extract patterns accept memory operands that require an
7860 address reload. Account for the cost of that reload to give the
7861 auto-inc-dec pass an incentive to try to replace them. */
7862 if (TARGET_NEON && MEM_P (SET_DEST (x))
7863 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7864 {
7865 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7866 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
7867 *total += COSTS_N_INSNS (1);
7868 return true;
7869 }
7870 /* Likewise for the vec_set patterns. */
7871 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7872 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7873 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7874 {
7875 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7876 *total = rtx_cost (mem, code, 0, speed);
7877 if (!neon_vector_mem_operand (mem, 2, true))
7878 *total += COSTS_N_INSNS (1);
7879 return true;
7880 }
7881 return false;
7882
7883 case UNSPEC:
7884 /* We cost this as high as our memory costs to allow this to
7885 be hoisted from loops. */
7886 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7887 {
7888 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7889 }
7890 return true;
7891
7892 case CONST_VECTOR:
7893 if (TARGET_NEON
7894 && TARGET_HARD_FLOAT
7895 && outer == SET
7896 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7897 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7898 *total = COSTS_N_INSNS (1);
7899 else
7900 *total = COSTS_N_INSNS (4);
7901 return true;
7902
7903 default:
7904 *total = COSTS_N_INSNS (4);
7905 return false;
7906 }
7907 }
7908
7909 /* Estimate the size cost of thumb1 instructions.
7910    For now most of the code is copied from thumb1_rtx_costs.  We need
7911    finer-grained tuning when we have more related test cases. */
7912 static inline int
7913 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7914 {
7915 enum machine_mode mode = GET_MODE (x);
7916 int words;
7917
7918 switch (code)
7919 {
7920 case ASHIFT:
7921 case ASHIFTRT:
7922 case LSHIFTRT:
7923 case ROTATERT:
7924 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7925
7926 case PLUS:
7927 case MINUS:
7928 case COMPARE:
7929 case NEG:
7930 case NOT:
7931 return COSTS_N_INSNS (1);
7932
7933 case MULT:
7934 if (CONST_INT_P (XEXP (x, 1)))
7935 {
7936           /* The Thumb-1 mul instruction can't operate on a constant; we must
7937              load it into a register first. */
7938 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7939 return COSTS_N_INSNS (1) + const_size;
7940 }
7941 return COSTS_N_INSNS (1);
7942
7943 case SET:
7944 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7945 the mode. */
7946 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7947 return (COSTS_N_INSNS (words)
7948 + 4 * ((MEM_P (SET_SRC (x)))
7949 + MEM_P (SET_DEST (x))));
7950
7951 case CONST_INT:
7952 if (outer == SET)
7953 {
7954 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7955 return COSTS_N_INSNS (1);
7956 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7957 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7958 return COSTS_N_INSNS (2);
7959 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7960 if (thumb_shiftable_const (INTVAL (x)))
7961 return COSTS_N_INSNS (2);
7962 return COSTS_N_INSNS (3);
7963 }
7964 else if ((outer == PLUS || outer == COMPARE)
7965 && INTVAL (x) < 256 && INTVAL (x) > -256)
7966 return 0;
7967 else if ((outer == IOR || outer == XOR || outer == AND)
7968 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7969 return COSTS_N_INSNS (1);
7970 else if (outer == AND)
7971 {
7972 int i;
7973 /* This duplicates the tests in the andsi3 expander. */
7974 for (i = 9; i <= 31; i++)
7975 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7976 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7977 return COSTS_N_INSNS (2);
7978 }
7979 else if (outer == ASHIFT || outer == ASHIFTRT
7980 || outer == LSHIFTRT)
7981 return 0;
7982 return COSTS_N_INSNS (2);
7983
7984 case CONST:
7985 case CONST_DOUBLE:
7986 case LABEL_REF:
7987 case SYMBOL_REF:
7988 return COSTS_N_INSNS (3);
7989
7990 case UDIV:
7991 case UMOD:
7992 case DIV:
7993 case MOD:
7994 return 100;
7995
7996 case TRUNCATE:
7997 return 99;
7998
7999 case AND:
8000 case XOR:
8001 case IOR:
8002 /* XXX guess. */
8003 return 8;
8004
8005 case MEM:
8006 /* XXX another guess. */
8007 /* Memory costs quite a lot for the first word, but subsequent words
8008 load at the equivalent of a single insn each. */
8009 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8010 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8011 ? 4 : 0));
8012
8013 case IF_THEN_ELSE:
8014 /* XXX a guess. */
8015 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8016 return 14;
8017 return 2;
8018
8019 case ZERO_EXTEND:
8020 /* XXX still guessing. */
8021 switch (GET_MODE (XEXP (x, 0)))
8022 {
8023 case QImode:
8024 return (1 + (mode == DImode ? 4 : 0)
8025 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8026
8027 case HImode:
8028 return (4 + (mode == DImode ? 4 : 0)
8029 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8030
8031 case SImode:
8032 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8033
8034 default:
8035 return 99;
8036 }
8037
8038 default:
8039 return 99;
8040 }
8041 }
8042
8043 /* RTX costs when optimizing for size. */
8044 static bool
8045 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8046 int *total)
8047 {
8048 enum machine_mode mode = GET_MODE (x);
8049 if (TARGET_THUMB1)
8050 {
8051 *total = thumb1_size_rtx_costs (x, code, outer_code);
8052 return true;
8053 }
8054
8055 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8056 switch (code)
8057 {
8058 case MEM:
8059 /* A memory access costs 1 insn if the mode is small, or the address is
8060          a single register; otherwise it costs one insn per word. */
8061 if (REG_P (XEXP (x, 0)))
8062 *total = COSTS_N_INSNS (1);
8063 else if (flag_pic
8064 && GET_CODE (XEXP (x, 0)) == PLUS
8065 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8066 /* This will be split into two instructions.
8067 See arm.md:calculate_pic_address. */
8068 *total = COSTS_N_INSNS (2);
8069 else
8070 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8071 return true;
8072
8073 case DIV:
8074 case MOD:
8075 case UDIV:
8076 case UMOD:
8077 /* Needs a libcall, so it costs about this. */
8078 *total = COSTS_N_INSNS (2);
8079 return false;
8080
8081 case ROTATE:
8082 if (mode == SImode && REG_P (XEXP (x, 1)))
8083 {
8084 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8085 return true;
8086 }
8087 /* Fall through */
8088 case ROTATERT:
8089 case ASHIFT:
8090 case LSHIFTRT:
8091 case ASHIFTRT:
8092 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8093 {
8094 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8095 return true;
8096 }
8097 else if (mode == SImode)
8098 {
8099 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8100 /* Slightly disparage register shifts, but not by much. */
8101 if (!CONST_INT_P (XEXP (x, 1)))
8102 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8103 return true;
8104 }
8105
8106 /* Needs a libcall. */
8107 *total = COSTS_N_INSNS (2);
8108 return false;
8109
8110 case MINUS:
8111 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8112 && (mode == SFmode || !TARGET_VFP_SINGLE))
8113 {
8114 *total = COSTS_N_INSNS (1);
8115 return false;
8116 }
8117
8118 if (mode == SImode)
8119 {
8120 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8121 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8122
8123 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8124 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8125 || subcode1 == ROTATE || subcode1 == ROTATERT
8126 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8127 || subcode1 == ASHIFTRT)
8128 {
8129 /* It's just the cost of the two operands. */
8130 *total = 0;
8131 return false;
8132 }
8133
8134 *total = COSTS_N_INSNS (1);
8135 return false;
8136 }
8137
8138 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8139 return false;
8140
8141 case PLUS:
8142 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8143 && (mode == SFmode || !TARGET_VFP_SINGLE))
8144 {
8145 *total = COSTS_N_INSNS (1);
8146 return false;
8147 }
8148
8149 /* A shift as a part of ADD costs nothing. */
8150 if (GET_CODE (XEXP (x, 0)) == MULT
8151 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8152 {
8153 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8154 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8155 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8156 return true;
8157 }
8158
8159 /* Fall through */
8160 case AND: case XOR: case IOR:
8161 if (mode == SImode)
8162 {
8163 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8164
8165 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8166 || subcode == LSHIFTRT || subcode == ASHIFTRT
8167 || (code == AND && subcode == NOT))
8168 {
8169 /* It's just the cost of the two operands. */
8170 *total = 0;
8171 return false;
8172 }
8173 }
8174
8175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8176 return false;
8177
8178 case MULT:
8179 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8180 return false;
8181
8182 case NEG:
8183 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8184 && (mode == SFmode || !TARGET_VFP_SINGLE))
8185 {
8186 *total = COSTS_N_INSNS (1);
8187 return false;
8188 }
8189
8190 /* Fall through */
8191 case NOT:
8192 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8193
8194 return false;
8195
8196 case IF_THEN_ELSE:
8197 *total = 0;
8198 return false;
8199
8200 case COMPARE:
8201 if (cc_register (XEXP (x, 0), VOIDmode))
8202         *total = 0;
8203 else
8204 *total = COSTS_N_INSNS (1);
8205 return false;
8206
8207 case ABS:
8208 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8209 && (mode == SFmode || !TARGET_VFP_SINGLE))
8210 *total = COSTS_N_INSNS (1);
8211 else
8212 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8213 return false;
8214
8215 case SIGN_EXTEND:
8216 case ZERO_EXTEND:
8217 return arm_rtx_costs_1 (x, outer_code, total, 0);
8218
8219 case CONST_INT:
8220 if (const_ok_for_arm (INTVAL (x)))
8221 /* A multiplication by a constant requires another instruction
8222 to load the constant to a register. */
8223 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8224 ? 1 : 0);
8225 else if (const_ok_for_arm (~INTVAL (x)))
8226 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8227 else if (const_ok_for_arm (-INTVAL (x)))
8228 {
8229 if (outer_code == COMPARE || outer_code == PLUS
8230 || outer_code == MINUS)
8231 *total = 0;
8232 else
8233 *total = COSTS_N_INSNS (1);
8234 }
8235 else
8236 *total = COSTS_N_INSNS (2);
8237 return true;
8238
8239 case CONST:
8240 case LABEL_REF:
8241 case SYMBOL_REF:
8242 *total = COSTS_N_INSNS (2);
8243 return true;
8244
8245 case CONST_DOUBLE:
8246 *total = COSTS_N_INSNS (4);
8247 return true;
8248
8249 case CONST_VECTOR:
8250 if (TARGET_NEON
8251 && TARGET_HARD_FLOAT
8252 && outer_code == SET
8253 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8254 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8255 *total = COSTS_N_INSNS (1);
8256 else
8257 *total = COSTS_N_INSNS (4);
8258 return true;
8259
8260 case HIGH:
8261 case LO_SUM:
8262 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8263 cost of these slightly. */
8264 *total = COSTS_N_INSNS (1) + 1;
8265 return true;
8266
8267 case SET:
8268 return false;
8269
8270 default:
8271 if (mode != VOIDmode)
8272 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8273 else
8274 	*total = COSTS_N_INSNS (4); /* Who knows? */
8275 return false;
8276 }
8277 }
8278
8279 /* RTX costs.  Dispatch to the size or speed variant as appropriate. */
8280 static bool
8281 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8282 int *total, bool speed)
8283 {
8284 if (!speed)
8285 return arm_size_rtx_costs (x, (enum rtx_code) code,
8286 (enum rtx_code) outer_code, total);
8287 else
8288 return current_tune->rtx_costs (x, (enum rtx_code) code,
8289 (enum rtx_code) outer_code,
8290 total, speed);
8291 }
8292
8293 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8294 supported on any "slowmul" cores, so it can be ignored. */
8295
8296 static bool
8297 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8298 int *total, bool speed)
8299 {
8300 enum machine_mode mode = GET_MODE (x);
8301
8302 if (TARGET_THUMB)
8303 {
8304 *total = thumb1_rtx_costs (x, code, outer_code);
8305 return true;
8306 }
8307
8308 switch (code)
8309 {
8310 case MULT:
8311 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8312 || mode == DImode)
8313 {
8314 *total = COSTS_N_INSNS (20);
8315 return false;
8316 }
8317
8318 if (CONST_INT_P (XEXP (x, 1)))
8319 {
8320 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8321 & (unsigned HOST_WIDE_INT) 0xffffffff);
8322 int cost, const_ok = const_ok_for_arm (i);
8323 int j, booth_unit_size;
8324
8325 /* Tune as appropriate. */
8326 cost = const_ok ? 4 : 8;
8327 booth_unit_size = 2;
8328 for (j = 0; i && j < 32; j += booth_unit_size)
8329 {
8330 i >>= booth_unit_size;
8331 cost++;
8332 }
8333
8334 *total = COSTS_N_INSNS (cost);
8335 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8336 return true;
8337 }
8338
8339 *total = COSTS_N_INSNS (20);
8340 return false;
8341
8342 default:
8343       return arm_rtx_costs_1 (x, outer_code, total, speed);
8344 }
8345 }
8346
8347
8348 /* RTX cost for cores with a fast multiply unit (M variants). */
8349
8350 static bool
8351 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8352 int *total, bool speed)
8353 {
8354 enum machine_mode mode = GET_MODE (x);
8355
8356 if (TARGET_THUMB1)
8357 {
8358 *total = thumb1_rtx_costs (x, code, outer_code);
8359 return true;
8360 }
8361
8362   /* ??? Should Thumb-2 use different costs? */
8363 switch (code)
8364 {
8365 case MULT:
8366 /* There is no point basing this on the tuning, since it is always the
8367 fast variant if it exists at all. */
8368 if (mode == DImode
8369 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8370 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8371 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8372 {
8373 	  *total = COSTS_N_INSNS (2);
8374 return false;
8375 }
8376
8377
8378 if (mode == DImode)
8379 {
8380 *total = COSTS_N_INSNS (5);
8381 return false;
8382 }
8383
8384 if (CONST_INT_P (XEXP (x, 1)))
8385 {
8386 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8387 & (unsigned HOST_WIDE_INT) 0xffffffff);
8388 int cost, const_ok = const_ok_for_arm (i);
8389 int j, booth_unit_size;
8390
8391 /* Tune as appropriate. */
8392 cost = const_ok ? 4 : 8;
8393 booth_unit_size = 8;
8394 for (j = 0; i && j < 32; j += booth_unit_size)
8395 {
8396 i >>= booth_unit_size;
8397 cost++;
8398 }
8399
8400 	  *total = COSTS_N_INSNS (cost);
8401 return false;
8402 }
8403
8404 if (mode == SImode)
8405 {
8406 *total = COSTS_N_INSNS (4);
8407 return false;
8408 }
8409
8410 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8411 {
8412 if (TARGET_HARD_FLOAT
8413 && (mode == SFmode
8414 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8415 {
8416 *total = COSTS_N_INSNS (1);
8417 return false;
8418 }
8419 }
8420
8421 /* Requires a lib call */
8422 *total = COSTS_N_INSNS (20);
8423 return false;
8424
8425 default:
8426 return arm_rtx_costs_1 (x, outer_code, total, speed);
8427 }
8428 }
8429
8430
8431 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8432 so it can be ignored. */
8433
8434 static bool
8435 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8436 int *total, bool speed)
8437 {
8438 enum machine_mode mode = GET_MODE (x);
8439
8440 if (TARGET_THUMB)
8441 {
8442 *total = thumb1_rtx_costs (x, code, outer_code);
8443 return true;
8444 }
8445
8446 switch (code)
8447 {
8448 case COMPARE:
8449 if (GET_CODE (XEXP (x, 0)) != MULT)
8450 return arm_rtx_costs_1 (x, outer_code, total, speed);
8451
8452 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8453 will stall until the multiplication is complete. */
8454 *total = COSTS_N_INSNS (3);
8455 return false;
8456
8457 case MULT:
8458 /* There is no point basing this on the tuning, since it is always the
8459 fast variant if it exists at all. */
8460 if (mode == DImode
8461 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8462 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8463 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8464 {
8465 *total = COSTS_N_INSNS (2);
8466 return false;
8467 }
8468
8469
8470 if (mode == DImode)
8471 {
8472 *total = COSTS_N_INSNS (5);
8473 return false;
8474 }
8475
8476 if (CONST_INT_P (XEXP (x, 1)))
8477 {
8478 /* If operand 1 is a constant we can more accurately
8479 calculate the cost of the multiply. The multiplier can
8480 retire 15 bits on the first cycle and a further 12 on the
8481 second. We do, of course, have to load the constant into
8482 a register first. */
8483 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8484 /* There's a general overhead of one cycle. */
8485 int cost = 1;
8486 unsigned HOST_WIDE_INT masked_const;
8487
8488 if (i & 0x80000000)
8489 i = ~i;
8490
8491 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8492
8493 masked_const = i & 0xffff8000;
8494 if (masked_const != 0)
8495 {
8496 cost++;
8497 masked_const = i & 0xf8000000;
8498 if (masked_const != 0)
8499 cost++;
8500 }
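     	  /* Under this model, for example, 0x4000 fits entirely in the low
	     15 bits and costs 1 cycle, 0x12345 needs a second cycle, and
	     0x12345678 needs a third.  */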
8501 *total = COSTS_N_INSNS (cost);
8502 return false;
8503 }
8504
8505 if (mode == SImode)
8506 {
8507 *total = COSTS_N_INSNS (3);
8508 return false;
8509 }
8510
8511 /* Requires a lib call */
8512 *total = COSTS_N_INSNS (20);
8513 return false;
8514
8515 default:
8516 return arm_rtx_costs_1 (x, outer_code, total, speed);
8517 }
8518 }
8519
8520
8521 /* RTX costs for 9e (and later) cores. */
8522
8523 static bool
8524 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8525 int *total, bool speed)
8526 {
8527 enum machine_mode mode = GET_MODE (x);
8528
8529 if (TARGET_THUMB1)
8530 {
8531 switch (code)
8532 {
8533 case MULT:
8534 *total = COSTS_N_INSNS (3);
8535 return true;
8536
8537 default:
8538 *total = thumb1_rtx_costs (x, code, outer_code);
8539 return true;
8540 }
8541 }
8542
8543 switch (code)
8544 {
8545 case MULT:
8546 /* There is no point basing this on the tuning, since it is always the
8547 fast variant if it exists at all. */
8548 if (mode == DImode
8549 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8550 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8551 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8552 {
8553 *total = COSTS_N_INSNS (2);
8554 return false;
8555 }
8556
8557
8558 if (mode == DImode)
8559 {
8560 *total = COSTS_N_INSNS (5);
8561 return false;
8562 }
8563
8564 if (mode == SImode)
8565 {
8566 *total = COSTS_N_INSNS (2);
8567 return false;
8568 }
8569
8570 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8571 {
8572 if (TARGET_HARD_FLOAT
8573 && (mode == SFmode
8574 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8575 {
8576 *total = COSTS_N_INSNS (1);
8577 return false;
8578 }
8579 }
8580
8581 *total = COSTS_N_INSNS (20);
8582 return false;
8583
8584 default:
8585 return arm_rtx_costs_1 (x, outer_code, total, speed);
8586 }
8587 }
8588 /* All address computations that can be done are free, but rtx cost returns
8589 the same for practically all of them. So we weight the different types
8590 of address here in the order (most pref first):
8591 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8592 static inline int
8593 arm_arm_address_cost (rtx x)
8594 {
8595 enum rtx_code c = GET_CODE (x);
8596
8597 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8598 return 0;
8599 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8600 return 10;
8601
8602 if (c == PLUS)
8603 {
8604 if (CONST_INT_P (XEXP (x, 1)))
8605 return 2;
8606
8607 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8608 return 3;
8609
8610 return 4;
8611 }
8612
8613 return 6;
8614 }
8615
8616 static inline int
8617 arm_thumb_address_cost (rtx x)
8618 {
8619 enum rtx_code c = GET_CODE (x);
8620
8621 if (c == REG)
8622 return 1;
8623 if (c == PLUS
8624 && REG_P (XEXP (x, 0))
8625 && CONST_INT_P (XEXP (x, 1)))
8626 return 1;
8627
8628 return 2;
8629 }
8630
8631 static int
8632 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8633 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8634 {
8635 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8636 }
8637
8638 /* Adjust cost hook for XScale. */
8639 static bool
8640 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8641 {
8642 /* Some true dependencies can have a higher cost depending
8643 on precisely how certain input operands are used. */
8644   if (REG_NOTE_KIND (link) == 0
8645 && recog_memoized (insn) >= 0
8646 && recog_memoized (dep) >= 0)
8647 {
8648 int shift_opnum = get_attr_shift (insn);
8649 enum attr_type attr_type = get_attr_type (dep);
8650
8651 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8652 operand for INSN. If we have a shifted input operand and the
8653 instruction we depend on is another ALU instruction, then we may
8654 have to account for an additional stall. */
8655 if (shift_opnum != 0
8656 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8657 {
8658 rtx shifted_operand;
8659 int opno;
8660
8661 /* Get the shifted operand. */
8662 extract_insn (insn);
8663 shifted_operand = recog_data.operand[shift_opnum];
8664
8665 	  /* Iterate over all the operands in DEP.  If we write an operand
8666 	     that overlaps with SHIFTED_OPERAND, then we have to increase
8667 	     the cost of this dependency. */
8668 extract_insn (dep);
8669 preprocess_constraints ();
8670 for (opno = 0; opno < recog_data.n_operands; opno++)
8671 {
8672 /* We can ignore strict inputs. */
8673 if (recog_data.operand_type[opno] == OP_IN)
8674 continue;
8675
8676 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8677 shifted_operand))
8678 {
8679 *cost = 2;
8680 return false;
8681 }
8682 }
8683 }
8684 }
8685 return true;
8686 }
8687
8688 /* Adjust cost hook for Cortex A9. */
8689 static bool
8690 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8691 {
8692 switch (REG_NOTE_KIND (link))
8693 {
8694 case REG_DEP_ANTI:
8695 *cost = 0;
8696 return false;
8697
8698 case REG_DEP_TRUE:
8699 case REG_DEP_OUTPUT:
8700 if (recog_memoized (insn) >= 0
8701 && recog_memoized (dep) >= 0)
8702 {
8703 if (GET_CODE (PATTERN (insn)) == SET)
8704 {
8705 if (GET_MODE_CLASS
8706 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8707 || GET_MODE_CLASS
8708 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8709 {
8710 enum attr_type attr_type_insn = get_attr_type (insn);
8711 enum attr_type attr_type_dep = get_attr_type (dep);
8712
8713 /* By default all dependencies of the form
8714 s0 = s0 <op> s1
8715 s0 = s0 <op> s2
8716 have an extra latency of 1 cycle because
8717 of the input and output dependency in this
8718 		 case.  However, this gets modeled as a true
8719 		 dependency, hence all these checks. */
8720 if (REG_P (SET_DEST (PATTERN (insn)))
8721 && REG_P (SET_DEST (PATTERN (dep)))
8722 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8723 SET_DEST (PATTERN (dep))))
8724 {
8725 /* FMACS is a special case where the dependent
8726 instruction can be issued 3 cycles before
8727 the normal latency in case of an output
8728 dependency. */
8729 if ((attr_type_insn == TYPE_FMACS
8730 || attr_type_insn == TYPE_FMACD)
8731 && (attr_type_dep == TYPE_FMACS
8732 || attr_type_dep == TYPE_FMACD))
8733 {
8734 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8735 *cost = insn_default_latency (dep) - 3;
8736 else
8737 *cost = insn_default_latency (dep);
8738 return false;
8739 }
8740 else
8741 {
8742 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8743 *cost = insn_default_latency (dep) + 1;
8744 else
8745 *cost = insn_default_latency (dep);
8746 }
8747 return false;
8748 }
8749 }
8750 }
8751 }
8752 break;
8753
8754 default:
8755 gcc_unreachable ();
8756 }
8757
8758 return true;
8759 }
8760
8761 /* Adjust cost hook for FA726TE. */
8762 static bool
8763 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8764 {
8765   /* For the FA726TE, a true dependency on the CPSR (i.e. a flag-setting
8766      instruction followed by a predicated one) has a penalty of 3. */
8767 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8768 && recog_memoized (insn) >= 0
8769 && recog_memoized (dep) >= 0
8770 && get_attr_conds (dep) == CONDS_SET)
8771 {
8772 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8773 if (get_attr_conds (insn) == CONDS_USE
8774 && get_attr_type (insn) != TYPE_BRANCH)
8775 {
8776 *cost = 3;
8777 return false;
8778 }
8779
8780 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8781 || get_attr_conds (insn) == CONDS_USE)
8782 {
8783 *cost = 0;
8784 return false;
8785 }
8786 }
8787
8788 return true;
8789 }
8790
8791 /* Implement TARGET_REGISTER_MOVE_COST.
8792
8793 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8794    are typically more expensive than a single memory access.  We set
8795 the cost to less than two memory accesses so that floating
8796 point to integer conversion does not go through memory. */
8797
8798 int
8799 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8800 reg_class_t from, reg_class_t to)
8801 {
8802 if (TARGET_32BIT)
8803 {
8804 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8805 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8806 return 15;
8807 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8808 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8809 return 4;
8810 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8811 return 20;
8812 else
8813 return 2;
8814 }
8815 else
8816 {
8817 if (from == HI_REGS || to == HI_REGS)
8818 return 4;
8819 else
8820 return 2;
8821 }
8822 }
8823
8824 /* Implement TARGET_MEMORY_MOVE_COST. */
8825
8826 int
8827 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8828 bool in ATTRIBUTE_UNUSED)
8829 {
8830 if (TARGET_32BIT)
8831 return 10;
8832 else
8833 {
8834 if (GET_MODE_SIZE (mode) < 4)
8835 return 8;
8836 else
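     	/* E.g. an SImode move costs 8 when RCLASS is LO_REGS and 16
	   otherwise.  */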
8837 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8838 }
8839 }
8840
8841 /* Vectorizer cost model implementation. */
8842
8843 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8844 static int
8845 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8846 tree vectype,
8847 int misalign ATTRIBUTE_UNUSED)
8848 {
8849 unsigned elements;
8850
8851 switch (type_of_cost)
8852 {
8853 case scalar_stmt:
8854 return current_tune->vec_costs->scalar_stmt_cost;
8855
8856 case scalar_load:
8857 return current_tune->vec_costs->scalar_load_cost;
8858
8859 case scalar_store:
8860 return current_tune->vec_costs->scalar_store_cost;
8861
8862 case vector_stmt:
8863 return current_tune->vec_costs->vec_stmt_cost;
8864
8865 case vector_load:
8866 return current_tune->vec_costs->vec_align_load_cost;
8867
8868 case vector_store:
8869 return current_tune->vec_costs->vec_store_cost;
8870
8871 case vec_to_scalar:
8872 return current_tune->vec_costs->vec_to_scalar_cost;
8873
8874 case scalar_to_vec:
8875 return current_tune->vec_costs->scalar_to_vec_cost;
8876
8877 case unaligned_load:
8878 return current_tune->vec_costs->vec_unalign_load_cost;
8879
8880 case unaligned_store:
8881 return current_tune->vec_costs->vec_unalign_store_cost;
8882
8883 case cond_branch_taken:
8884 return current_tune->vec_costs->cond_taken_branch_cost;
8885
8886 case cond_branch_not_taken:
8887 return current_tune->vec_costs->cond_not_taken_branch_cost;
8888
8889 case vec_perm:
8890 case vec_promote_demote:
8891 return current_tune->vec_costs->vec_stmt_cost;
8892
8893 case vec_construct:
8894 elements = TYPE_VECTOR_SUBPARTS (vectype);
8895 return elements / 2 + 1;
8896
8897 default:
8898 gcc_unreachable ();
8899 }
8900 }
8901
8902 /* Implement targetm.vectorize.add_stmt_cost. */
8903
8904 static unsigned
8905 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8906 struct _stmt_vec_info *stmt_info, int misalign,
8907 enum vect_cost_model_location where)
8908 {
8909 unsigned *cost = (unsigned *) data;
8910 unsigned retval = 0;
8911
8912 if (flag_vect_cost_model)
8913 {
8914 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8915 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8916
8917 /* Statements in an inner loop relative to the loop being
8918 vectorized are weighted more heavily. The value here is
8919 arbitrary and could potentially be improved with analysis. */
8920 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8921 count *= 50; /* FIXME. */
8922
8923 retval = (unsigned) (count * stmt_cost);
8924 cost[where] += retval;
8925 }
8926
8927 return retval;
8928 }
8929
8930 /* Return true if and only if this insn can dual-issue only as older. */
8931 static bool
8932 cortexa7_older_only (rtx insn)
8933 {
8934 if (recog_memoized (insn) < 0)
8935 return false;
8936
8937 if (get_attr_insn (insn) == INSN_MOV)
8938 return false;
8939
8940 switch (get_attr_type (insn))
8941 {
8942 case TYPE_ALU_REG:
8943 case TYPE_LOAD_BYTE:
8944 case TYPE_LOAD1:
8945 case TYPE_STORE1:
8946 case TYPE_FFARITHS:
8947 case TYPE_FADDS:
8948 case TYPE_FFARITHD:
8949 case TYPE_FADDD:
8950 case TYPE_FCPYS:
8951 case TYPE_F_CVT:
8952 case TYPE_FCMPS:
8953 case TYPE_FCMPD:
8954 case TYPE_FCONSTS:
8955 case TYPE_FCONSTD:
8956 case TYPE_FMULS:
8957 case TYPE_FMACS:
8958 case TYPE_FMULD:
8959 case TYPE_FMACD:
8960 case TYPE_FDIVS:
8961 case TYPE_FDIVD:
8962 case TYPE_F_2_R:
8963 case TYPE_F_FLAG:
8964 case TYPE_F_LOADS:
8965 case TYPE_F_STORES:
8966 return true;
8967 default:
8968 return false;
8969 }
8970 }
8971
8972 /* Return true if and only if this insn can dual-issue as younger. */
8973 static bool
8974 cortexa7_younger (FILE *file, int verbose, rtx insn)
8975 {
8976 if (recog_memoized (insn) < 0)
8977 {
8978 if (verbose > 5)
8979 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
8980 return false;
8981 }
8982
8983 if (get_attr_insn (insn) == INSN_MOV)
8984 return true;
8985
8986 switch (get_attr_type (insn))
8987 {
8988 case TYPE_SIMPLE_ALU_IMM:
8989 case TYPE_SIMPLE_ALU_SHIFT:
8990 case TYPE_BRANCH:
8991 case TYPE_CALL:
8992 return true;
8993 default:
8994 return false;
8995 }
8996 }
8997
8998
8999 /* Look for an instruction that can dual issue only as an older
9000 instruction, and move it in front of any instructions that can
9001 dual-issue as younger, while preserving the relative order of all
9002    other instructions in the ready list.  This is a heuristic to help
9003 dual-issue in later cycles, by postponing issue of more flexible
9004 instructions. This heuristic may affect dual issue opportunities
9005 in the current cycle. */
9006 static void
9007 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9008 int clock)
9009 {
9010 int i;
9011 int first_older_only = -1, first_younger = -1;
9012
9013 if (verbose > 5)
9014 fprintf (file,
9015 ";; sched_reorder for cycle %d with %d insns in ready list\n",
9016 clock,
9017 *n_readyp);
9018
9019 /* Traverse the ready list from the head (the instruction to issue
9020      first), looking for the first instruction that can issue as
9021 younger and the first instruction that can dual-issue only as
9022 older. */
9023 for (i = *n_readyp - 1; i >= 0; i--)
9024 {
9025 rtx insn = ready[i];
9026 if (cortexa7_older_only (insn))
9027 {
9028 first_older_only = i;
9029 if (verbose > 5)
9030 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
9031 break;
9032 }
9033 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
9034 first_younger = i;
9035 }
9036
9037   /* Nothing to reorder because either no younger insn was found, or an
9038      insn that can dual-issue only as older appears before any insn that
9039      can dual-issue as younger. */
9040 if (first_younger == -1)
9041 {
9042 if (verbose > 5)
9043 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
9044 return;
9045 }
9046
9047 /* Nothing to reorder because no older-only insn in the ready list. */
9048 if (first_older_only == -1)
9049 {
9050 if (verbose > 5)
9051 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
9052 return;
9053 }
9054
9055 /* Move first_older_only insn before first_younger. */
9056 if (verbose > 5)
9057 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
9058              INSN_UID (ready[first_older_only]),
9059              INSN_UID (ready[first_younger]));
9060   rtx first_older_only_insn = ready[first_older_only];
9061 for (i = first_older_only; i < first_younger; i++)
9062 {
9063 ready[i] = ready[i+1];
9064 }
9065
9066 ready[i] = first_older_only_insn;
9067 return;
9068 }
9069
9070 /* Implement TARGET_SCHED_REORDER. */
9071 static int
9072 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9073 int clock)
9074 {
9075 switch (arm_tune)
9076 {
9077 case cortexa7:
9078 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
9079 break;
9080 default:
9081 /* Do nothing for other cores. */
9082 break;
9083 }
9084
9085 return arm_issue_rate ();
9086 }
9087
9088 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
9089 It corrects the value of COST based on the relationship between
9090 INSN and DEP through the dependence LINK. It returns the new
9091 value. There is a per-core adjust_cost hook to adjust scheduler costs
9092 and the per-core hook can choose to completely override the generic
9093 adjust_cost function. Only put bits of code into arm_adjust_cost that
9094 are common across all cores. */
9095 static int
9096 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9097 {
9098 rtx i_pat, d_pat;
9099
9100 /* When generating Thumb-1 code, we want to place flag-setting operations
9101 close to a conditional branch which depends on them, so that we can
9102 omit the comparison. */
9103 if (TARGET_THUMB1
9104 && REG_NOTE_KIND (link) == 0
9105 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9106 && recog_memoized (dep) >= 0
9107 && get_attr_conds (dep) == CONDS_SET)
9108 return 0;
9109
9110 if (current_tune->sched_adjust_cost != NULL)
9111 {
9112 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9113 return cost;
9114 }
9115
9116 /* XXX Is this strictly true? */
9117 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9118 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9119 return 0;
9120
9121 /* Call insns don't incur a stall, even if they follow a load. */
9122 if (REG_NOTE_KIND (link) == 0
9123 && CALL_P (insn))
9124 return 1;
9125
9126 if ((i_pat = single_set (insn)) != NULL
9127 && MEM_P (SET_SRC (i_pat))
9128 && (d_pat = single_set (dep)) != NULL
9129 && MEM_P (SET_DEST (d_pat)))
9130 {
9131 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9132 /* This is a load after a store; there is no conflict if the load reads
9133 from a cached area. Assume that loads from the stack and from the
9134 constant pool are cached, and that others will miss. This is a
9135 hack. */
9136
9137 if ((GET_CODE (src_mem) == SYMBOL_REF
9138 && CONSTANT_POOL_ADDRESS_P (src_mem))
9139 || reg_mentioned_p (stack_pointer_rtx, src_mem)
9140 || reg_mentioned_p (frame_pointer_rtx, src_mem)
9141 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9142 return 1;
9143 }
9144
9145 return cost;
9146 }
9147
9148 static int
9149 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9150 {
9151 if (TARGET_32BIT)
9152 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9153 else
9154 return (optimize > 0) ? 2 : 0;
9155 }
9156
9157 static int
9158 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9159 {
9160 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9161 }
9162
9163 static bool fp_consts_inited = false;
9164
9165 static REAL_VALUE_TYPE value_fp0;
9166
9167 static void
9168 init_fp_table (void)
9169 {
9170 REAL_VALUE_TYPE r;
9171
9172 r = REAL_VALUE_ATOF ("0", DFmode);
9173 value_fp0 = r;
9174 fp_consts_inited = true;
9175 }
9176
9177 /* Return TRUE if rtx X is a valid immediate FP constant. */
9178 int
9179 arm_const_double_rtx (rtx x)
9180 {
9181 REAL_VALUE_TYPE r;
9182
9183 if (!fp_consts_inited)
9184 init_fp_table ();
9185
9186 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9187 if (REAL_VALUE_MINUS_ZERO (r))
9188 return 0;
9189
9190 if (REAL_VALUES_EQUAL (r, value_fp0))
9191 return 1;
9192
9193 return 0;
9194 }
9195
9196 /* VFPv3 has a fairly wide range of representable immediates, formed from
9197 "quarter-precision" floating-point values. These can be evaluated using this
9198 formula (with ^ for exponentiation):
9199
9200 -1^s * n * 2^-r
9201
9202 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9203 16 <= n <= 31 and 0 <= r <= 7.
9204
9205 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9206
9207 - A (most-significant) is the sign bit.
9208 - BCD are the exponent (encoded as r XOR 3).
9209 - EFGH are the mantissa (encoded as n - 16).
9210 */
9211
9212 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9213 fconst[sd] instruction, or -1 if X isn't suitable. */
9214 static int
9215 vfp3_const_double_index (rtx x)
9216 {
9217 REAL_VALUE_TYPE r, m;
9218 int sign, exponent;
9219 unsigned HOST_WIDE_INT mantissa, mant_hi;
9220 unsigned HOST_WIDE_INT mask;
9221 HOST_WIDE_INT m1, m2;
9222 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9223
9224 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9225 return -1;
9226
9227 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9228
9229 /* We can't represent these things, so detect them first. */
9230 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9231 return -1;
9232
9233 /* Extract sign, exponent and mantissa. */
9234 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9235 r = real_value_abs (&r);
9236 exponent = REAL_EXP (&r);
9237 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9238 highest (sign) bit, with a fixed binary point at bit point_pos.
9239 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9240 bits for the mantissa, this may fail (low bits would be lost). */
9241 real_ldexp (&m, &r, point_pos - exponent);
9242 REAL_VALUE_TO_INT (&m1, &m2, m);
9243 mantissa = m1;
9244 mant_hi = m2;
9245
9246 /* If there are bits set in the low part of the mantissa, we can't
9247 represent this value. */
9248 if (mantissa != 0)
9249 return -1;
9250
9251 /* Now make it so that mantissa contains the most-significant bits, and move
9252 the point_pos to indicate that the least-significant bits have been
9253 discarded. */
9254 point_pos -= HOST_BITS_PER_WIDE_INT;
9255 mantissa = mant_hi;
9256
9257 /* We can permit four significant bits of mantissa only, plus a high bit
9258 which is always 1. */
9259 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9260 if ((mantissa & mask) != 0)
9261 return -1;
9262
9263 /* Now we know the mantissa is in range, chop off the unneeded bits. */
9264 mantissa >>= point_pos - 5;
9265
9266 /* The mantissa may be zero. Disallow that case. (It's possible to load the
9267 floating-point immediate zero with Neon using an integer-zero load, but
9268 that case is handled elsewhere.) */
9269 if (mantissa == 0)
9270 return -1;
9271
9272 gcc_assert (mantissa >= 16 && mantissa <= 31);
9273
9274 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9275 normalized significands are in the range [1, 2). (Our mantissa is shifted
9276 left 4 places at this point relative to normalized IEEE754 values). GCC
9277 internally uses [0.5, 1) (see real.c), so the exponent returned from
9278 REAL_EXP must be altered. */
9279 exponent = 5 - exponent;
9280
9281 if (exponent < 0 || exponent > 7)
9282 return -1;
9283
9284 /* Sign, mantissa and exponent are now in the correct form to plug into the
9285 formula described in the comment above. */
9286 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
9287 }
9288
9289 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
9290 int
9291 vfp3_const_double_rtx (rtx x)
9292 {
9293 if (!TARGET_VFP3)
9294 return 0;
9295
9296 return vfp3_const_double_index (x) != -1;
9297 }
9298
9299 /* Recognize immediates which can be used in various Neon instructions. Legal
9300 immediates are described by the following table (for VMVN variants, the
9301 bitwise inverse of the constant shown is recognized. In either case, VMOV
9302 is output and the correct instruction to use for a given constant is chosen
9303 by the assembler). The constant shown is replicated across all elements of
9304 the destination vector.
9305
9306 insn elems variant constant (binary)
9307 ---- ----- ------- -----------------
9308 vmov i32 0 00000000 00000000 00000000 abcdefgh
9309 vmov i32 1 00000000 00000000 abcdefgh 00000000
9310 vmov i32 2 00000000 abcdefgh 00000000 00000000
9311 vmov i32 3 abcdefgh 00000000 00000000 00000000
9312 vmov i16 4 00000000 abcdefgh
9313 vmov i16 5 abcdefgh 00000000
9314 vmvn i32 6 00000000 00000000 00000000 abcdefgh
9315 vmvn i32 7 00000000 00000000 abcdefgh 00000000
9316 vmvn i32 8 00000000 abcdefgh 00000000 00000000
9317 vmvn i32 9 abcdefgh 00000000 00000000 00000000
9318 vmvn i16 10 00000000 abcdefgh
9319 vmvn i16 11 abcdefgh 00000000
9320 vmov i32 12 00000000 00000000 abcdefgh 11111111
9321 vmvn i32 13 00000000 00000000 abcdefgh 11111111
9322 vmov i32 14 00000000 abcdefgh 11111111 11111111
9323 vmvn i32 15 00000000 abcdefgh 11111111 11111111
9324 vmov i8 16 abcdefgh
9325 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
9326 eeeeeeee ffffffff gggggggg hhhhhhhh
9327 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
9328 vmov f32 19 00000000 00000000 00000000 00000000
9329
9330 For case 18, B = !b. Representable values are exactly those accepted by
9331 vfp3_const_double_index, but are output as floating-point numbers rather
9332 than indices.
9333
9334 For case 19, we will change it to vmov.i32 when assembling.
9335
9336 Variants 0-5 (inclusive) may also be used as immediates for the second
9337 operand of VORR/VBIC instructions.
9338
9339 The INVERSE argument causes the bitwise inverse of the given operand to be
9340 recognized instead (used for recognizing legal immediates for the VAND/VORN
9341 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9342 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9343 output, rather than the real insns vbic/vorr).
9344
9345 INVERSE makes no difference to the recognition of float vectors.
9346
9347 The return value is the variant of immediate as shown in the above table, or
9348 -1 if the given value doesn't match any of the listed patterns.
9349 */
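/* Two illustrative matches of the table above (not an exhaustive list): a
   V4SImode vector whose elements are all 0x00ab0000 matches variant 2 with
   element width 32, while a V8QImode vector whose bytes are all 0x2a matches
   variant 16 with element width 8. */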
9350 static int
9351 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9352 rtx *modconst, int *elementwidth)
9353 {
9354 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
9355 matches = 1; \
9356 for (i = 0; i < idx; i += (STRIDE)) \
9357 if (!(TEST)) \
9358 matches = 0; \
9359 if (matches) \
9360 { \
9361 immtype = (CLASS); \
9362 elsize = (ELSIZE); \
9363 break; \
9364 }
9365
9366 unsigned int i, elsize = 0, idx = 0, n_elts;
9367 unsigned int innersize;
9368 unsigned char bytes[16];
9369 int immtype = -1, matches;
9370 unsigned int invmask = inverse ? 0xff : 0;
9371 bool vector = GET_CODE (op) == CONST_VECTOR;
9372
9373 if (vector)
9374 {
9375 n_elts = CONST_VECTOR_NUNITS (op);
9376 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9377 }
9378 else
9379 {
9380 n_elts = 1;
9381 if (mode == VOIDmode)
9382 mode = DImode;
9383 innersize = GET_MODE_SIZE (mode);
9384 }
9385
9386 /* Vectors of float constants. */
9387 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9388 {
9389 rtx el0 = CONST_VECTOR_ELT (op, 0);
9390 REAL_VALUE_TYPE r0;
9391
9392 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9393 return -1;
9394
9395 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9396
9397 for (i = 1; i < n_elts; i++)
9398 {
9399 rtx elt = CONST_VECTOR_ELT (op, i);
9400 REAL_VALUE_TYPE re;
9401
9402 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9403
9404 if (!REAL_VALUES_EQUAL (r0, re))
9405 return -1;
9406 }
9407
9408 if (modconst)
9409 *modconst = CONST_VECTOR_ELT (op, 0);
9410
9411 if (elementwidth)
9412 *elementwidth = 0;
9413
9414 if (el0 == CONST0_RTX (GET_MODE (el0)))
9415 return 19;
9416 else
9417 return 18;
9418 }
9419
9420 /* Splat vector constant out into a byte vector. */
9421 for (i = 0; i < n_elts; i++)
9422 {
9423 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9424 unsigned HOST_WIDE_INT elpart;
9425 unsigned int part, parts;
9426
9427 if (CONST_INT_P (el))
9428 {
9429 elpart = INTVAL (el);
9430 parts = 1;
9431 }
9432 else if (CONST_DOUBLE_P (el))
9433 {
9434 elpart = CONST_DOUBLE_LOW (el);
9435 parts = 2;
9436 }
9437 else
9438 gcc_unreachable ();
9439
9440 for (part = 0; part < parts; part++)
9441 {
9442 unsigned int byte;
9443 for (byte = 0; byte < innersize; byte++)
9444 {
9445 bytes[idx++] = (elpart & 0xff) ^ invmask;
9446 elpart >>= BITS_PER_UNIT;
9447 }
9448 if (CONST_DOUBLE_P (el))
9449 elpart = CONST_DOUBLE_HIGH (el);
9450 }
9451 }
9452
9453 /* Sanity check. */
9454 gcc_assert (idx == GET_MODE_SIZE (mode));
9455
9456 do
9457 {
9458 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9459 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9460
9461 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9462 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9463
9464 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9465 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9466
9467 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9468 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9469
9470 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9471
9472 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9473
9474 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9475 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9476
9477 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9478 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9479
9480 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9481 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9482
9483 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9484 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9485
9486 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9487
9488 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9489
9490 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9491 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9492
9493 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9494 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9495
9496 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9497 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9498
9499 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9500 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9501
9502 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9503
9504 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9505 && bytes[i] == bytes[(i + 8) % idx]);
9506 }
9507 while (0);
9508
9509 if (immtype == -1)
9510 return -1;
9511
9512 if (elementwidth)
9513 *elementwidth = elsize;
9514
9515 if (modconst)
9516 {
9517 unsigned HOST_WIDE_INT imm = 0;
9518
9519 /* Un-invert bytes of recognized vector, if necessary. */
9520 if (invmask != 0)
9521 for (i = 0; i < idx; i++)
9522 bytes[i] ^= invmask;
9523
9524 if (immtype == 17)
9525 {
9526 /* FIXME: Broken on 32-bit H_W_I hosts. */
9527 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9528
9529 for (i = 0; i < 8; i++)
9530 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9531 << (i * BITS_PER_UNIT);
9532
9533 *modconst = GEN_INT (imm);
9534 }
9535 else
9536 {
9537 unsigned HOST_WIDE_INT imm = 0;
9538
9539 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9540 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9541
9542 *modconst = GEN_INT (imm);
9543 }
9544 }
9545
9546 return immtype;
9547 #undef CHECK
9548 }
9549
9550 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9551 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9552 float elements), and a modified constant (whatever should be output for a
9553 VMOV) in *MODCONST. */
9554
9555 int
9556 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9557 rtx *modconst, int *elementwidth)
9558 {
9559 rtx tmpconst;
9560 int tmpwidth;
9561 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9562
9563 if (retval == -1)
9564 return 0;
9565
9566 if (modconst)
9567 *modconst = tmpconst;
9568
9569 if (elementwidth)
9570 *elementwidth = tmpwidth;
9571
9572 return 1;
9573 }
9574
9575 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9576 the immediate is valid, write a constant suitable for using as an operand
9577 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9578 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9579
9580 int
9581 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9582 rtx *modconst, int *elementwidth)
9583 {
9584 rtx tmpconst;
9585 int tmpwidth;
9586 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9587
9588 if (retval < 0 || retval > 5)
9589 return 0;
9590
9591 if (modconst)
9592 *modconst = tmpconst;
9593
9594 if (elementwidth)
9595 *elementwidth = tmpwidth;
9596
9597 return 1;
9598 }
9599
9600 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9601 the immediate is valid, write a constant suitable for using as an operand
9602 to VSHR/VSHL to *MODCONST and the corresponding element width to
9603 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a
9604 right shift; the two have different limits on the immediate. */
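/* For instance (illustrative): for a V8HImode operand whose elements are all
   (const_int 3), both a left shift (3 <= 15) and a right shift (1 <= 3 <= 16)
   are accepted, and *ELEMENTWIDTH is set to 16 in each case. */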
9605
9606 int
9607 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9608 rtx *modconst, int *elementwidth,
9609 bool isleftshift)
9610 {
9611 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9612 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9613 unsigned HOST_WIDE_INT last_elt = 0;
9614 unsigned HOST_WIDE_INT maxshift;
9615
9616 /* All elements of the vector constant must hold the same shift amount. */
9617 for (i = 0; i < n_elts; i++)
9618 {
9619 rtx el = CONST_VECTOR_ELT (op, i);
9620 unsigned HOST_WIDE_INT elpart;
9621
9622 if (CONST_INT_P (el))
9623 elpart = INTVAL (el);
9624 else if (CONST_DOUBLE_P (el))
9625 return 0;
9626 else
9627 gcc_unreachable ();
9628
9629 if (i != 0 && elpart != last_elt)
9630 return 0;
9631
9632 last_elt = elpart;
9633 }
9634
9635 /* The element size in bits bounds the shift amount. */
9636 maxshift = innersize * 8;
9637
9638 if (isleftshift)
9639 {
9640 /* Left shift immediate value can be from 0 to <size>-1. */
9641 if (last_elt >= maxshift)
9642 return 0;
9643 }
9644 else
9645 {
9646 /* Right shift immediate value can be from 1 to <size>. */
9647 if (last_elt == 0 || last_elt > maxshift)
9648 return 0;
9649 }
9650
9651 if (elementwidth)
9652 *elementwidth = innersize * 8;
9653
9654 if (modconst)
9655 *modconst = CONST_VECTOR_ELT (op, 0);
9656
9657 return 1;
9658 }
9659
9660 /* Return a string suitable for output of Neon immediate logic operation
9661 MNEM. */
9662
9663 char *
9664 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9665 int inverse, int quad)
9666 {
9667 int width, is_valid;
9668 static char templ[40];
9669
9670 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9671
9672 gcc_assert (is_valid != 0);
9673
9674 if (quad)
9675 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9676 else
9677 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9678
9679 return templ;
9680 }
9681
9682 /* Return a string suitable for output of Neon immediate shift operation
9683 (VSHR or VSHL) MNEM. */
9684
9685 char *
9686 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9687 enum machine_mode mode, int quad,
9688 bool isleftshift)
9689 {
9690 int width, is_valid;
9691 static char templ[40];
9692
9693 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9694 gcc_assert (is_valid != 0);
9695
9696 if (quad)
9697 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9698 else
9699 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9700
9701 return templ;
9702 }
9703
9704 /* Output a sequence of pairwise operations to implement a reduction.
9705 NOTE: We do "too much work" here, because pairwise operations work on two
9706 registers-worth of operands in one go. Unfortunately, I don't think we can
9707 exploit those extra calculations to do the full operation in fewer steps.
9708 Although all vector elements of the result but the first are ignored, we
9709 actually calculate the same result in each of the elements. An alternative
9710 such as initially loading a vector with zero to use as each of the second
9711 operands would use up an additional register and take an extra instruction,
9712 for no particular gain. */
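/* For example (illustrative): reducing a V4SImode vector takes two steps.
   With four elements, PARTS is 4, so REDUC is emitted once with I == 2 into
   a fresh register and once more with I == 1 into OP0. */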
9713
9714 void
9715 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9716 rtx (*reduc) (rtx, rtx, rtx))
9717 {
9718 enum machine_mode inner = GET_MODE_INNER (mode);
9719 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9720 rtx tmpsum = op1;
9721
9722 for (i = parts / 2; i >= 1; i /= 2)
9723 {
9724 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9725 emit_insn (reduc (dest, tmpsum, tmpsum));
9726 tmpsum = dest;
9727 }
9728 }
9729
9730 /* If VALS is a vector constant that can be loaded into a register
9731 using VDUP, generate instructions to do so and return an RTX to
9732 assign to the register. Otherwise return NULL_RTX. */
9733
9734 static rtx
9735 neon_vdup_constant (rtx vals)
9736 {
9737 enum machine_mode mode = GET_MODE (vals);
9738 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9739 int n_elts = GET_MODE_NUNITS (mode);
9740 bool all_same = true;
9741 rtx x;
9742 int i;
9743
9744 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9745 return NULL_RTX;
9746
9747 for (i = 0; i < n_elts; ++i)
9748 {
9749 x = XVECEXP (vals, 0, i);
9750 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9751 all_same = false;
9752 }
9753
9754 if (!all_same)
9755 /* The elements are not all the same. We could handle repeating
9756 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9757 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9758 vdup.i16). */
9759 return NULL_RTX;
9760
9761 /* We can load this constant by using VDUP and a constant in a
9762 single ARM register. This will be cheaper than a vector
9763 load. */
9764
9765 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9766 return gen_rtx_VEC_DUPLICATE (mode, x);
9767 }
9768
9769 /* Generate code to load VALS, which is a PARALLEL containing only
9770 constants (for vec_init) or CONST_VECTOR, efficiently into a
9771 register. Returns an RTX to copy into the register, or NULL_RTX
9772 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9773
9774 rtx
9775 neon_make_constant (rtx vals)
9776 {
9777 enum machine_mode mode = GET_MODE (vals);
9778 rtx target;
9779 rtx const_vec = NULL_RTX;
9780 int n_elts = GET_MODE_NUNITS (mode);
9781 int n_const = 0;
9782 int i;
9783
9784 if (GET_CODE (vals) == CONST_VECTOR)
9785 const_vec = vals;
9786 else if (GET_CODE (vals) == PARALLEL)
9787 {
9788 /* A CONST_VECTOR must contain only CONST_INTs and
9789 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9790 Only store valid constants in a CONST_VECTOR. */
9791 for (i = 0; i < n_elts; ++i)
9792 {
9793 rtx x = XVECEXP (vals, 0, i);
9794 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9795 n_const++;
9796 }
9797 if (n_const == n_elts)
9798 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9799 }
9800 else
9801 gcc_unreachable ();
9802
9803 if (const_vec != NULL
9804 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9805 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9806 return const_vec;
9807 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9808 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9809 pipeline cycle; creating the constant takes one or two ARM
9810 pipeline cycles. */
9811 return target;
9812 else if (const_vec != NULL_RTX)
9813 /* Load from constant pool. On Cortex-A8 this takes two cycles
9814 (for either double or quad vectors). We cannot take advantage
9815 of single-cycle VLD1 because we need a PC-relative addressing
9816 mode. */
9817 return const_vec;
9818 else
9819 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9820 We cannot construct an initializer. */
9821 return NULL_RTX;
9822 }
9823
9824 /* Initialize vector TARGET to VALS. */
9825
9826 void
9827 neon_expand_vector_init (rtx target, rtx vals)
9828 {
9829 enum machine_mode mode = GET_MODE (target);
9830 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9831 int n_elts = GET_MODE_NUNITS (mode);
9832 int n_var = 0, one_var = -1;
9833 bool all_same = true;
9834 rtx x, mem;
9835 int i;
9836
9837 for (i = 0; i < n_elts; ++i)
9838 {
9839 x = XVECEXP (vals, 0, i);
9840 if (!CONSTANT_P (x))
9841 ++n_var, one_var = i;
9842
9843 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9844 all_same = false;
9845 }
9846
9847 if (n_var == 0)
9848 {
9849 rtx constant = neon_make_constant (vals);
9850 if (constant != NULL_RTX)
9851 {
9852 emit_move_insn (target, constant);
9853 return;
9854 }
9855 }
9856
9857 /* Splat a single non-constant element if we can. */
9858 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9859 {
9860 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9861 emit_insn (gen_rtx_SET (VOIDmode, target,
9862 gen_rtx_VEC_DUPLICATE (mode, x)));
9863 return;
9864 }
9865
9866 /* One field is non-constant. Load constant then overwrite varying
9867 field. This is more efficient than using the stack. */
9868 if (n_var == 1)
9869 {
9870 rtx copy = copy_rtx (vals);
9871 rtx index = GEN_INT (one_var);
9872
9873 /* Load constant part of vector, substitute neighboring value for
9874 varying element. */
9875 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9876 neon_expand_vector_init (target, copy);
9877
9878 /* Insert variable. */
9879 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9880 switch (mode)
9881 {
9882 case V8QImode:
9883 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9884 break;
9885 case V16QImode:
9886 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9887 break;
9888 case V4HImode:
9889 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9890 break;
9891 case V8HImode:
9892 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9893 break;
9894 case V2SImode:
9895 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9896 break;
9897 case V4SImode:
9898 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9899 break;
9900 case V2SFmode:
9901 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9902 break;
9903 case V4SFmode:
9904 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9905 break;
9906 case V2DImode:
9907 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9908 break;
9909 default:
9910 gcc_unreachable ();
9911 }
9912 return;
9913 }
9914
9915 /* Construct the vector in memory one field at a time
9916 and load the whole vector. */
9917 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9918 for (i = 0; i < n_elts; i++)
9919 emit_move_insn (adjust_address_nv (mem, inner_mode,
9920 i * GET_MODE_SIZE (inner_mode)),
9921 XVECEXP (vals, 0, i));
9922 emit_move_insn (target, mem);
9923 }
9924
9925 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Report
9926 error ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9927 reported source locations are bogus. */
9928
9929 static void
9930 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9931 const char *err)
9932 {
9933 HOST_WIDE_INT lane;
9934
9935 gcc_assert (CONST_INT_P (operand));
9936
9937 lane = INTVAL (operand);
9938
9939 if (lane < low || lane >= high)
9940 error (err);
9941 }
9942
9943 /* Bounds-check lanes. */
9944
9945 void
9946 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9947 {
9948 bounds_check (operand, low, high, "lane out of range");
9949 }
9950
9951 /* Bounds-check constants. */
9952
9953 void
9954 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9955 {
9956 bounds_check (operand, low, high, "constant out of range");
9957 }
9958
9959 HOST_WIDE_INT
9960 neon_element_bits (enum machine_mode mode)
9961 {
9962 if (mode == DImode)
9963 return GET_MODE_BITSIZE (mode);
9964 else
9965 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9966 }
9967
9968 \f
9969 /* Predicates for `match_operand' and `match_operator'. */
9970
9971 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9972 WB is true if full writeback address modes are allowed and is false
9973 if limited writeback address modes (POST_INC and PRE_DEC) are
9974 allowed. */
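/* Some accepted forms, by way of illustration (not exhaustive): (mem (reg)),
   (mem (post_inc (reg))) and (mem (plus (reg) (const_int 1020))) are always
   accepted; (mem (pre_inc (reg))) only when WB is true. Constant offsets
   must be word-aligned and lie strictly between -1024 and 1024. */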
9975
9976 int
9977 arm_coproc_mem_operand (rtx op, bool wb)
9978 {
9979 rtx ind;
9980
9981 /* Reject eliminable registers. */
9982 if (! (reload_in_progress || reload_completed)
9983 && ( reg_mentioned_p (frame_pointer_rtx, op)
9984 || reg_mentioned_p (arg_pointer_rtx, op)
9985 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9986 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9987 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9988 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9989 return FALSE;
9990
9991 /* Constants are converted into offsets from labels. */
9992 if (!MEM_P (op))
9993 return FALSE;
9994
9995 ind = XEXP (op, 0);
9996
9997 if (reload_completed
9998 && (GET_CODE (ind) == LABEL_REF
9999 || (GET_CODE (ind) == CONST
10000 && GET_CODE (XEXP (ind, 0)) == PLUS
10001 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10002 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10003 return TRUE;
10004
10005 /* Match: (mem (reg)). */
10006 if (REG_P (ind))
10007 return arm_address_register_rtx_p (ind, 0);
10008
10009 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
10010 acceptable in any case (subject to verification by
10011 arm_address_register_rtx_p). We need WB to be true to accept
10012 PRE_INC and POST_DEC. */
10013 if (GET_CODE (ind) == POST_INC
10014 || GET_CODE (ind) == PRE_DEC
10015 || (wb
10016 && (GET_CODE (ind) == PRE_INC
10017 || GET_CODE (ind) == POST_DEC)))
10018 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10019
10020 if (wb
10021 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
10022 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
10023 && GET_CODE (XEXP (ind, 1)) == PLUS
10024 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
10025 ind = XEXP (ind, 1);
10026
10027 /* Match:
10028 (plus (reg)
10029 (const)). */
10030 if (GET_CODE (ind) == PLUS
10031 && REG_P (XEXP (ind, 0))
10032 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10033 && CONST_INT_P (XEXP (ind, 1))
10034 && INTVAL (XEXP (ind, 1)) > -1024
10035 && INTVAL (XEXP (ind, 1)) < 1024
10036 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10037 return TRUE;
10038
10039 return FALSE;
10040 }
10041
10042 /* Return TRUE if OP is a memory operand from or to which we can load or
10043 store a vector. TYPE is one of the following values:
10044 0 - Vector load/store (vldr)
10045 1 - Core registers (ldm)
10046 2 - Element/structure loads (vld1)
10047 If STRICT, reject addresses that still mention eliminable registers. */
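/* Illustrative examples of the above: (mem (reg)) is accepted for every TYPE;
   (mem (post_inc (reg))) for TYPE 0 and 2 but not 1; (mem (pre_dec (reg)))
   only for TYPE 0; and a constant offset such as (plus (reg) (const_int 8))
   only for TYPE 0, within the ranges checked below. */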
10048 int
10049 neon_vector_mem_operand (rtx op, int type, bool strict)
10050 {
10051 rtx ind;
10052
10053 /* Reject eliminable registers. */
10054 if (! (reload_in_progress || reload_completed)
10055 && ( reg_mentioned_p (frame_pointer_rtx, op)
10056 || reg_mentioned_p (arg_pointer_rtx, op)
10057 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10058 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10059 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10060 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10061 return !strict;
10062
10063 /* Constants are converted into offsets from labels. */
10064 if (!MEM_P (op))
10065 return FALSE;
10066
10067 ind = XEXP (op, 0);
10068
10069 if (reload_completed
10070 && (GET_CODE (ind) == LABEL_REF
10071 || (GET_CODE (ind) == CONST
10072 && GET_CODE (XEXP (ind, 0)) == PLUS
10073 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10074 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10075 return TRUE;
10076
10077 /* Match: (mem (reg)). */
10078 if (REG_P (ind))
10079 return arm_address_register_rtx_p (ind, 0);
10080
10081 /* Allow post-increment with Neon registers. */
10082 if ((type != 1 && GET_CODE (ind) == POST_INC)
10083 || (type == 0 && GET_CODE (ind) == PRE_DEC))
10084 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10085
10086 /* FIXME: vld1 allows register post-modify. */
10087
10088 /* Match:
10089 (plus (reg)
10090 (const)). */
10091 if (type == 0
10092 && GET_CODE (ind) == PLUS
10093 && REG_P (XEXP (ind, 0))
10094 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10095 && CONST_INT_P (XEXP (ind, 1))
10096 && INTVAL (XEXP (ind, 1)) > -1024
10097 /* For quad modes, we restrict the constant offset to be slightly less
10098 than what the instruction format permits. We have no such constraint
10099 on double mode offsets. (This must match arm_legitimate_index_p.) */
10100 && (INTVAL (XEXP (ind, 1))
10101 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10102 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10103 return TRUE;
10104
10105 return FALSE;
10106 }
10107
10108 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
10109 type. */
10110 int
10111 neon_struct_mem_operand (rtx op)
10112 {
10113 rtx ind;
10114
10115 /* Reject eliminable registers. */
10116 if (! (reload_in_progress || reload_completed)
10117 && ( reg_mentioned_p (frame_pointer_rtx, op)
10118 || reg_mentioned_p (arg_pointer_rtx, op)
10119 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10120 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10121 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10122 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10123 return FALSE;
10124
10125 /* Constants are converted into offsets from labels. */
10126 if (!MEM_P (op))
10127 return FALSE;
10128
10129 ind = XEXP (op, 0);
10130
10131 if (reload_completed
10132 && (GET_CODE (ind) == LABEL_REF
10133 || (GET_CODE (ind) == CONST
10134 && GET_CODE (XEXP (ind, 0)) == PLUS
10135 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10136 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10137 return TRUE;
10138
10139 /* Match: (mem (reg)). */
10140 if (REG_P (ind))
10141 return arm_address_register_rtx_p (ind, 0);
10142
10143 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
10144 if (GET_CODE (ind) == POST_INC
10145 || GET_CODE (ind) == PRE_DEC)
10146 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10147
10148 return FALSE;
10149 }
10150
10151 /* Return true if X is a register that will be eliminated later on. */
10152 int
10153 arm_eliminable_register (rtx x)
10154 {
10155 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10156 || REGNO (x) == ARG_POINTER_REGNUM
10157 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10158 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10159 }
10160
10161 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
10162 coprocessor registers. Otherwise return NO_REGS. */
10163
10164 enum reg_class
10165 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10166 {
10167 if (mode == HFmode)
10168 {
10169 if (!TARGET_NEON_FP16)
10170 return GENERAL_REGS;
10171 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
10172 return NO_REGS;
10173 return GENERAL_REGS;
10174 }
10175
10176 /* The neon move patterns handle all legitimate vector and struct
10177 addresses. */
10178 if (TARGET_NEON
10179 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10180 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10181 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10182 || VALID_NEON_STRUCT_MODE (mode)))
10183 return NO_REGS;
10184
10185 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10186 return NO_REGS;
10187
10188 return GENERAL_REGS;
10189 }
10190
10191 /* Values which must be returned in the most-significant end of the return
10192 register. */
10193
10194 static bool
10195 arm_return_in_msb (const_tree valtype)
10196 {
10197 return (TARGET_AAPCS_BASED
10198 && BYTES_BIG_ENDIAN
10199 && (AGGREGATE_TYPE_P (valtype)
10200 || TREE_CODE (valtype) == COMPLEX_TYPE
10201 || FIXED_POINT_TYPE_P (valtype)));
10202 }
10203
10204 /* Return TRUE if X references a SYMBOL_REF. */
10205 int
10206 symbol_mentioned_p (rtx x)
10207 {
10208 const char * fmt;
10209 int i;
10210
10211 if (GET_CODE (x) == SYMBOL_REF)
10212 return 1;
10213
10214 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10215 are constant offsets, not symbols. */
10216 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10217 return 0;
10218
10219 fmt = GET_RTX_FORMAT (GET_CODE (x));
10220
10221 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10222 {
10223 if (fmt[i] == 'E')
10224 {
10225 int j;
10226
10227 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10228 if (symbol_mentioned_p (XVECEXP (x, i, j)))
10229 return 1;
10230 }
10231 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10232 return 1;
10233 }
10234
10235 return 0;
10236 }
10237
10238 /* Return TRUE if X references a LABEL_REF. */
10239 int
10240 label_mentioned_p (rtx x)
10241 {
10242 const char * fmt;
10243 int i;
10244
10245 if (GET_CODE (x) == LABEL_REF)
10246 return 1;
10247
10248 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10249 instruction, but they are constant offsets, not symbols. */
10250 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10251 return 0;
10252
10253 fmt = GET_RTX_FORMAT (GET_CODE (x));
10254 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10255 {
10256 if (fmt[i] == 'E')
10257 {
10258 int j;
10259
10260 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10261 if (label_mentioned_p (XVECEXP (x, i, j)))
10262 return 1;
10263 }
10264 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10265 return 1;
10266 }
10267
10268 return 0;
10269 }
10270
10271 int
10272 tls_mentioned_p (rtx x)
10273 {
10274 switch (GET_CODE (x))
10275 {
10276 case CONST:
10277 return tls_mentioned_p (XEXP (x, 0));
10278
10279 case UNSPEC:
10280 if (XINT (x, 1) == UNSPEC_TLS)
10281 return 1;
10282
10283 default:
10284 return 0;
10285 }
10286 }
10287
10288 /* Must not copy any rtx that uses a pc-relative address. */
10289
10290 static int
10291 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10292 {
10293 if (GET_CODE (*x) == UNSPEC
10294 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10295 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10296 return 1;
10297 return 0;
10298 }
10299
10300 static bool
10301 arm_cannot_copy_insn_p (rtx insn)
10302 {
10303 /* The tls call insn cannot be copied, as it is paired with a data
10304 word. */
10305 if (recog_memoized (insn) == CODE_FOR_tlscall)
10306 return true;
10307
10308 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10309 }
10310
10311 enum rtx_code
10312 minmax_code (rtx x)
10313 {
10314 enum rtx_code code = GET_CODE (x);
10315
10316 switch (code)
10317 {
10318 case SMAX:
10319 return GE;
10320 case SMIN:
10321 return LE;
10322 case UMIN:
10323 return LEU;
10324 case UMAX:
10325 return GEU;
10326 default:
10327 gcc_unreachable ();
10328 }
10329 }
10330
10331 /* Match pair of min/max operators that can be implemented via usat/ssat. */
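/* For example (illustrative): bounds 0 and 255 match with *MASK == 8 and
   *SIGNED_SAT == false (the usat #8 range), while bounds -128 and 127 match
   with *MASK == 8 and *SIGNED_SAT == true (the ssat #8 range). */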
10332
10333 bool
10334 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10335 int *mask, bool *signed_sat)
10336 {
10337 /* The high bound must be a power of two minus one. */
10338 int log = exact_log2 (INTVAL (hi_bound) + 1);
10339 if (log == -1)
10340 return false;
10341
10342 /* The low bound is either zero (for usat) or one less than the
10343 negation of the high bound (for ssat). */
10344 if (INTVAL (lo_bound) == 0)
10345 {
10346 if (mask)
10347 *mask = log;
10348 if (signed_sat)
10349 *signed_sat = false;
10350
10351 return true;
10352 }
10353
10354 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10355 {
10356 if (mask)
10357 *mask = log + 1;
10358 if (signed_sat)
10359 *signed_sat = true;
10360
10361 return true;
10362 }
10363
10364 return false;
10365 }
10366
10367 /* Return 1 if memory locations are adjacent. */
10368 int
10369 adjacent_mem_locations (rtx a, rtx b)
10370 {
10371 /* We don't guarantee to preserve the order of these memory refs. */
10372 if (volatile_refs_p (a) || volatile_refs_p (b))
10373 return 0;
10374
10375 if ((REG_P (XEXP (a, 0))
10376 || (GET_CODE (XEXP (a, 0)) == PLUS
10377 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10378 && (REG_P (XEXP (b, 0))
10379 || (GET_CODE (XEXP (b, 0)) == PLUS
10380 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10381 {
10382 HOST_WIDE_INT val0 = 0, val1 = 0;
10383 rtx reg0, reg1;
10384 int val_diff;
10385
10386 if (GET_CODE (XEXP (a, 0)) == PLUS)
10387 {
10388 reg0 = XEXP (XEXP (a, 0), 0);
10389 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10390 }
10391 else
10392 reg0 = XEXP (a, 0);
10393
10394 if (GET_CODE (XEXP (b, 0)) == PLUS)
10395 {
10396 reg1 = XEXP (XEXP (b, 0), 0);
10397 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10398 }
10399 else
10400 reg1 = XEXP (b, 0);
10401
10402 /* Don't accept any offset that will require multiple
10403 instructions to handle, since this would cause the
10404 arith_adjacentmem pattern to output an overlong sequence. */
10405 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10406 return 0;
10407
10408 /* Don't allow an eliminable register: register elimination can make
10409 the offset too large. */
10410 if (arm_eliminable_register (reg0))
10411 return 0;
10412
10413 val_diff = val1 - val0;
10414
10415 if (arm_ld_sched)
10416 {
10417 /* If the target has load delay slots, then there's no benefit
10418 to using an ldm instruction unless the offset is zero and
10419 we are optimizing for size. */
10420 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10421 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10422 && (val_diff == 4 || val_diff == -4));
10423 }
10424
10425 return ((REGNO (reg0) == REGNO (reg1))
10426 && (val_diff == 4 || val_diff == -4));
10427 }
10428
10429 return 0;
10430 }
10431
10432 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10433 for load operations, false for store operations. CONSECUTIVE is true
10434 if the register numbers in the operation must be consecutive in the register
10435 bank. RETURN_PC is true if value is to be loaded in PC.
10436 The pattern we are trying to match for load is:
10437 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10438 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10439 :
10440 :
10441 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10442 ]
10443 where
10444 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10445 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10446 3. If consecutive is TRUE, then for kth register being loaded,
10447 REGNO (R_dk) = REGNO (R_d0) + k.
10448 The pattern for store is similar. */
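/* As a concrete illustration (register numbers are made up for the example):
   an SImode load-multiple with write-back, "ldmia r1!, {r3, r4}", is matched
   as the parallel
   [(set (reg r1) (plus (reg r1) (const_int 8)))
   (set (reg r3) (mem (reg r1)))
   (set (reg r4) (mem (plus (reg r1) (const_int 4))))]
   where the write-back adds (count - 1) * 4 to the base, the destination
   register numbers ascend, and each memory offset grows by 4. */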
10449 bool
10450 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10451 bool consecutive, bool return_pc)
10452 {
10453 HOST_WIDE_INT count = XVECLEN (op, 0);
10454 rtx reg, mem, addr;
10455 unsigned regno;
10456 unsigned first_regno;
10457 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10458 rtx elt;
10459 bool addr_reg_in_reglist = false;
10460 bool update = false;
10461 int reg_increment;
10462 int offset_adj;
10463 int regs_per_val;
10464
10465 /* If not in SImode, then registers must be consecutive
10466 (e.g., VLDM instructions for DFmode). */
10467 gcc_assert ((mode == SImode) || consecutive);
10468 /* Setting return_pc for stores is illegal. */
10469 gcc_assert (!return_pc || load);
10470
10471 /* Set up the increments and the regs per val based on the mode. */
10472 reg_increment = GET_MODE_SIZE (mode);
10473 regs_per_val = reg_increment / 4;
10474 offset_adj = return_pc ? 1 : 0;
10475
10476 if (count <= 1
10477 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10478 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10479 return false;
10480
10481 /* Check if this is a write-back. */
10482 elt = XVECEXP (op, 0, offset_adj);
10483 if (GET_CODE (SET_SRC (elt)) == PLUS)
10484 {
10485 i++;
10486 base = 1;
10487 update = true;
10488
10489 /* The offset adjustment must be the number of registers being
10490 popped times the size of a single register. */
10491 if (!REG_P (SET_DEST (elt))
10492 || !REG_P (XEXP (SET_SRC (elt), 0))
10493 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10494 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10495 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10496 ((count - 1 - offset_adj) * reg_increment))
10497 return false;
10498 }
10499
10500 i = i + offset_adj;
10501 base = base + offset_adj;
10502 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10503 success depends on the type: VLDM can do just one reg,
10504 LDM must do at least two. */
10505 if ((count <= i) && (mode == SImode))
10506 return false;
10507
10508 elt = XVECEXP (op, 0, i - 1);
10509 if (GET_CODE (elt) != SET)
10510 return false;
10511
10512 if (load)
10513 {
10514 reg = SET_DEST (elt);
10515 mem = SET_SRC (elt);
10516 }
10517 else
10518 {
10519 reg = SET_SRC (elt);
10520 mem = SET_DEST (elt);
10521 }
10522
10523 if (!REG_P (reg) || !MEM_P (mem))
10524 return false;
10525
10526 regno = REGNO (reg);
10527 first_regno = regno;
10528 addr = XEXP (mem, 0);
10529 if (GET_CODE (addr) == PLUS)
10530 {
10531 if (!CONST_INT_P (XEXP (addr, 1)))
10532 return false;
10533
10534 offset = INTVAL (XEXP (addr, 1));
10535 addr = XEXP (addr, 0);
10536 }
10537
10538 if (!REG_P (addr))
10539 return false;
10540
10541 /* Don't allow SP to be loaded unless it is also the base register. It
10542 guarantees that SP is reset correctly when an LDM instruction
10543 is interrupted. Otherwise, we might end up with a corrupt stack. */
10544 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10545 return false;
10546
10547 for (; i < count; i++)
10548 {
10549 elt = XVECEXP (op, 0, i);
10550 if (GET_CODE (elt) != SET)
10551 return false;
10552
10553 if (load)
10554 {
10555 reg = SET_DEST (elt);
10556 mem = SET_SRC (elt);
10557 }
10558 else
10559 {
10560 reg = SET_SRC (elt);
10561 mem = SET_DEST (elt);
10562 }
10563
10564 if (!REG_P (reg)
10565 || GET_MODE (reg) != mode
10566 || REGNO (reg) <= regno
10567 || (consecutive
10568 && (REGNO (reg) !=
10569 (unsigned int) (first_regno + regs_per_val * (i - base))))
10570 /* Don't allow SP to be loaded unless it is also the base register. It
10571 guarantees that SP is reset correctly when an LDM instruction
10572 is interrupted. Otherwise, we might end up with a corrupt stack. */
10573 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10574 || !MEM_P (mem)
10575 || GET_MODE (mem) != mode
10576 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10577 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10578 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10579 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10580 offset + (i - base) * reg_increment))
10581 && (!REG_P (XEXP (mem, 0))
10582 || offset + (i - base) * reg_increment != 0)))
10583 return false;
10584
10585 regno = REGNO (reg);
10586 if (regno == REGNO (addr))
10587 addr_reg_in_reglist = true;
10588 }
10589
10590 if (load)
10591 {
10592 if (update && addr_reg_in_reglist)
10593 return false;
10594
10595 /* For Thumb-1, the address register is always modified - either by write-back
10596 or by explicit load. If the pattern does not describe an update,
10597 then the address register must be in the list of loaded registers. */
10598 if (TARGET_THUMB1)
10599 return update || addr_reg_in_reglist;
10600 }
10601
10602 return true;
10603 }
10604
10605 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10606 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10607 instruction. ADD_OFFSET is nonzero if the base address register needs
10608 to be modified with an add instruction before we can use it. */
10609
10610 static bool
10611 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10612 int nops, HOST_WIDE_INT add_offset)
10613 {
10614 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10615 if the offset isn't small enough. The reason 2 ldrs are faster
10616 is because these ARMs are able to do more than one cache access
10617 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10618 whilst the ARM8 has a double bandwidth cache. This means that
10619 these cores can do both an instruction fetch and a data fetch in
10620 a single cycle, so the trick of calculating the address into a
10621 scratch register (one of the result regs) and then doing a load
10622 multiple actually becomes slower (and no smaller in code size).
10623 That is the transformation
10624
10625 ldr rd1, [rbase + offset]
10626 ldr rd2, [rbase + offset + 4]
10627
10628 to
10629
10630 add rd1, rbase, offset
10631 ldmia rd1, {rd1, rd2}
10632
10633 produces worse code -- '3 cycles + any stalls on rd2' instead of
10634 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10635 access per cycle, the first sequence could never complete in less
10636 than 6 cycles, whereas the ldm sequence would only take 5 and
10637 would make better use of sequential accesses if not hitting the
10638 cache.
10639
10640 We cheat here and test 'arm_ld_sched' which we currently know to
10641 only be true for the ARM8, ARM9 and StrongARM. If this ever
10642 changes, then the test below needs to be reworked. */
10643 if (nops == 2 && arm_ld_sched && add_offset != 0)
10644 return false;
10645
10646 /* XScale has load-store double instructions, but they have stricter
10647 alignment requirements than load-store multiple, so we cannot
10648 use them.
10649
10650 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10651 the pipeline until completion.
10652
10653 NREGS CYCLES
10654 1 3
10655 2 4
10656 3 5
10657 4 6
10658
10659 An ldr instruction takes 1-3 cycles, but does not block the
10660 pipeline.
10661
10662 NREGS CYCLES
10663 1 1-3
10664 2 2-6
10665 3 3-9
10666 4 4-12
10667
10668 Best case ldr will always win. However, the more ldr instructions
10669 we issue, the less likely we are to be able to schedule them well.
10670 Using ldr instructions also increases code size.
10671
10672 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10673 for counts of 3 or 4 regs. */
10674 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10675 return false;
10676 return true;
10677 }
10678
10679 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10680 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10681 an array ORDER which describes the sequence in which to access the
10682 offsets so that they come out in ascending order. In this sequence, each
10683 offset must be larger by exactly 4 than the previous one. ORDER[0]
10684 must have been filled in with the lowest offset by the caller.
10685 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10686 we use to verify that ORDER produces an ascending order of registers.
10687 Return true if it was possible to construct such an order, false if
10688 not. */
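/* For instance (illustrative): with UNSORTED_OFFSETS = {8, 4, 0, 12} and
   ORDER[0] preset to 2 (the index of offset 0), the loop fills in
   ORDER = {2, 1, 0, 3}, visiting offsets 0, 4, 8, 12. */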
10689
10690 static bool
10691 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10692 int *unsorted_regs)
10693 {
10694 int i;
10695 for (i = 1; i < nops; i++)
10696 {
10697 int j;
10698
10699 order[i] = order[i - 1];
10700 for (j = 0; j < nops; j++)
10701 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10702 {
10703 /* We must find exactly one offset that is higher than the
10704 previous one by 4. */
10705 if (order[i] != order[i - 1])
10706 return false;
10707 order[i] = j;
10708 }
10709 if (order[i] == order[i - 1])
10710 return false;
10711 /* The register numbers must be ascending. */
10712 if (unsorted_regs != NULL
10713 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10714 return false;
10715 }
10716 return true;
10717 }
10718
10719 /* Used to determine in a peephole whether a sequence of load
10720 instructions can be changed into a load-multiple instruction.
10721 NOPS is the number of separate load instructions we are examining. The
10722 first NOPS entries in OPERANDS are the destination registers, the
10723 next NOPS entries are memory operands. If this function is
10724 successful, *BASE is set to the common base register of the memory
10725 accesses; *LOAD_OFFSET is set to the first memory location's offset
10726 from that base register.
10727 REGS is an array filled in with the destination register numbers.
10728 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10729 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10730 the sequence of registers in REGS matches the loads from ascending memory
10731 locations, and the function verifies that the register numbers are
10732 themselves ascending. If CHECK_REGS is false, the register numbers
10733 are stored in the order they are found in the operands. */
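/* The nonzero return value encodes the addressing mode selected below:
   1 ldmia, 2 ldmib, 3 ldmda, 4 ldmdb, and 5 when the base register must
   first be adjusted with an add; 0 means no load-multiple is possible. */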
10734 static int
10735 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10736 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10737 {
10738 int unsorted_regs[MAX_LDM_STM_OPS];
10739 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10740 int order[MAX_LDM_STM_OPS];
10741 rtx base_reg_rtx = NULL;
10742 int base_reg = -1;
10743 int i, ldm_case;
10744
10745 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10746 easily extended if required. */
10747 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10748
10749 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10750
10751 /* Loop over the operands and check that the memory references are
10752 suitable (i.e. immediate offsets from the same base register). At
10753 the same time, extract the target register, and the memory
10754 offsets. */
10755 for (i = 0; i < nops; i++)
10756 {
10757 rtx reg;
10758 rtx offset;
10759
10760 /* Convert a subreg of a mem into the mem itself. */
10761 if (GET_CODE (operands[nops + i]) == SUBREG)
10762 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10763
10764 gcc_assert (MEM_P (operands[nops + i]));
10765
10766 /* Don't reorder volatile memory references; it doesn't seem worth
10767 looking for the case where the order is ok anyway. */
10768 if (MEM_VOLATILE_P (operands[nops + i]))
10769 return 0;
10770
10771 offset = const0_rtx;
10772
10773 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10774 || (GET_CODE (reg) == SUBREG
10775 && REG_P (reg = SUBREG_REG (reg))))
10776 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10777 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10778 || (GET_CODE (reg) == SUBREG
10779 && REG_P (reg = SUBREG_REG (reg))))
10780 && (CONST_INT_P (offset
10781 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10782 {
10783 if (i == 0)
10784 {
10785 base_reg = REGNO (reg);
10786 base_reg_rtx = reg;
10787 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10788 return 0;
10789 }
10790 else if (base_reg != (int) REGNO (reg))
10791 /* Not addressed from the same base register. */
10792 return 0;
10793
10794 unsorted_regs[i] = (REG_P (operands[i])
10795 ? REGNO (operands[i])
10796 : REGNO (SUBREG_REG (operands[i])));
10797
10798 /* If it isn't an integer register, or if it overwrites the
10799 base register but isn't the last insn in the list, then
10800 we can't do this. */
10801 if (unsorted_regs[i] < 0
10802 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10803 || unsorted_regs[i] > 14
10804 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10805 return 0;
10806
10807 /* Don't allow SP to be loaded unless it is also the base
10808 register. It guarantees that SP is reset correctly when
10809 an LDM instruction is interrupted. Otherwise, we might
10810 end up with a corrupt stack. */
10811 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10812 return 0;
10813
10814 unsorted_offsets[i] = INTVAL (offset);
10815 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10816 order[0] = i;
10817 }
10818 else
10819 /* Not a suitable memory address. */
10820 return 0;
10821 }
10822
10823 /* All the useful information has now been extracted from the
10824 operands into unsorted_regs and unsorted_offsets; additionally,
10825 order[0] has been set to the lowest offset in the list. Sort
10826 the offsets into order, verifying that they are adjacent, and
10827 check that the register numbers are ascending. */
10828 if (!compute_offset_order (nops, unsorted_offsets, order,
10829 check_regs ? unsorted_regs : NULL))
10830 return 0;
10831
10832 if (saved_order)
10833 memcpy (saved_order, order, sizeof order);
10834
10835 if (base)
10836 {
10837 *base = base_reg;
10838
10839 for (i = 0; i < nops; i++)
10840 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10841
10842 *load_offset = unsorted_offsets[order[0]];
10843 }
10844
10845 if (TARGET_THUMB1
10846 && !peep2_reg_dead_p (nops, base_reg_rtx))
10847 return 0;
10848
10849 if (unsorted_offsets[order[0]] == 0)
10850 ldm_case = 1; /* ldmia */
10851 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10852 ldm_case = 2; /* ldmib */
10853 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10854 ldm_case = 3; /* ldmda */
10855 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10856 ldm_case = 4; /* ldmdb */
10857 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10858 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10859 ldm_case = 5;
10860 else
10861 return 0;
10862
10863 if (!multiple_operation_profitable_p (false, nops,
10864 ldm_case == 5
10865 ? unsorted_offsets[order[0]] : 0))
10866 return 0;
10867
10868 return ldm_case;
10869 }
10870
10871 /* Used to determine in a peephole whether a sequence of store instructions can
10872 be changed into a store-multiple instruction.
10873 NOPS is the number of separate store instructions we are examining.
10874 NOPS_TOTAL is the total number of instructions recognized by the peephole
10875 pattern.
10876 The first NOPS entries in OPERANDS are the source registers, the next
10877 NOPS entries are memory operands. If this function is successful, *BASE is
10878 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10879 to the first memory location's offset from that base register. REGS is an
10880 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10881 likewise filled with the corresponding rtx's.
10882 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10883 numbers to an ascending order of stores.
10884 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10885 in ascending memory order, and the function verifies that the register
10886 numbers are themselves ascending. If CHECK_REGS is false, the register
10887 numbers are stored in the order they are found in the operands. */
10888 static int
10889 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10890 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10891 HOST_WIDE_INT *load_offset, bool check_regs)
10892 {
10893 int unsorted_regs[MAX_LDM_STM_OPS];
10894 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10895 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10896 int order[MAX_LDM_STM_OPS];
10897 int base_reg = -1;
10898 rtx base_reg_rtx = NULL;
10899 int i, stm_case;
10900
10901 /* Write back of base register is currently only supported for Thumb 1. */
10902 int base_writeback = TARGET_THUMB1;
10903
10904 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10905 easily extended if required. */
10906 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10907
10908 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10909
10910 /* Loop over the operands and check that the memory references are
10911 suitable (i.e. immediate offsets from the same base register). At
10912 the same time, extract the target register, and the memory
10913 offsets. */
10914 for (i = 0; i < nops; i++)
10915 {
10916 rtx reg;
10917 rtx offset;
10918
10919 /* Convert a subreg of a mem into the mem itself. */
10920 if (GET_CODE (operands[nops + i]) == SUBREG)
10921 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10922
10923 gcc_assert (MEM_P (operands[nops + i]));
10924
10925 /* Don't reorder volatile memory references; it doesn't seem worth
10926 looking for the case where the order is ok anyway. */
10927 if (MEM_VOLATILE_P (operands[nops + i]))
10928 return 0;
10929
10930 offset = const0_rtx;
10931
10932 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10933 || (GET_CODE (reg) == SUBREG
10934 && REG_P (reg = SUBREG_REG (reg))))
10935 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10936 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10937 || (GET_CODE (reg) == SUBREG
10938 && REG_P (reg = SUBREG_REG (reg))))
10939 && (CONST_INT_P (offset
10940 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10941 {
10942 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10943 ? operands[i] : SUBREG_REG (operands[i]));
10944 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10945
10946 if (i == 0)
10947 {
10948 base_reg = REGNO (reg);
10949 base_reg_rtx = reg;
10950 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10951 return 0;
10952 }
10953 else if (base_reg != (int) REGNO (reg))
10954 /* Not addressed from the same base register. */
10955 return 0;
10956
10957 /* If it isn't an integer register, then we can't do this. */
10958 if (unsorted_regs[i] < 0
10959 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10960 /* The effects are unpredictable if the base register is
10961 both updated and stored. */
10962 || (base_writeback && unsorted_regs[i] == base_reg)
10963 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10964 || unsorted_regs[i] > 14)
10965 return 0;
10966
10967 unsorted_offsets[i] = INTVAL (offset);
10968 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10969 order[0] = i;
10970 }
10971 else
10972 /* Not a suitable memory address. */
10973 return 0;
10974 }
10975
10976 /* All the useful information has now been extracted from the
10977 operands into unsorted_regs and unsorted_offsets; additionally,
10978 order[0] has been set to the lowest offset in the list. Sort
10979 the offsets into order, verifying that they are adjacent, and
10980 check that the register numbers are ascending. */
10981 if (!compute_offset_order (nops, unsorted_offsets, order,
10982 check_regs ? unsorted_regs : NULL))
10983 return 0;
10984
10985 if (saved_order)
10986 memcpy (saved_order, order, sizeof order);
10987
10988 if (base)
10989 {
10990 *base = base_reg;
10991
10992 for (i = 0; i < nops; i++)
10993 {
10994 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10995 if (reg_rtxs)
10996 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10997 }
10998
10999 *load_offset = unsorted_offsets[order[0]];
11000 }
11001
11002 if (TARGET_THUMB1
11003 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
11004 return 0;
11005
11006 if (unsorted_offsets[order[0]] == 0)
11007 stm_case = 1; /* stmia */
11008 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
11009 stm_case = 2; /* stmib */
11010 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
11011 stm_case = 3; /* stmda */
11012 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
11013 stm_case = 4; /* stmdb */
11014 else
11015 return 0;
11016
11017 if (!multiple_operation_profitable_p (false, nops, 0))
11018 return 0;
11019
11020 return stm_case;
11021 }
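
/* For example, on ARM a pair of stores with offsets -8 and -4 from the
   same base register, such as
     str r4, [r0, #-8]
     str r5, [r0, #-4]
   is recognized as stm_case 4 and can be emitted as "stmdb r0, {r4, r5}",
   provided the register numbers ascend with the addresses.  */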
11022 \f
11023 /* Routines for use in generating RTL. */
11024
11025 /* Generate a load-multiple instruction. COUNT is the number of loads in
11026 the instruction; REGS and MEMS are arrays containing the operands.
11027 BASEREG is the base register to be used in addressing the memory operands.
11028 WBACK_OFFSET is nonzero if the instruction should update the base
11029 register. */
11030
11031 static rtx
11032 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11033 HOST_WIDE_INT wback_offset)
11034 {
11035 int i = 0, j;
11036 rtx result;
11037
11038 if (!multiple_operation_profitable_p (false, count, 0))
11039 {
11040 rtx seq;
11041
11042 start_sequence ();
11043
11044 for (i = 0; i < count; i++)
11045 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
11046
11047 if (wback_offset != 0)
11048 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11049
11050 seq = get_insns ();
11051 end_sequence ();
11052
11053 return seq;
11054 }
11055
11056 result = gen_rtx_PARALLEL (VOIDmode,
11057 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11058 if (wback_offset != 0)
11059 {
11060 XVECEXP (result, 0, 0)
11061 = gen_rtx_SET (VOIDmode, basereg,
11062 plus_constant (Pmode, basereg, wback_offset));
11063 i = 1;
11064 count++;
11065 }
11066
11067 for (j = 0; i < count; i++, j++)
11068 XVECEXP (result, 0, i)
11069 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
11070
11071 return result;
11072 }
11073
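/* For instance, with COUNT == 2, WBACK_OFFSET == 8 and registers r4/r5,
   the PARALLEL built by arm_gen_load_multiple_1 looks roughly like
     (parallel [(set (reg base) (plus (reg base) (const_int 8)))
                (set (reg r4) (mem ...))
                (set (reg r5) (mem ...))])
   which is intended to match a load-multiple pattern in the machine
   description; when the operation is not considered profitable a plain
   sequence of single moves is emitted instead.  */
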
11074 /* Generate a store-multiple instruction. COUNT is the number of stores in
11075 the instruction; REGS and MEMS are arrays containing the operands.
11076 BASEREG is the base register to be used in addressing the memory operands.
11077 WBACK_OFFSET is nonzero if the instruction should update the base
11078 register. */
11079
11080 static rtx
11081 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11082 HOST_WIDE_INT wback_offset)
11083 {
11084 int i = 0, j;
11085 rtx result;
11086
11087 if (GET_CODE (basereg) == PLUS)
11088 basereg = XEXP (basereg, 0);
11089
11090 if (!multiple_operation_profitable_p (false, count, 0))
11091 {
11092 rtx seq;
11093
11094 start_sequence ();
11095
11096 for (i = 0; i < count; i++)
11097 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11098
11099 if (wback_offset != 0)
11100 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11101
11102 seq = get_insns ();
11103 end_sequence ();
11104
11105 return seq;
11106 }
11107
11108 result = gen_rtx_PARALLEL (VOIDmode,
11109 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11110 if (wback_offset != 0)
11111 {
11112 XVECEXP (result, 0, 0)
11113 = gen_rtx_SET (VOIDmode, basereg,
11114 plus_constant (Pmode, basereg, wback_offset));
11115 i = 1;
11116 count++;
11117 }
11118
11119 for (j = 0; i < count; i++, j++)
11120 XVECEXP (result, 0, i)
11121 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11122
11123 return result;
11124 }
11125
11126 /* Generate either a load-multiple or a store-multiple instruction. This
11127 function can be used in situations where we can start with a single MEM
11128 rtx and adjust its address upwards.
11129 COUNT is the number of operations in the instruction, not counting a
11130 possible update of the base register. REGS is an array containing the
11131 register operands.
11132 BASEREG is the base register to be used in addressing the memory operands,
11133 which are constructed from BASEMEM.
11134 WRITE_BACK specifies whether the generated instruction should include an
11135 update of the base register.
11136 OFFSETP is used to pass an offset to and from this function; this offset
11137 is not used when constructing the address (instead BASEMEM should have an
11138 appropriate offset in its address); it is used only for setting
11139 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
11140
11141 static rtx
11142 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11143 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11144 {
11145 rtx mems[MAX_LDM_STM_OPS];
11146 HOST_WIDE_INT offset = *offsetp;
11147 int i;
11148
11149 gcc_assert (count <= MAX_LDM_STM_OPS);
11150
11151 if (GET_CODE (basereg) == PLUS)
11152 basereg = XEXP (basereg, 0);
11153
11154 for (i = 0; i < count; i++)
11155 {
11156 rtx addr = plus_constant (Pmode, basereg, i * 4);
11157 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11158 offset += 4;
11159 }
11160
11161 if (write_back)
11162 *offsetp = offset;
11163
11164 if (is_load)
11165 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11166 write_back ? 4 * count : 0);
11167 else
11168 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11169 write_back ? 4 * count : 0);
11170 }
11171
11172 rtx
11173 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11174 rtx basemem, HOST_WIDE_INT *offsetp)
11175 {
11176 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
11177 offsetp);
11178 }
11179
11180 rtx
11181 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11182 rtx basemem, HOST_WIDE_INT *offsetp)
11183 {
11184 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
11185 offsetp);
11186 }
11187
11188 /* Called from a peephole2 expander to turn a sequence of loads into an
11189 LDM instruction. OPERANDS are the operands found by the peephole matcher;
11190 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
11191 is true if we can reorder the registers because their subsequent uses are
11192 commutative.
11193 Returns true iff we could generate a new instruction. */
11194
11195 bool
11196 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11197 {
11198 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11199 rtx mems[MAX_LDM_STM_OPS];
11200 int i, j, base_reg;
11201 rtx base_reg_rtx;
11202 HOST_WIDE_INT offset;
11203 int write_back = FALSE;
11204 int ldm_case;
11205 rtx addr;
11206
11207 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11208 &base_reg, &offset, !sort_regs);
11209
11210 if (ldm_case == 0)
11211 return false;
11212
11213 if (sort_regs)
11214 for (i = 0; i < nops - 1; i++)
11215 for (j = i + 1; j < nops; j++)
11216 if (regs[i] > regs[j])
11217 {
11218 int t = regs[i];
11219 regs[i] = regs[j];
11220 regs[j] = t;
11221 }
11222 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11223
11224 if (TARGET_THUMB1)
11225 {
11226 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11227 gcc_assert (ldm_case == 1 || ldm_case == 5);
11228 write_back = TRUE;
11229 }
11230
11231 if (ldm_case == 5)
11232 {
11233 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11234 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11235 offset = 0;
11236 if (!TARGET_THUMB1)
11237 {
11238 base_reg = regs[0];
11239 base_reg_rtx = newbase;
11240 }
11241 }
11242
11243 for (i = 0; i < nops; i++)
11244 {
11245 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11246 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11247 SImode, addr, 0);
11248 }
11249 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11250 write_back ? offset + i * 4 : 0));
11251 return true;
11252 }
11253
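/* A sketch of the ldm_case 5 handling above: for
     ldr r4, [r0, #100]
     ldr r5, [r0, #104]
   the base offset is folded into a register first, giving something like
     add   r4, r0, #100
     ldmia r4, {r4, r5}
   In ARM/Thumb-2 the lowest destination register is reused as the new
   base; for Thumb-1 the original base is updated and write-back is used.  */
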
11254 /* Called from a peephole2 expander to turn a sequence of stores into an
11255 STM instruction. OPERANDS are the operands found by the peephole matcher;
11256 NOPS indicates how many separate stores we are trying to combine.
11257 Returns true iff we could generate a new instruction. */
11258
11259 bool
11260 gen_stm_seq (rtx *operands, int nops)
11261 {
11262 int i;
11263 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11264 rtx mems[MAX_LDM_STM_OPS];
11265 int base_reg;
11266 rtx base_reg_rtx;
11267 HOST_WIDE_INT offset;
11268 int write_back = FALSE;
11269 int stm_case;
11270 rtx addr;
11271 bool base_reg_dies;
11272
11273 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11274 mem_order, &base_reg, &offset, true);
11275
11276 if (stm_case == 0)
11277 return false;
11278
11279 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11280
11281 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11282 if (TARGET_THUMB1)
11283 {
11284 gcc_assert (base_reg_dies);
11285 write_back = TRUE;
11286 }
11287
11288 if (stm_case == 5)
11289 {
11290 gcc_assert (base_reg_dies);
11291 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11292 offset = 0;
11293 }
11294
11295 addr = plus_constant (Pmode, base_reg_rtx, offset);
11296
11297 for (i = 0; i < nops; i++)
11298 {
11299 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11300 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11301 SImode, addr, 0);
11302 }
11303 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11304 write_back ? offset + i * 4 : 0));
11305 return true;
11306 }
11307
11308 /* Called from a peephole2 expander to turn a sequence of stores that are
11309 preceded by constant loads into an STM instruction. OPERANDS are the
11310 operands found by the peephole matcher; NOPS indicates how many
11311 separate stores we are trying to combine; there are 2 * NOPS
11312 instructions in the peephole.
11313 Returns true iff we could generate a new instruction. */
11314
11315 bool
11316 gen_const_stm_seq (rtx *operands, int nops)
11317 {
11318 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11319 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11320 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11321 rtx mems[MAX_LDM_STM_OPS];
11322 int base_reg;
11323 rtx base_reg_rtx;
11324 HOST_WIDE_INT offset;
11325 int write_back = FALSE;
11326 int stm_case;
11327 rtx addr;
11328 bool base_reg_dies;
11329 int i, j;
11330 HARD_REG_SET allocated;
11331
11332 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11333 mem_order, &base_reg, &offset, false);
11334
11335 if (stm_case == 0)
11336 return false;
11337
11338 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
11339
11340 /* If the same register is used more than once, try to find a free
11341 register. */
11342 CLEAR_HARD_REG_SET (allocated);
11343 for (i = 0; i < nops; i++)
11344 {
11345 for (j = i + 1; j < nops; j++)
11346 if (regs[i] == regs[j])
11347 {
11348 rtx t = peep2_find_free_register (0, nops * 2,
11349 TARGET_THUMB1 ? "l" : "r",
11350 SImode, &allocated);
11351 if (t == NULL_RTX)
11352 return false;
11353 reg_rtxs[i] = t;
11354 regs[i] = REGNO (t);
11355 }
11356 }
11357
11358 /* Compute an ordering that maps the register numbers to an ascending
11359 sequence. */
11360 reg_order[0] = 0;
11361 for (i = 0; i < nops; i++)
11362 if (regs[i] < regs[reg_order[0]])
11363 reg_order[0] = i;
11364
11365 for (i = 1; i < nops; i++)
11366 {
11367 int this_order = reg_order[i - 1];
11368 for (j = 0; j < nops; j++)
11369 if (regs[j] > regs[reg_order[i - 1]]
11370 && (this_order == reg_order[i - 1]
11371 || regs[j] < regs[this_order]))
11372 this_order = j;
11373 reg_order[i] = this_order;
11374 }
11375
11376 /* Ensure that registers that must be live after the instruction end
11377 up with the correct value. */
11378 for (i = 0; i < nops; i++)
11379 {
11380 int this_order = reg_order[i];
11381 if ((this_order != mem_order[i]
11382 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11383 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11384 return false;
11385 }
11386
11387 /* Load the constants. */
11388 for (i = 0; i < nops; i++)
11389 {
11390 rtx op = operands[2 * nops + mem_order[i]];
11391 sorted_regs[i] = regs[reg_order[i]];
11392 emit_move_insn (reg_rtxs[reg_order[i]], op);
11393 }
11394
11395 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11396
11397 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11398 if (TARGET_THUMB1)
11399 {
11400 gcc_assert (base_reg_dies);
11401 write_back = TRUE;
11402 }
11403
11404 if (stm_case == 5)
11405 {
11406 gcc_assert (base_reg_dies);
11407 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11408 offset = 0;
11409 }
11410
11411 addr = plus_constant (Pmode, base_reg_rtx, offset);
11412
11413 for (i = 0; i < nops; i++)
11414 {
11415 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11416 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11417 SImode, addr, 0);
11418 }
11419 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11420 write_back ? offset + i * 4 : 0));
11421 return true;
11422 }
11423
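/* As a rough example of what this peephole handles:
     mov r3, #1
     str r3, [r0]
     mov r2, #2
     str r2, [r0, #4]
   stores ascending addresses from descending registers, so (provided the
   old values of r2 and r3 are dead) the constants are reloaded into an
   ascending register assignment and the stores collapse to
     mov   r2, #1
     mov   r3, #2
     stmia r0, {r2, r3}  */
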
11424 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11425 unaligned copies on processors which support unaligned semantics for those
11426 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
11427 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11428 An interleave factor of 1 (the minimum) will perform no interleaving.
11429 Load/store multiple are used for aligned addresses where possible. */
11430
11431 static void
11432 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11433 HOST_WIDE_INT length,
11434 unsigned int interleave_factor)
11435 {
11436 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11437 int *regnos = XALLOCAVEC (int, interleave_factor);
11438 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11439 HOST_WIDE_INT i, j;
11440 HOST_WIDE_INT remaining = length, words;
11441 rtx halfword_tmp = NULL, byte_tmp = NULL;
11442 rtx dst, src;
11443 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11444 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11445 HOST_WIDE_INT srcoffset, dstoffset;
11446 HOST_WIDE_INT src_autoinc, dst_autoinc;
11447 rtx mem, addr;
11448
11449 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11450
11451 /* Use hard registers if we have aligned source or destination so we can use
11452 load/store multiple with contiguous registers. */
11453 if (dst_aligned || src_aligned)
11454 for (i = 0; i < interleave_factor; i++)
11455 regs[i] = gen_rtx_REG (SImode, i);
11456 else
11457 for (i = 0; i < interleave_factor; i++)
11458 regs[i] = gen_reg_rtx (SImode);
11459
11460 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11461 src = copy_addr_to_reg (XEXP (srcbase, 0));
11462
11463 srcoffset = dstoffset = 0;
11464
11465 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11466 For copying the last bytes we want to subtract this offset again. */
11467 src_autoinc = dst_autoinc = 0;
11468
11469 for (i = 0; i < interleave_factor; i++)
11470 regnos[i] = i;
11471
11472 /* Copy BLOCK_SIZE_BYTES chunks. */
11473
11474 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11475 {
11476 /* Load words. */
11477 if (src_aligned && interleave_factor > 1)
11478 {
11479 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11480 TRUE, srcbase, &srcoffset));
11481 src_autoinc += UNITS_PER_WORD * interleave_factor;
11482 }
11483 else
11484 {
11485 for (j = 0; j < interleave_factor; j++)
11486 {
11487 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11488 - src_autoinc));
11489 mem = adjust_automodify_address (srcbase, SImode, addr,
11490 srcoffset + j * UNITS_PER_WORD);
11491 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11492 }
11493 srcoffset += block_size_bytes;
11494 }
11495
11496 /* Store words. */
11497 if (dst_aligned && interleave_factor > 1)
11498 {
11499 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11500 TRUE, dstbase, &dstoffset));
11501 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11502 }
11503 else
11504 {
11505 for (j = 0; j < interleave_factor; j++)
11506 {
11507 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11508 - dst_autoinc));
11509 mem = adjust_automodify_address (dstbase, SImode, addr,
11510 dstoffset + j * UNITS_PER_WORD);
11511 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11512 }
11513 dstoffset += block_size_bytes;
11514 }
11515
11516 remaining -= block_size_bytes;
11517 }
11518
11519 /* Copy any whole words left (note these aren't interleaved with any
11520 subsequent halfword/byte load/stores in the interests of simplicity). */
11521
11522 words = remaining / UNITS_PER_WORD;
11523
11524 gcc_assert (words < interleave_factor);
11525
11526 if (src_aligned && words > 1)
11527 {
11528 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11529 &srcoffset));
11530 src_autoinc += UNITS_PER_WORD * words;
11531 }
11532 else
11533 {
11534 for (j = 0; j < words; j++)
11535 {
11536 addr = plus_constant (Pmode, src,
11537 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11538 mem = adjust_automodify_address (srcbase, SImode, addr,
11539 srcoffset + j * UNITS_PER_WORD);
11540 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11541 }
11542 srcoffset += words * UNITS_PER_WORD;
11543 }
11544
11545 if (dst_aligned && words > 1)
11546 {
11547 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11548 &dstoffset));
11549 dst_autoinc += words * UNITS_PER_WORD;
11550 }
11551 else
11552 {
11553 for (j = 0; j < words; j++)
11554 {
11555 addr = plus_constant (Pmode, dst,
11556 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11557 mem = adjust_automodify_address (dstbase, SImode, addr,
11558 dstoffset + j * UNITS_PER_WORD);
11559 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11560 }
11561 dstoffset += words * UNITS_PER_WORD;
11562 }
11563
11564 remaining -= words * UNITS_PER_WORD;
11565
11566 gcc_assert (remaining < 4);
11567
11568 /* Copy a halfword if necessary. */
11569
11570 if (remaining >= 2)
11571 {
11572 halfword_tmp = gen_reg_rtx (SImode);
11573
11574 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11575 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11576 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11577
11578 /* Either write out immediately, or delay until we've loaded the last
11579 byte, depending on interleave factor. */
11580 if (interleave_factor == 1)
11581 {
11582 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11583 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11584 emit_insn (gen_unaligned_storehi (mem,
11585 gen_lowpart (HImode, halfword_tmp)));
11586 halfword_tmp = NULL;
11587 dstoffset += 2;
11588 }
11589
11590 remaining -= 2;
11591 srcoffset += 2;
11592 }
11593
11594 gcc_assert (remaining < 2);
11595
11596 /* Copy last byte. */
11597
11598 if ((remaining & 1) != 0)
11599 {
11600 byte_tmp = gen_reg_rtx (SImode);
11601
11602 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11603 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11604 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11605
11606 if (interleave_factor == 1)
11607 {
11608 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11609 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11610 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11611 byte_tmp = NULL;
11612 dstoffset++;
11613 }
11614
11615 remaining--;
11616 srcoffset++;
11617 }
11618
11619 /* Store last halfword if we haven't done so already. */
11620
11621 if (halfword_tmp)
11622 {
11623 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11624 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11625 emit_insn (gen_unaligned_storehi (mem,
11626 gen_lowpart (HImode, halfword_tmp)));
11627 dstoffset += 2;
11628 }
11629
11630 /* Likewise for last byte. */
11631
11632 if (byte_tmp)
11633 {
11634 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11635 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11636 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11637 dstoffset++;
11638 }
11639
11640 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11641 }
11642
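/* For example, copying 8 bytes with INTERLEAVE_FACTOR == 2 when neither
   side is known to be word-aligned produces roughly
     ldr rA, [src]        @ unaligned load into a pseudo
     ldr rB, [src, #4]
     str rA, [dst]        @ unaligned store
     str rB, [dst, #4]
   whereas an aligned source or destination uses ldmia/stmia for the same
   block, with any trailing halfword/byte copied individually afterwards.  */
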
11643 /* From mips_adjust_block_mem:
11644
11645 Helper function for doing a loop-based block operation on memory
11646 reference MEM. Each iteration of the loop will operate on LENGTH
11647 bytes of MEM.
11648
11649 Create a new base register for use within the loop and point it to
11650 the start of MEM. Create a new memory reference that uses this
11651 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11652
11653 static void
11654 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11655 rtx *loop_mem)
11656 {
11657 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11658
11659 /* Although the new mem does not refer to a known location,
11660 it does keep up to LENGTH bytes of alignment. */
11661 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11662 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11663 }
11664
11665 /* From mips_block_move_loop:
11666
11667 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11668 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11669 the memory regions do not overlap. */
11670
11671 static void
11672 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11673 unsigned int interleave_factor,
11674 HOST_WIDE_INT bytes_per_iter)
11675 {
11676 rtx label, src_reg, dest_reg, final_src, test;
11677 HOST_WIDE_INT leftover;
11678
11679 leftover = length % bytes_per_iter;
11680 length -= leftover;
11681
11682 /* Create registers and memory references for use within the loop. */
11683 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11684 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11685
11686 /* Calculate the value that SRC_REG should have after the last iteration of
11687 the loop. */
11688 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11689 0, 0, OPTAB_WIDEN);
11690
11691 /* Emit the start of the loop. */
11692 label = gen_label_rtx ();
11693 emit_label (label);
11694
11695 /* Emit the loop body. */
11696 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11697 interleave_factor);
11698
11699 /* Move on to the next block. */
11700 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11701 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11702
11703 /* Emit the loop condition. */
11704 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11705 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11706
11707 /* Mop up any left-over bytes. */
11708 if (leftover)
11709 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11710 }
11711
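/* The emitted loop has the usual shape; e.g. for LENGTH == 64 and
   BYTES_PER_ITER == 16 it is roughly
       final_src = src + 64
     L:
       <copy 16 bytes as above>
       src += 16;  dst += 16
       if (src != final_src) goto L
   with no residual copy, since 64 is a multiple of 16.  */
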
11712 /* Emit a block move when either the source or destination is unaligned (not
11713 aligned to a four-byte boundary). This may need further tuning depending on
11714 core type, optimize_size setting, etc. */
11715
11716 static int
11717 arm_movmemqi_unaligned (rtx *operands)
11718 {
11719 HOST_WIDE_INT length = INTVAL (operands[2]);
11720
11721 if (optimize_size)
11722 {
11723 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11724 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11725 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11726 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11727 or dst_aligned though: allow more interleaving in those cases since the
11728 resulting code can be smaller. */
11729 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11730 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11731
11732 if (length > 12)
11733 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11734 interleave_factor, bytes_per_iter);
11735 else
11736 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11737 interleave_factor);
11738 }
11739 else
11740 {
11741 /* Note that the loop created by arm_block_move_unaligned_loop may be
11742 subject to loop unrolling, which makes tuning this condition a little
11743 redundant. */
11744 if (length > 32)
11745 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11746 else
11747 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11748 }
11749
11750 return 1;
11751 }
11752
11753 int
11754 arm_gen_movmemqi (rtx *operands)
11755 {
11756 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11757 HOST_WIDE_INT srcoffset, dstoffset;
11758 int i;
11759 rtx src, dst, srcbase, dstbase;
11760 rtx part_bytes_reg = NULL;
11761 rtx mem;
11762
11763 if (!CONST_INT_P (operands[2])
11764 || !CONST_INT_P (operands[3])
11765 || INTVAL (operands[2]) > 64)
11766 return 0;
11767
11768 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11769 return arm_movmemqi_unaligned (operands);
11770
11771 if (INTVAL (operands[3]) & 3)
11772 return 0;
11773
11774 dstbase = operands[0];
11775 srcbase = operands[1];
11776
11777 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11778 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11779
11780 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11781 out_words_to_go = INTVAL (operands[2]) / 4;
11782 last_bytes = INTVAL (operands[2]) & 3;
11783 dstoffset = srcoffset = 0;
11784
11785 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11786 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11787
11788 for (i = 0; in_words_to_go >= 2; i+=4)
11789 {
11790 if (in_words_to_go > 4)
11791 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11792 TRUE, srcbase, &srcoffset));
11793 else
11794 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11795 src, FALSE, srcbase,
11796 &srcoffset));
11797
11798 if (out_words_to_go)
11799 {
11800 if (out_words_to_go > 4)
11801 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11802 TRUE, dstbase, &dstoffset));
11803 else if (out_words_to_go != 1)
11804 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11805 out_words_to_go, dst,
11806 (last_bytes == 0
11807 ? FALSE : TRUE),
11808 dstbase, &dstoffset));
11809 else
11810 {
11811 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11812 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11813 if (last_bytes != 0)
11814 {
11815 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11816 dstoffset += 4;
11817 }
11818 }
11819 }
11820
11821 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11822 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11823 }
11824
11825 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11826 if (out_words_to_go)
11827 {
11828 rtx sreg;
11829
11830 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11831 sreg = copy_to_reg (mem);
11832
11833 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11834 emit_move_insn (mem, sreg);
11835 in_words_to_go--;
11836
11837 gcc_assert (!in_words_to_go); /* Sanity check */
11838 }
11839
11840 if (in_words_to_go)
11841 {
11842 gcc_assert (in_words_to_go > 0);
11843
11844 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11845 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11846 }
11847
11848 gcc_assert (!last_bytes || part_bytes_reg);
11849
11850 if (BYTES_BIG_ENDIAN && last_bytes)
11851 {
11852 rtx tmp = gen_reg_rtx (SImode);
11853
11854 /* The bytes we want are in the top end of the word. */
11855 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11856 GEN_INT (8 * (4 - last_bytes))));
11857 part_bytes_reg = tmp;
11858
11859 while (last_bytes)
11860 {
11861 mem = adjust_automodify_address (dstbase, QImode,
11862 plus_constant (Pmode, dst,
11863 last_bytes - 1),
11864 dstoffset + last_bytes - 1);
11865 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11866
11867 if (--last_bytes)
11868 {
11869 tmp = gen_reg_rtx (SImode);
11870 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11871 part_bytes_reg = tmp;
11872 }
11873 }
11874
11875 }
11876 else
11877 {
11878 if (last_bytes > 1)
11879 {
11880 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11881 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11882 last_bytes -= 2;
11883 if (last_bytes)
11884 {
11885 rtx tmp = gen_reg_rtx (SImode);
11886 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11887 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11888 part_bytes_reg = tmp;
11889 dstoffset += 2;
11890 }
11891 }
11892
11893 if (last_bytes)
11894 {
11895 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11896 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11897 }
11898 }
11899
11900 return 1;
11901 }
11902
11903 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
11904 by the size of its mode. */
11905 inline static rtx
11906 next_consecutive_mem (rtx mem)
11907 {
11908 enum machine_mode mode = GET_MODE (mem);
11909 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
11910 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
11911
11912 return adjust_automodify_address (mem, mode, addr, offset);
11913 }
11914
11915 /* Copy using LDRD/STRD instructions whenever possible.
11916 Returns true upon success. */
11917 bool
11918 gen_movmem_ldrd_strd (rtx *operands)
11919 {
11920 unsigned HOST_WIDE_INT len;
11921 HOST_WIDE_INT align;
11922 rtx src, dst, base;
11923 rtx reg0;
11924 bool src_aligned, dst_aligned;
11925 bool src_volatile, dst_volatile;
11926
11927 gcc_assert (CONST_INT_P (operands[2]));
11928 gcc_assert (CONST_INT_P (operands[3]));
11929
11930 len = UINTVAL (operands[2]);
11931 if (len > 64)
11932 return false;
11933
11934 /* Maximum alignment we can assume for both src and dst buffers. */
11935 align = INTVAL (operands[3]);
11936
11937 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
11938 return false;
11939
11940 /* Place src and dst addresses in registers
11941 and update the corresponding mem rtx. */
11942 dst = operands[0];
11943 dst_volatile = MEM_VOLATILE_P (dst);
11944 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
11945 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
11946 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
11947
11948 src = operands[1];
11949 src_volatile = MEM_VOLATILE_P (src);
11950 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
11951 base = copy_to_mode_reg (SImode, XEXP (src, 0));
11952 src = adjust_automodify_address (src, VOIDmode, base, 0);
11953
11954 if (!unaligned_access && !(src_aligned && dst_aligned))
11955 return false;
11956
11957 if (src_volatile || dst_volatile)
11958 return false;
11959
11960 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
11961 if (!(dst_aligned || src_aligned))
11962 return arm_gen_movmemqi (operands);
11963
11964 src = adjust_address (src, DImode, 0);
11965 dst = adjust_address (dst, DImode, 0);
11966 while (len >= 8)
11967 {
11968 len -= 8;
11969 reg0 = gen_reg_rtx (DImode);
11970 if (src_aligned)
11971 emit_move_insn (reg0, src);
11972 else
11973 emit_insn (gen_unaligned_loaddi (reg0, src));
11974
11975 if (dst_aligned)
11976 emit_move_insn (dst, reg0);
11977 else
11978 emit_insn (gen_unaligned_storedi (dst, reg0));
11979
11980 src = next_consecutive_mem (src);
11981 dst = next_consecutive_mem (dst);
11982 }
11983
11984 gcc_assert (len < 8);
11985 if (len >= 4)
11986 {
11987 /* More than a word but less than a double-word to copy. Copy a word. */
11988 reg0 = gen_reg_rtx (SImode);
11989 src = adjust_address (src, SImode, 0);
11990 dst = adjust_address (dst, SImode, 0);
11991 if (src_aligned)
11992 emit_move_insn (reg0, src);
11993 else
11994 emit_insn (gen_unaligned_loadsi (reg0, src));
11995
11996 if (dst_aligned)
11997 emit_move_insn (dst, reg0);
11998 else
11999 emit_insn (gen_unaligned_storesi (dst, reg0));
12000
12001 src = next_consecutive_mem (src);
12002 dst = next_consecutive_mem (dst);
12003 len -= 4;
12004 }
12005
12006 if (len == 0)
12007 return true;
12008
12009 /* Copy the remaining bytes. */
12010 if (len >= 2)
12011 {
12012 dst = adjust_address (dst, HImode, 0);
12013 src = adjust_address (src, HImode, 0);
12014 reg0 = gen_reg_rtx (SImode);
12015 emit_insn (gen_unaligned_loadhiu (reg0, src));
12016 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
12017 src = next_consecutive_mem (src);
12018 dst = next_consecutive_mem (dst);
12019 if (len == 2)
12020 return true;
12021 }
12022
12023 dst = adjust_address (dst, QImode, 0);
12024 src = adjust_address (src, QImode, 0);
12025 reg0 = gen_reg_rtx (QImode);
12026 emit_move_insn (reg0, src);
12027 emit_move_insn (dst, reg0);
12028 return true;
12029 }
12030
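/* For instance, a 14-byte copy with both sides word-aligned might come
   out as (on a core with LDRD/STRD)
     ldrd rA, rB, [src]
     strd rA, rB, [dst]
     ldr  rC, [src, #8]
     str  rC, [dst, #8]
     ldrh rD, [src, #12]
     strh rD, [dst, #12]
   i.e. one doubleword, one word and one halfword, with no byte tail.  */
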
12031 /* Select a dominance comparison mode if possible for a test of the general
12032 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
12033 COND_OR == DOM_CC_X_AND_Y => (X && Y)
12034 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
12035 COND_OR == DOM_CC_X_OR_Y => (X || Y)
12036 In all cases OP will be either EQ or NE, but we don't need to know which
12037 here. If we are unable to support a dominance comparison we return
12038 CCmode. This will then fail to match for the RTL expressions that
12039 generate this call. */
12040 enum machine_mode
12041 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
12042 {
12043 enum rtx_code cond1, cond2;
12044 int swapped = 0;
12045
12046 /* Currently we will probably get the wrong result if the individual
12047 comparisons are not simple. This also ensures that it is safe to
12048 reverse a comparison if necessary. */
12049 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
12050 != CCmode)
12051 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
12052 != CCmode))
12053 return CCmode;
12054
12055 /* The if_then_else variant of this tests the second condition if the
12056 first passes, but is true if the first fails. Reverse the first
12057 condition to get a true "inclusive-or" expression. */
12058 if (cond_or == DOM_CC_NX_OR_Y)
12059 cond1 = reverse_condition (cond1);
12060
12061 /* If the comparisons are not equal, and one doesn't dominate the other,
12062 then we can't do this. */
12063 if (cond1 != cond2
12064 && !comparison_dominates_p (cond1, cond2)
12065 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
12066 return CCmode;
12067
12068 if (swapped)
12069 {
12070 enum rtx_code temp = cond1;
12071 cond1 = cond2;
12072 cond2 = temp;
12073 }
12074
12075 switch (cond1)
12076 {
12077 case EQ:
12078 if (cond_or == DOM_CC_X_AND_Y)
12079 return CC_DEQmode;
12080
12081 switch (cond2)
12082 {
12083 case EQ: return CC_DEQmode;
12084 case LE: return CC_DLEmode;
12085 case LEU: return CC_DLEUmode;
12086 case GE: return CC_DGEmode;
12087 case GEU: return CC_DGEUmode;
12088 default: gcc_unreachable ();
12089 }
12090
12091 case LT:
12092 if (cond_or == DOM_CC_X_AND_Y)
12093 return CC_DLTmode;
12094
12095 switch (cond2)
12096 {
12097 case LT:
12098 return CC_DLTmode;
12099 case LE:
12100 return CC_DLEmode;
12101 case NE:
12102 return CC_DNEmode;
12103 default:
12104 gcc_unreachable ();
12105 }
12106
12107 case GT:
12108 if (cond_or == DOM_CC_X_AND_Y)
12109 return CC_DGTmode;
12110
12111 switch (cond2)
12112 {
12113 case GT:
12114 return CC_DGTmode;
12115 case GE:
12116 return CC_DGEmode;
12117 case NE:
12118 return CC_DNEmode;
12119 default:
12120 gcc_unreachable ();
12121 }
12122
12123 case LTU:
12124 if (cond_or == DOM_CC_X_AND_Y)
12125 return CC_DLTUmode;
12126
12127 switch (cond2)
12128 {
12129 case LTU:
12130 return CC_DLTUmode;
12131 case LEU:
12132 return CC_DLEUmode;
12133 case NE:
12134 return CC_DNEmode;
12135 default:
12136 gcc_unreachable ();
12137 }
12138
12139 case GTU:
12140 if (cond_or == DOM_CC_X_AND_Y)
12141 return CC_DGTUmode;
12142
12143 switch (cond2)
12144 {
12145 case GTU:
12146 return CC_DGTUmode;
12147 case GEU:
12148 return CC_DGEUmode;
12149 case NE:
12150 return CC_DNEmode;
12151 default:
12152 gcc_unreachable ();
12153 }
12154
12155 /* The remaining cases only occur when both comparisons are the
12156 same. */
12157 case NE:
12158 gcc_assert (cond1 == cond2);
12159 return CC_DNEmode;
12160
12161 case LE:
12162 gcc_assert (cond1 == cond2);
12163 return CC_DLEmode;
12164
12165 case GE:
12166 gcc_assert (cond1 == cond2);
12167 return CC_DGEmode;
12168
12169 case LEU:
12170 gcc_assert (cond1 == cond2);
12171 return CC_DLEUmode;
12172
12173 case GEU:
12174 gcc_assert (cond1 == cond2);
12175 return CC_DGEUmode;
12176
12177 default:
12178 gcc_unreachable ();
12179 }
12180 }
12181
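/* For example, for a test such as (a == 0 && b == 0) combine presents
   (and (eq a 0) (eq b 0)) compared against zero; with cond1 == cond2 == EQ
   and DOM_CC_X_AND_Y this selects CC_DEQmode, which allows a
   conditional-compare sequence along the lines of
     cmp   a, #0
     cmpeq b, #0
     beq   ...
   instead of materializing each comparison result separately.  */
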
12182 enum machine_mode
12183 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
12184 {
12185 /* All floating point compares return CCFP if it is an equality
12186 comparison, and CCFPE otherwise. */
12187 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12188 {
12189 switch (op)
12190 {
12191 case EQ:
12192 case NE:
12193 case UNORDERED:
12194 case ORDERED:
12195 case UNLT:
12196 case UNLE:
12197 case UNGT:
12198 case UNGE:
12199 case UNEQ:
12200 case LTGT:
12201 return CCFPmode;
12202
12203 case LT:
12204 case LE:
12205 case GT:
12206 case GE:
12207 return CCFPEmode;
12208
12209 default:
12210 gcc_unreachable ();
12211 }
12212 }
12213
12214 /* A compare with a shifted operand. Because of canonicalization, the
12215 comparison will have to be swapped when we emit the assembler. */
12216 if (GET_MODE (y) == SImode
12217 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12218 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12219 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
12220 || GET_CODE (x) == ROTATERT))
12221 return CC_SWPmode;
12222
12223 /* This operation is performed swapped, but since we only rely on the Z
12224 flag we don't need an additional mode. */
12225 if (GET_MODE (y) == SImode
12226 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12227 && GET_CODE (x) == NEG
12228 && (op == EQ || op == NE))
12229 return CC_Zmode;
12230
12231 /* This is a special case that is used by combine to allow a
12232 comparison of a shifted byte load to be split into a zero-extend
12233 followed by a comparison of the shifted integer (only valid for
12234 equalities and unsigned inequalities). */
12235 if (GET_MODE (x) == SImode
12236 && GET_CODE (x) == ASHIFT
12237 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12238 && GET_CODE (XEXP (x, 0)) == SUBREG
12239 && MEM_P (SUBREG_REG (XEXP (x, 0)))
12240 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12241 && (op == EQ || op == NE
12242 || op == GEU || op == GTU || op == LTU || op == LEU)
12243 && CONST_INT_P (y))
12244 return CC_Zmode;
12245
12246 /* A construct for a conditional compare: if the false arm contains
12247 0, then both conditions must be true; otherwise either condition
12248 must be true. Not all conditions are possible, so CCmode is
12249 returned if it can't be done. */
12250 if (GET_CODE (x) == IF_THEN_ELSE
12251 && (XEXP (x, 2) == const0_rtx
12252 || XEXP (x, 2) == const1_rtx)
12253 && COMPARISON_P (XEXP (x, 0))
12254 && COMPARISON_P (XEXP (x, 1)))
12255 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12256 INTVAL (XEXP (x, 2)));
12257
12258 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
12259 if (GET_CODE (x) == AND
12260 && (op == EQ || op == NE)
12261 && COMPARISON_P (XEXP (x, 0))
12262 && COMPARISON_P (XEXP (x, 1)))
12263 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12264 DOM_CC_X_AND_Y);
12265
12266 if (GET_CODE (x) == IOR
12267 && (op == EQ || op == NE)
12268 && COMPARISON_P (XEXP (x, 0))
12269 && COMPARISON_P (XEXP (x, 1)))
12270 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12271 DOM_CC_X_OR_Y);
12272
12273 /* An operation (on Thumb) where we want to test for a single bit.
12274 This is done by shifting that bit up into the top bit of a
12275 scratch register; we can then branch on the sign bit. */
12276 if (TARGET_THUMB1
12277 && GET_MODE (x) == SImode
12278 && (op == EQ || op == NE)
12279 && GET_CODE (x) == ZERO_EXTRACT
12280 && XEXP (x, 1) == const1_rtx)
12281 return CC_Nmode;
12282
12283 /* An operation that sets the condition codes as a side-effect; the
12284 V flag is not set correctly, so we can only use comparisons where
12285 this doesn't matter. (For LT and GE we can use "mi" and "pl"
12286 instead.) */
12287 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
12288 if (GET_MODE (x) == SImode
12289 && y == const0_rtx
12290 && (op == EQ || op == NE || op == LT || op == GE)
12291 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12292 || GET_CODE (x) == AND || GET_CODE (x) == IOR
12293 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12294 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12295 || GET_CODE (x) == LSHIFTRT
12296 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12297 || GET_CODE (x) == ROTATERT
12298 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12299 return CC_NOOVmode;
12300
12301 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12302 return CC_Zmode;
12303
12304 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12305 && GET_CODE (x) == PLUS
12306 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12307 return CC_Cmode;
12308
12309 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12310 {
12311 switch (op)
12312 {
12313 case EQ:
12314 case NE:
12315 /* A DImode comparison against zero can be implemented by
12316 or'ing the two halves together. */
12317 if (y == const0_rtx)
12318 return CC_Zmode;
12319
12320 /* We can do an equality test in three Thumb instructions. */
12321 if (!TARGET_32BIT)
12322 return CC_Zmode;
12323
12324 /* FALLTHROUGH */
12325
12326 case LTU:
12327 case LEU:
12328 case GTU:
12329 case GEU:
12330 /* DImode unsigned comparisons can be implemented by cmp +
12331 cmpeq without a scratch register. Not worth doing in
12332 Thumb-2. */
12333 if (TARGET_32BIT)
12334 return CC_CZmode;
12335
12336 /* FALLTHROUGH */
12337
12338 case LT:
12339 case LE:
12340 case GT:
12341 case GE:
12342 /* DImode signed and unsigned comparisons can be implemented
12343 by cmp + sbcs with a scratch register, but that does not
12344 set the Z flag - we must reverse GT/LE/GTU/LEU. */
12345 gcc_assert (op != EQ && op != NE);
12346 return CC_NCVmode;
12347
12348 default:
12349 gcc_unreachable ();
12350 }
12351 }
12352
12353 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12354 return GET_MODE (x);
12355
12356 return CCmode;
12357 }
12358
12359 /* X and Y are two things to compare using CODE. Emit the compare insn and
12360 return the rtx for the CC register in the proper mode. SCRATCH is an
12361 SImode scratch register that may be needed for DImode comparisons. */
12362 rtx
12363 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12364 {
12365 enum machine_mode mode;
12366 rtx cc_reg;
12367 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12368
12369 /* We might have X as a constant, Y as a register because of the predicates
12370 used for cmpdi. If so, force X to a register here. */
12371 if (dimode_comparison && !REG_P (x))
12372 x = force_reg (DImode, x);
12373
12374 mode = SELECT_CC_MODE (code, x, y);
12375 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12376
12377 if (dimode_comparison
12378 && mode != CC_CZmode)
12379 {
12380 rtx clobber, set;
12381
12382 /* To compare two non-zero values for equality, XOR them and
12383 then compare against zero. Not used for ARM mode; there
12384 CC_CZmode is cheaper. */
12385 if (mode == CC_Zmode && y != const0_rtx)
12386 {
12387 gcc_assert (!reload_completed);
12388 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12389 y = const0_rtx;
12390 }
12391
12392 /* A scratch register is required. */
12393 if (reload_completed)
12394 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12395 else
12396 scratch = gen_rtx_SCRATCH (SImode);
12397
12398 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12399 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12400 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12401 }
12402 else
12403 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
12404
12405 return cc_reg;
12406 }
12407
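/* For example, a Thumb-1 DImode equality test against a nonzero value is
   rewritten above as (xor x y) compared with zero, since only a compare
   against zero can use CC_Zmode; conceptually the result is
     t_lo = x_lo ^ y_lo
     t_hi = x_hi ^ y_hi
     (t_lo | t_hi) == 0 ?
   with the OR using the clobbered scratch register.  32-bit targets keep
   the original operands and use CC_CZmode (a cmp/cmpeq pair) instead.  */
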
12408 /* Generate a sequence of insns that will compute the correct return
12409 address mask depending on the physical architecture that the program
12410 is running on. */
12411 rtx
12412 arm_gen_return_addr_mask (void)
12413 {
12414 rtx reg = gen_reg_rtx (Pmode);
12415
12416 emit_insn (gen_return_addr_mask (reg));
12417 return reg;
12418 }
12419
12420 void
12421 arm_reload_in_hi (rtx *operands)
12422 {
12423 rtx ref = operands[1];
12424 rtx base, scratch;
12425 HOST_WIDE_INT offset = 0;
12426
12427 if (GET_CODE (ref) == SUBREG)
12428 {
12429 offset = SUBREG_BYTE (ref);
12430 ref = SUBREG_REG (ref);
12431 }
12432
12433 if (REG_P (ref))
12434 {
12435 /* We have a pseudo which has been spilt onto the stack; there
12436 are two cases here: the first where there is a simple
12437 stack-slot replacement and a second where the stack-slot is
12438 out of range, or is used as a subreg. */
12439 if (reg_equiv_mem (REGNO (ref)))
12440 {
12441 ref = reg_equiv_mem (REGNO (ref));
12442 base = find_replacement (&XEXP (ref, 0));
12443 }
12444 else
12445 /* The slot is out of range, or was dressed up in a SUBREG. */
12446 base = reg_equiv_address (REGNO (ref));
12447 }
12448 else
12449 base = find_replacement (&XEXP (ref, 0));
12450
12451 /* Handle the case where the address is too complex to be offset by 1. */
12452 if (GET_CODE (base) == MINUS
12453 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12454 {
12455 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12456
12457 emit_set_insn (base_plus, base);
12458 base = base_plus;
12459 }
12460 else if (GET_CODE (base) == PLUS)
12461 {
12462 /* The addend must be CONST_INT, or we would have dealt with it above. */
12463 HOST_WIDE_INT hi, lo;
12464
12465 offset += INTVAL (XEXP (base, 1));
12466 base = XEXP (base, 0);
12467
12468 /* Rework the address into a legal sequence of insns. */
12469 /* Valid range for lo is -4095 -> 4095 */
12470 lo = (offset >= 0
12471 ? (offset & 0xfff)
12472 : -((-offset) & 0xfff));
12473
12474 /* Corner case: if lo is the max offset then we would be out of range
12475 once we have added the additional 1 below, so bump the msb into the
12476 pre-loading insn(s). */
12477 if (lo == 4095)
12478 lo &= 0x7ff;
12479
12480 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12481 ^ (HOST_WIDE_INT) 0x80000000)
12482 - (HOST_WIDE_INT) 0x80000000);
12483
12484 gcc_assert (hi + lo == offset);
12485
12486 if (hi != 0)
12487 {
12488 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12489
12490 /* Get the base address; addsi3 knows how to handle constants
12491 that require more than one insn. */
12492 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12493 base = base_plus;
12494 offset = lo;
12495 }
12496 }
12497
12498 /* Operands[2] may overlap operands[0] (though it won't overlap
12499 operands[1]); that is why we asked for a DImode reg -- so we can
12500 use the half that does not overlap. */
12501 if (REGNO (operands[2]) == REGNO (operands[0]))
12502 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12503 else
12504 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12505
12506 emit_insn (gen_zero_extendqisi2 (scratch,
12507 gen_rtx_MEM (QImode,
12508 plus_constant (Pmode, base,
12509 offset))));
12510 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12511 gen_rtx_MEM (QImode,
12512 plus_constant (Pmode, base,
12513 offset + 1))));
12514 if (!BYTES_BIG_ENDIAN)
12515 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12516 gen_rtx_IOR (SImode,
12517 gen_rtx_ASHIFT
12518 (SImode,
12519 gen_rtx_SUBREG (SImode, operands[0], 0),
12520 GEN_INT (8)),
12521 scratch));
12522 else
12523 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12524 gen_rtx_IOR (SImode,
12525 gen_rtx_ASHIFT (SImode, scratch,
12526 GEN_INT (8)),
12527 gen_rtx_SUBREG (SImode, operands[0], 0)));
12528 }
12529
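/* The end result of a little-endian halfword load from BASE + OFFSET is
   along the lines of
     ldrb scratch, [base, #offset]
     ldrb out,     [base, #offset + 1]
     orr  out, scratch, out, lsl #8
   with the big-endian case combining the two bytes the other way round;
   the hi/lo splitting above is only needed when OFFSET is outside the
   range a byte load can encode directly.  */
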
12530 /* Handle storing a half-word to memory during reload by synthesizing it as two
12531 byte stores. Take care not to clobber the input values until after we
12532 have moved them somewhere safe. This code assumes that if the DImode
12533 scratch in operands[2] overlaps either the input value or output address
12534 in some way, then that value must die in this insn (we absolutely need
12535 two scratch registers for some corner cases). */
12536 void
12537 arm_reload_out_hi (rtx *operands)
12538 {
12539 rtx ref = operands[0];
12540 rtx outval = operands[1];
12541 rtx base, scratch;
12542 HOST_WIDE_INT offset = 0;
12543
12544 if (GET_CODE (ref) == SUBREG)
12545 {
12546 offset = SUBREG_BYTE (ref);
12547 ref = SUBREG_REG (ref);
12548 }
12549
12550 if (REG_P (ref))
12551 {
12552 /* We have a pseudo which has been spilt onto the stack; there
12553 are two cases here: the first where there is a simple
12554 stack-slot replacement and a second where the stack-slot is
12555 out of range, or is used as a subreg. */
12556 if (reg_equiv_mem (REGNO (ref)))
12557 {
12558 ref = reg_equiv_mem (REGNO (ref));
12559 base = find_replacement (&XEXP (ref, 0));
12560 }
12561 else
12562 /* The slot is out of range, or was dressed up in a SUBREG. */
12563 base = reg_equiv_address (REGNO (ref));
12564 }
12565 else
12566 base = find_replacement (&XEXP (ref, 0));
12567
12568 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12569
12570 /* Handle the case where the address is too complex to be offset by 1. */
12571 if (GET_CODE (base) == MINUS
12572 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12573 {
12574 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12575
12576 /* Be careful not to destroy OUTVAL. */
12577 if (reg_overlap_mentioned_p (base_plus, outval))
12578 {
12579 /* Updating base_plus might destroy outval; see if we can
12580 swap the scratch and base_plus. */
12581 if (!reg_overlap_mentioned_p (scratch, outval))
12582 {
12583 rtx tmp = scratch;
12584 scratch = base_plus;
12585 base_plus = tmp;
12586 }
12587 else
12588 {
12589 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12590
12591 /* Be conservative and copy OUTVAL into the scratch now;
12592 this should only be necessary if outval is a subreg
12593 of something larger than a word. */
12594 /* XXX Might this clobber base? I can't see how it can,
12595 since scratch is known to overlap with OUTVAL, and
12596 must be wider than a word. */
12597 emit_insn (gen_movhi (scratch_hi, outval));
12598 outval = scratch_hi;
12599 }
12600 }
12601
12602 emit_set_insn (base_plus, base);
12603 base = base_plus;
12604 }
12605 else if (GET_CODE (base) == PLUS)
12606 {
12607 /* The addend must be CONST_INT, or we would have dealt with it above. */
12608 HOST_WIDE_INT hi, lo;
12609
12610 offset += INTVAL (XEXP (base, 1));
12611 base = XEXP (base, 0);
12612
12613 /* Rework the address into a legal sequence of insns. */
12614 /* Valid range for lo is -4095 -> 4095 */
12615 lo = (offset >= 0
12616 ? (offset & 0xfff)
12617 : -((-offset) & 0xfff));
12618
12619 /* Corner case: if lo is the max offset then we would be out of range
12620 once we have added the additional 1 below, so bump the msb into the
12621 pre-loading insn(s). */
12622 if (lo == 4095)
12623 lo &= 0x7ff;
12624
12625 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12626 ^ (HOST_WIDE_INT) 0x80000000)
12627 - (HOST_WIDE_INT) 0x80000000);
12628
12629 gcc_assert (hi + lo == offset);
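/* Worked example (illustrative): an offset of 0x1234 splits into
   lo = 0x234 and hi = 0x1000; hi is added to the base via addsi3
   below, leaving lo as the small residual offset used by the two
   byte stores.  */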
12630
12631 if (hi != 0)
12632 {
12633 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12634
12635 /* Be careful not to destroy OUTVAL. */
12636 if (reg_overlap_mentioned_p (base_plus, outval))
12637 {
12638 /* Updating base_plus might destroy outval, see if we
12639 can swap the scratch and base_plus. */
12640 if (!reg_overlap_mentioned_p (scratch, outval))
12641 {
12642 rtx tmp = scratch;
12643 scratch = base_plus;
12644 base_plus = tmp;
12645 }
12646 else
12647 {
12648 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12649
12650 /* Be conservative and copy outval into scratch now,
12651 this should only be necessary if outval is a
12652 subreg of something larger than a word. */
12653 /* XXX Might this clobber base? I can't see how it
12654 can, since scratch is known to overlap with
12655 outval. */
12656 emit_insn (gen_movhi (scratch_hi, outval));
12657 outval = scratch_hi;
12658 }
12659 }
12660
12661 /* Get the base address; addsi3 knows how to handle constants
12662 that require more than one insn. */
12663 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12664 base = base_plus;
12665 offset = lo;
12666 }
12667 }
12668
12669 if (BYTES_BIG_ENDIAN)
12670 {
12671 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12672 plus_constant (Pmode, base,
12673 offset + 1)),
12674 gen_lowpart (QImode, outval)));
12675 emit_insn (gen_lshrsi3 (scratch,
12676 gen_rtx_SUBREG (SImode, outval, 0),
12677 GEN_INT (8)));
12678 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12679 offset)),
12680 gen_lowpart (QImode, scratch)));
12681 }
12682 else
12683 {
12684 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12685 offset)),
12686 gen_lowpart (QImode, outval)));
12687 emit_insn (gen_lshrsi3 (scratch,
12688 gen_rtx_SUBREG (SImode, outval, 0),
12689 GEN_INT (8)));
12690 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12691 plus_constant (Pmode, base,
12692 offset + 1)),
12693 gen_lowpart (QImode, scratch)));
12694 }
12695 }
12696
12697 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12698 (padded to the size of a word) should be passed in a register. */
12699
12700 static bool
12701 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12702 {
12703 if (TARGET_AAPCS_BASED)
12704 return must_pass_in_stack_var_size (mode, type);
12705 else
12706 return must_pass_in_stack_var_size_or_pad (mode, type);
12707 }
12708
12709
12710 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12711 Return true if an argument passed on the stack should be padded upwards,
12712 i.e. if the least-significant byte has useful data.
12713 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12714 aggregate types are placed in the lowest memory address. */
12715
12716 bool
12717 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12718 {
12719 if (!TARGET_AAPCS_BASED)
12720 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12721
12722 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12723 return false;
12724
12725 return true;
12726 }
12727
12728
12729 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12730 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12731 register has useful data, and return the opposite if the most
12732 significant byte does. */
12733
12734 bool
12735 arm_pad_reg_upward (enum machine_mode mode,
12736 tree type, int first ATTRIBUTE_UNUSED)
12737 {
12738 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12739 {
12740 /* For AAPCS, small aggregates, small fixed-point types,
12741 and small complex types are always padded upwards. */
12742 if (type)
12743 {
12744 if ((AGGREGATE_TYPE_P (type)
12745 || TREE_CODE (type) == COMPLEX_TYPE
12746 || FIXED_POINT_TYPE_P (type))
12747 && int_size_in_bytes (type) <= 4)
12748 return true;
12749 }
12750 else
12751 {
12752 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12753 && GET_MODE_SIZE (mode) <= 4)
12754 return true;
12755 }
12756 }
12757
12758 /* Otherwise, use default padding. */
12759 return !BYTES_BIG_ENDIAN;
12760 }
12761
12762 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12763 assuming that the address in the base register is word aligned. */
12764 bool
12765 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12766 {
12767 HOST_WIDE_INT max_offset;
12768
12769 /* Offset must be a multiple of 4 in Thumb mode. */
12770 if (TARGET_THUMB2 && ((offset & 3) != 0))
12771 return false;
12772
12773 if (TARGET_THUMB2)
12774 max_offset = 1020;
12775 else if (TARGET_ARM)
12776 max_offset = 255;
12777 else
12778 return false;
12779
12780 return ((offset <= max_offset) && (offset >= -max_offset));
12781 }
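/* For instance (illustrative): an offset of 256 is rejected in ARM state
   (limit 255) but accepted in Thumb-2 (limit 1020), while an offset of 6
   is rejected in Thumb-2 because it is not a multiple of 4.  */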
12782
12783 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12784 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12785 Assumes that the address in the base register RN is word aligned. Pattern
12786 guarantees that both memory accesses use the same base register,
12787 the offsets are constants within the range, and the gap between the offsets is 4.
12788 If reload is complete then check that registers are legal. WBACK indicates whether
12789 address is updated. LOAD indicates whether memory access is load or store. */
12790 bool
12791 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12792 bool wback, bool load)
12793 {
12794 unsigned int t, t2, n;
12795
12796 if (!reload_completed)
12797 return true;
12798
12799 if (!offset_ok_for_ldrd_strd (offset))
12800 return false;
12801
12802 t = REGNO (rt);
12803 t2 = REGNO (rt2);
12804 n = REGNO (rn);
12805
12806 if ((TARGET_THUMB2)
12807 && ((wback && (n == t || n == t2))
12808 || (t == SP_REGNUM)
12809 || (t == PC_REGNUM)
12810 || (t2 == SP_REGNUM)
12811 || (t2 == PC_REGNUM)
12812 || (!load && (n == PC_REGNUM))
12813 || (load && (t == t2))
12814 /* Triggers Cortex-M3 LDRD errata. */
12815 || (!wback && load && fix_cm3_ldrd && (n == t))))
12816 return false;
12817
12818 if ((TARGET_ARM)
12819 && ((wback && (n == t || n == t2))
12820 || (t2 == PC_REGNUM)
12821 || (t % 2 != 0) /* First destination register is not even. */
12822 || (t2 != t + 1)
12823 /* PC can be used as base register (for offset addressing only),
12824 but it is deprecated. */
12825 || (n == PC_REGNUM)))
12826 return false;
12827
12828 return true;
12829 }
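/* For example (illustrative): in ARM state the destination pair must be
   an even/odd pair, so r0/r1 with a valid offset passes the checks above,
   whereas r1/r2 fails because the first register is odd.  */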
12830
12831 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
12832 operand ADDR is an immediate offset from the base register and is
12833 not volatile, in which case it sets BASE and OFFSET
12834 accordingly. */
12835 bool
12836 mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
12837 {
12838 /* TODO: Handle more general memory operand patterns, such as
12839 PRE_DEC and PRE_INC. */
12840
12841 /* Convert a subreg of mem into mem itself. */
12842 if (GET_CODE (addr) == SUBREG)
12843 addr = alter_subreg (&addr, true);
12844
12845 gcc_assert (MEM_P (addr));
12846
12847 /* Don't modify volatile memory accesses. */
12848 if (MEM_VOLATILE_P (addr))
12849 return false;
12850
12851 *offset = const0_rtx;
12852
12853 addr = XEXP (addr, 0);
12854 if (REG_P (addr))
12855 {
12856 *base = addr;
12857 return true;
12858 }
12859 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
12860 {
12861 *base = XEXP (addr, 0);
12862 *offset = XEXP (addr, 1);
12863 return (REG_P (*base) && CONST_INT_P (*offset));
12864 }
12865
12866 return false;
12867 }
12868
12869 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
12870
12871 /* Called from a peephole2 to replace two word-size accesses with a
12872 single LDRD/STRD instruction. Returns true iff we can generate a
12873 new instruction sequence. That is, both accesses use the same base
12874 register and the gap between constant offsets is 4. This function
12875 may reorder its operands to match ldrd/strd RTL templates.
12876 OPERANDS are the operands found by the peephole matcher;
12877 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
12878 corresponding memory operands. LOAD indicates whether the access
12879 is load or store. CONST_STORE indicates a store of constant
12880 integer values held in OPERANDS[4,5] and assumes that the pattern
12881 is 4 insns long, for the purpose of checking dead registers.
12882 COMMUTE indicates that register operands may be reordered. */
12883 bool
12884 gen_operands_ldrd_strd (rtx *operands, bool load,
12885 bool const_store, bool commute)
12886 {
12887 int nops = 2;
12888 HOST_WIDE_INT offsets[2], offset;
12889 rtx base = NULL_RTX;
12890 rtx cur_base, cur_offset, tmp;
12891 int i, gap;
12892 HARD_REG_SET regset;
12893
12894 gcc_assert (!const_store || !load);
12895 /* Check that the memory references are immediate offsets from the
12896 same base register. Extract the base register, the destination
12897 registers, and the corresponding memory offsets. */
12898 for (i = 0; i < nops; i++)
12899 {
12900 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
12901 return false;
12902
12903 if (i == 0)
12904 base = cur_base;
12905 else if (REGNO (base) != REGNO (cur_base))
12906 return false;
12907
12908 offsets[i] = INTVAL (cur_offset);
12909 if (GET_CODE (operands[i]) == SUBREG)
12910 {
12911 tmp = SUBREG_REG (operands[i]);
12912 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
12913 operands[i] = tmp;
12914 }
12915 }
12916
12917 /* Make sure there is no dependency between the individual loads. */
12918 if (load && REGNO (operands[0]) == REGNO (base))
12919 return false; /* RAW */
12920
12921 if (load && REGNO (operands[0]) == REGNO (operands[1]))
12922 return false; /* WAW */
12923
12924 /* If the same input register is used in both stores
12925 when storing different constants, try to find a free register.
12926 For example, the code
12927 mov r0, 0
12928 str r0, [r2]
12929 mov r0, 1
12930 str r0, [r2, #4]
12931 can be transformed into
12932 mov r1, 0
12933 strd r1, r0, [r2]
12934 in Thumb mode assuming that r1 is free. */
12935 if (const_store
12936 && REGNO (operands[0]) == REGNO (operands[1])
12937 && INTVAL (operands[4]) != INTVAL (operands[5]))
12938 {
12939 if (TARGET_THUMB2)
12940 {
12941 CLEAR_HARD_REG_SET (regset);
12942 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12943 if (tmp == NULL_RTX)
12944 return false;
12945
12946 /* Use the new register in the first store to ensure that
12947 if the original input register is not dead after the peephole,
12948 then it will have the correct constant value. */
12949 operands[0] = tmp;
12950 }
12951 else if (TARGET_ARM)
12952 {
12953 return false;
12954 int regno = REGNO (operands[0]);
12955 if (!peep2_reg_dead_p (4, operands[0]))
12956 {
12957 /* When the input register is even and is not dead after the
12958 pattern, it has to hold the second constant but we cannot
12959 form a legal STRD in ARM mode with this register as the second
12960 register. */
12961 if (regno % 2 == 0)
12962 return false;
12963
12964 /* Is regno-1 free? */
12965 SET_HARD_REG_SET (regset);
12966 CLEAR_HARD_REG_BIT(regset, regno - 1);
12967 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12968 if (tmp == NULL_RTX)
12969 return false;
12970
12971 operands[0] = tmp;
12972 }
12973 else
12974 {
12975 /* Find a DImode register. */
12976 CLEAR_HARD_REG_SET (regset);
12977 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
12978 if (tmp != NULL_RTX)
12979 {
12980 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
12981 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
12982 }
12983 else
12984 {
12985 /* Can we use the input register to form a DI register? */
12986 SET_HARD_REG_SET (regset);
12987 CLEAR_HARD_REG_BIT(regset,
12988 regno % 2 == 0 ? regno + 1 : regno - 1);
12989 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12990 if (tmp == NULL_RTX)
12991 return false;
12992 operands[regno % 2 == 1 ? 0 : 1] = tmp;
12993 }
12994 }
12995
12996 gcc_assert (operands[0] != NULL_RTX);
12997 gcc_assert (operands[1] != NULL_RTX);
12998 gcc_assert (REGNO (operands[0]) % 2 == 0);
12999 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
13000 }
13001 }
13002
13003 /* Make sure the instructions are ordered with lower memory access first. */
13004 if (offsets[0] > offsets[1])
13005 {
13006 gap = offsets[0] - offsets[1];
13007 offset = offsets[1];
13008
13009 /* Swap the instructions such that lower memory is accessed first. */
13010 SWAP_RTX (operands[0], operands[1]);
13011 SWAP_RTX (operands[2], operands[3]);
13012 if (const_store)
13013 SWAP_RTX (operands[4], operands[5]);
13014 }
13015 else
13016 {
13017 gap = offsets[1] - offsets[0];
13018 offset = offsets[0];
13019 }
13020
13021 /* Make sure accesses are to consecutive memory locations. */
13022 if (gap != 4)
13023 return false;
13024
13025 /* Make sure we generate legal instructions. */
13026 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13027 false, load))
13028 return true;
13029
13030 /* In Thumb state, where registers are almost unconstrained, there
13031 is little hope of fixing it. */
13032 if (TARGET_THUMB2)
13033 return false;
13034
13035 if (load && commute)
13036 {
13037 /* Try reordering registers. */
13038 SWAP_RTX (operands[0], operands[1]);
13039 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13040 false, load))
13041 return true;
13042 }
13043
13044 if (const_store)
13045 {
13046 /* If input registers are dead after this pattern, they can be
13047 reordered or replaced by other registers that are free in the
13048 current pattern. */
13049 if (!peep2_reg_dead_p (4, operands[0])
13050 || !peep2_reg_dead_p (4, operands[1]))
13051 return false;
13052
13053 /* Try to reorder the input registers. */
13054 /* For example, the code
13055 mov r0, 0
13056 mov r1, 1
13057 str r1, [r2]
13058 str r0, [r2, #4]
13059 can be transformed into
13060 mov r1, 0
13061 mov r0, 1
13062 strd r0, [r2]
13063 */
13064 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
13065 false, false))
13066 {
13067 SWAP_RTX (operands[0], operands[1]);
13068 return true;
13069 }
13070
13071 /* Try to find a free DI register. */
13072 CLEAR_HARD_REG_SET (regset);
13073 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
13074 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
13075 while (true)
13076 {
13077 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
13078 if (tmp == NULL_RTX)
13079 return false;
13080
13081 /* DREG must be an even-numbered register in DImode.
13082 Split it into SI registers. */
13083 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13084 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13085 gcc_assert (operands[0] != NULL_RTX);
13086 gcc_assert (operands[1] != NULL_RTX);
13087 gcc_assert (REGNO (operands[0]) % 2 == 0);
13088 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
13089
13090 return (operands_ok_ldrd_strd (operands[0], operands[1],
13091 base, offset,
13092 false, load));
13093 }
13094 }
13095
13096 return false;
13097 }
13098 #undef SWAP_RTX
13099
13100
13101
13102 \f
13103 /* Print a symbolic form of X to the debug file, F. */
13104 static void
13105 arm_print_value (FILE *f, rtx x)
13106 {
13107 switch (GET_CODE (x))
13108 {
13109 case CONST_INT:
13110 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
13111 return;
13112
13113 case CONST_DOUBLE:
13114 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
13115 return;
13116
13117 case CONST_VECTOR:
13118 {
13119 int i;
13120
13121 fprintf (f, "<");
13122 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
13123 {
13124 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
13125 if (i < (CONST_VECTOR_NUNITS (x) - 1))
13126 fputc (',', f);
13127 }
13128 fprintf (f, ">");
13129 }
13130 return;
13131
13132 case CONST_STRING:
13133 fprintf (f, "\"%s\"", XSTR (x, 0));
13134 return;
13135
13136 case SYMBOL_REF:
13137 fprintf (f, "`%s'", XSTR (x, 0));
13138 return;
13139
13140 case LABEL_REF:
13141 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
13142 return;
13143
13144 case CONST:
13145 arm_print_value (f, XEXP (x, 0));
13146 return;
13147
13148 case PLUS:
13149 arm_print_value (f, XEXP (x, 0));
13150 fprintf (f, "+");
13151 arm_print_value (f, XEXP (x, 1));
13152 return;
13153
13154 case PC:
13155 fprintf (f, "pc");
13156 return;
13157
13158 default:
13159 fprintf (f, "????");
13160 return;
13161 }
13162 }
13163 \f
13164 /* Routines for manipulation of the constant pool. */
13165
13166 /* Arm instructions cannot load a large constant directly into a
13167 register; they have to come from a pc relative load. The constant
13168 must therefore be placed in the addressable range of the pc
13169 relative load. Depending on the precise pc relative load
13170 instruction the range is somewhere between 256 bytes and 4k. This
13171 means that we often have to dump a constant inside a function, and
13172 generate code to branch around it.
13173
13174 It is important to minimize this, since the branches will slow
13175 things down and make the code larger.
13176
13177 Normally we can hide the table after an existing unconditional
13178 branch so that there is no interruption of the flow, but in the
13179 worst case the code looks like this:
13180
13181 ldr rn, L1
13182 ...
13183 b L2
13184 align
13185 L1: .long value
13186 L2:
13187 ...
13188
13189 ldr rn, L3
13190 ...
13191 b L4
13192 align
13193 L3: .long value
13194 L4:
13195 ...
13196
13197 We fix this by performing a scan after scheduling, which notices
13198 which instructions need to have their operands fetched from the
13199 constant table and builds the table.
13200
13201 The algorithm starts by building a table of all the constants that
13202 need fixing up and all the natural barriers in the function (places
13203 where a constant table can be dropped without breaking the flow).
13204 For each fixup we note how far the pc-relative replacement will be
13205 able to reach and the offset of the instruction into the function.
13206
13207 Having built the table we then group the fixes together to form
13208 tables that are as large as possible (subject to addressing
13209 constraints) and emit each table of constants after the last
13210 barrier that is within range of all the instructions in the group.
13211 If a group does not contain a barrier, then we forcibly create one
13212 by inserting a jump instruction into the flow. Once the table has
13213 been inserted, the insns are then modified to reference the
13214 relevant entry in the pool.
13215
13216 Possible enhancements to the algorithm (not implemented) are:
13217
13218 1) For some processors and object formats, there may be benefit in
13219 aligning the pools to the start of cache lines; this alignment
13220 would need to be taken into account when calculating addressability
13221 of a pool. */
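/* Illustrative note: the exact reach of each load comes from the insn's
   pool_range/neg_pool_range attributes (see push_minipool_fix below), so a
   fixup for an insn at offset 100 with a 4K forward range must end up in a
   pool that is emitted before roughly offset 4100.  */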
13222
13223 /* These typedefs are located at the start of this file, so that
13224 they can be used in the prototypes there. This comment is to
13225 remind readers of that fact so that the following structures
13226 can be understood more easily.
13227
13228 typedef struct minipool_node Mnode;
13229 typedef struct minipool_fixup Mfix; */
13230
13231 struct minipool_node
13232 {
13233 /* Doubly linked chain of entries. */
13234 Mnode * next;
13235 Mnode * prev;
13236 /* The maximum offset into the code at which this entry can be placed. While
13237 pushing fixes for forward references, all entries are sorted in order
13238 of increasing max_address. */
13239 HOST_WIDE_INT max_address;
13240 /* Similarly for an entry inserted for a backwards ref. */
13241 HOST_WIDE_INT min_address;
13242 /* The number of fixes referencing this entry. This can become zero
13243 if we "unpush" an entry. In this case we ignore the entry when we
13244 come to emit the code. */
13245 int refcount;
13246 /* The offset from the start of the minipool. */
13247 HOST_WIDE_INT offset;
13248 /* The value in the table. */
13249 rtx value;
13250 /* The mode of value. */
13251 enum machine_mode mode;
13252 /* The size of the value. With iWMMXt enabled
13253 sizes > 4 also imply an alignment of 8 bytes. */
13254 int fix_size;
13255 };
13256
13257 struct minipool_fixup
13258 {
13259 Mfix * next;
13260 rtx insn;
13261 HOST_WIDE_INT address;
13262 rtx * loc;
13263 enum machine_mode mode;
13264 int fix_size;
13265 rtx value;
13266 Mnode * minipool;
13267 HOST_WIDE_INT forwards;
13268 HOST_WIDE_INT backwards;
13269 };
13270
13271 /* Fixes less than a word need padding out to a word boundary. */
13272 #define MINIPOOL_FIX_SIZE(mode) \
13273 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
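/* For example (illustrative): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, while DImode entries
   keep their natural size of 8.  */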
13274
13275 static Mnode * minipool_vector_head;
13276 static Mnode * minipool_vector_tail;
13277 static rtx minipool_vector_label;
13278 static int minipool_pad;
13279
13280 /* The linked list of all minipool fixes required for this function. */
13281 Mfix * minipool_fix_head;
13282 Mfix * minipool_fix_tail;
13283 /* The fix entry for the current minipool, once it has been placed. */
13284 Mfix * minipool_barrier;
13285
13286 /* Determines if INSN is the start of a jump table. Returns the end
13287 of the TABLE or NULL_RTX. */
13288 static rtx
13289 is_jump_table (rtx insn)
13290 {
13291 rtx table;
13292
13293 if (jump_to_label_p (insn)
13294 && ((table = next_active_insn (JUMP_LABEL (insn)))
13295 == next_active_insn (insn))
13296 && table != NULL
13297 && JUMP_TABLE_DATA_P (table))
13298 return table;
13299
13300 return NULL_RTX;
13301 }
13302
13303 #ifndef JUMP_TABLES_IN_TEXT_SECTION
13304 #define JUMP_TABLES_IN_TEXT_SECTION 0
13305 #endif
13306
13307 static HOST_WIDE_INT
13308 get_jump_table_size (rtx insn)
13309 {
13310 /* ADDR_VECs only take room if read-only data goes into the text
13311 section. */
13312 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
13313 {
13314 rtx body = PATTERN (insn);
13315 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
13316 HOST_WIDE_INT size;
13317 HOST_WIDE_INT modesize;
13318
13319 modesize = GET_MODE_SIZE (GET_MODE (body));
13320 size = modesize * XVECLEN (body, elt);
13321 switch (modesize)
13322 {
13323 case 1:
13324 /* Round up size of TBB table to a halfword boundary. */
13325 size = (size + 1) & ~(HOST_WIDE_INT)1;
13326 break;
13327 case 2:
13328 /* No padding necessary for TBH. */
13329 break;
13330 case 4:
13331 /* Add two bytes for alignment on Thumb. */
13332 if (TARGET_THUMB)
13333 size += 2;
13334 break;
13335 default:
13336 gcc_unreachable ();
13337 }
13338 return size;
13339 }
13340
13341 return 0;
13342 }
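/* For example (illustrative): a QImode ADDR_DIFF_VEC with 5 entries (a TBB
   table) occupies 5 bytes, rounded up to 6 by the halfword alignment above;
   an HImode table with 5 entries needs exactly 10 bytes.  */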
13343
13344 /* Return the maximum amount of padding that will be inserted before
13345 label LABEL. */
13346
13347 static HOST_WIDE_INT
13348 get_label_padding (rtx label)
13349 {
13350 HOST_WIDE_INT align, min_insn_size;
13351
13352 align = 1 << label_to_alignment (label);
13353 min_insn_size = TARGET_THUMB ? 2 : 4;
13354 return align > min_insn_size ? align - min_insn_size : 0;
13355 }
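/* For example (illustrative): a label aligned to an 8-byte boundary can be
   preceded by up to 4 bytes of padding in ARM state (minimum insn size 4)
   and up to 6 bytes in Thumb state (minimum insn size 2).  */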
13356
13357 /* Move a minipool fix MP from its current location to before MAX_MP.
13358 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
13359 constraints may need updating. */
13360 static Mnode *
13361 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
13362 HOST_WIDE_INT max_address)
13363 {
13364 /* The code below assumes these are different. */
13365 gcc_assert (mp != max_mp);
13366
13367 if (max_mp == NULL)
13368 {
13369 if (max_address < mp->max_address)
13370 mp->max_address = max_address;
13371 }
13372 else
13373 {
13374 if (max_address > max_mp->max_address - mp->fix_size)
13375 mp->max_address = max_mp->max_address - mp->fix_size;
13376 else
13377 mp->max_address = max_address;
13378
13379 /* Unlink MP from its current position. Since max_mp is non-null,
13380 mp->prev must be non-null. */
13381 mp->prev->next = mp->next;
13382 if (mp->next != NULL)
13383 mp->next->prev = mp->prev;
13384 else
13385 minipool_vector_tail = mp->prev;
13386
13387 /* Re-insert it before MAX_MP. */
13388 mp->next = max_mp;
13389 mp->prev = max_mp->prev;
13390 max_mp->prev = mp;
13391
13392 if (mp->prev != NULL)
13393 mp->prev->next = mp;
13394 else
13395 minipool_vector_head = mp;
13396 }
13397
13398 /* Save the new entry. */
13399 max_mp = mp;
13400
13401 /* Scan over the preceding entries and adjust their addresses as
13402 required. */
13403 while (mp->prev != NULL
13404 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13405 {
13406 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13407 mp = mp->prev;
13408 }
13409
13410 return max_mp;
13411 }
13412
13413 /* Add a constant to the minipool for a forward reference. Returns the
13414 node added or NULL if the constant will not fit in this pool. */
13415 static Mnode *
13416 add_minipool_forward_ref (Mfix *fix)
13417 {
13418 /* If set, max_mp is the first pool_entry that has a lower
13419 constraint than the one we are trying to add. */
13420 Mnode * max_mp = NULL;
13421 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
13422 Mnode * mp;
13423
13424 /* If the minipool starts before the end of FIX->INSN then this FIX
13425 cannot be placed into the current pool. Furthermore, adding the
13426 new constant pool entry may cause the pool to start FIX_SIZE bytes
13427 earlier. */
13428 if (minipool_vector_head &&
13429 (fix->address + get_attr_length (fix->insn)
13430 >= minipool_vector_head->max_address - fix->fix_size))
13431 return NULL;
13432
13433 /* Scan the pool to see if a constant with the same value has
13434 already been added. While we are doing this, also note the
13435 location where we must insert the constant if it doesn't already
13436 exist. */
13437 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13438 {
13439 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13440 && fix->mode == mp->mode
13441 && (!LABEL_P (fix->value)
13442 || (CODE_LABEL_NUMBER (fix->value)
13443 == CODE_LABEL_NUMBER (mp->value)))
13444 && rtx_equal_p (fix->value, mp->value))
13445 {
13446 /* More than one fix references this entry. */
13447 mp->refcount++;
13448 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
13449 }
13450
13451 /* Note the insertion point if necessary. */
13452 if (max_mp == NULL
13453 && mp->max_address > max_address)
13454 max_mp = mp;
13455
13456 /* If we are inserting an 8-byte aligned quantity and
13457 we have not already found an insertion point, then
13458 make sure that all such 8-byte aligned quantities are
13459 placed at the start of the pool. */
13460 if (ARM_DOUBLEWORD_ALIGN
13461 && max_mp == NULL
13462 && fix->fix_size >= 8
13463 && mp->fix_size < 8)
13464 {
13465 max_mp = mp;
13466 max_address = mp->max_address;
13467 }
13468 }
13469
13470 /* The value is not currently in the minipool, so we need to create
13471 a new entry for it. If MAX_MP is NULL, the entry will be put on
13472 the end of the list since the placement is less constrained than
13473 any existing entry. Otherwise, we insert the new fix before
13474 MAX_MP and, if necessary, adjust the constraints on the other
13475 entries. */
13476 mp = XNEW (Mnode);
13477 mp->fix_size = fix->fix_size;
13478 mp->mode = fix->mode;
13479 mp->value = fix->value;
13480 mp->refcount = 1;
13481 /* Not yet required for a backwards ref. */
13482 mp->min_address = -65536;
13483
13484 if (max_mp == NULL)
13485 {
13486 mp->max_address = max_address;
13487 mp->next = NULL;
13488 mp->prev = minipool_vector_tail;
13489
13490 if (mp->prev == NULL)
13491 {
13492 minipool_vector_head = mp;
13493 minipool_vector_label = gen_label_rtx ();
13494 }
13495 else
13496 mp->prev->next = mp;
13497
13498 minipool_vector_tail = mp;
13499 }
13500 else
13501 {
13502 if (max_address > max_mp->max_address - mp->fix_size)
13503 mp->max_address = max_mp->max_address - mp->fix_size;
13504 else
13505 mp->max_address = max_address;
13506
13507 mp->next = max_mp;
13508 mp->prev = max_mp->prev;
13509 max_mp->prev = mp;
13510 if (mp->prev != NULL)
13511 mp->prev->next = mp;
13512 else
13513 minipool_vector_head = mp;
13514 }
13515
13516 /* Save the new entry. */
13517 max_mp = mp;
13518
13519 /* Scan over the preceding entries and adjust their addresses as
13520 required. */
13521 while (mp->prev != NULL
13522 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13523 {
13524 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13525 mp = mp->prev;
13526 }
13527
13528 return max_mp;
13529 }
13530
13531 static Mnode *
13532 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13533 HOST_WIDE_INT min_address)
13534 {
13535 HOST_WIDE_INT offset;
13536
13537 /* The code below assumes these are different. */
13538 gcc_assert (mp != min_mp);
13539
13540 if (min_mp == NULL)
13541 {
13542 if (min_address > mp->min_address)
13543 mp->min_address = min_address;
13544 }
13545 else
13546 {
13547 /* We will adjust this below if it is too loose. */
13548 mp->min_address = min_address;
13549
13550 /* Unlink MP from its current position. Since min_mp is non-null,
13551 mp->next must be non-null. */
13552 mp->next->prev = mp->prev;
13553 if (mp->prev != NULL)
13554 mp->prev->next = mp->next;
13555 else
13556 minipool_vector_head = mp->next;
13557
13558 /* Reinsert it after MIN_MP. */
13559 mp->prev = min_mp;
13560 mp->next = min_mp->next;
13561 min_mp->next = mp;
13562 if (mp->next != NULL)
13563 mp->next->prev = mp;
13564 else
13565 minipool_vector_tail = mp;
13566 }
13567
13568 min_mp = mp;
13569
13570 offset = 0;
13571 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13572 {
13573 mp->offset = offset;
13574 if (mp->refcount > 0)
13575 offset += mp->fix_size;
13576
13577 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13578 mp->next->min_address = mp->min_address + mp->fix_size;
13579 }
13580
13581 return min_mp;
13582 }
13583
13584 /* Add a constant to the minipool for a backward reference. Returns the
13585 node added or NULL if the constant will not fit in this pool.
13586
13587 Note that the code for insertion for a backwards reference can be
13588 somewhat confusing because the calculated offsets for each fix do
13589 not take into account the size of the pool (which is still under
13590 construction). */
13591 static Mnode *
13592 add_minipool_backward_ref (Mfix *fix)
13593 {
13594 /* If set, min_mp is the last pool_entry that has a lower constraint
13595 than the one we are trying to add. */
13596 Mnode *min_mp = NULL;
13597 /* This can be negative, since it is only a constraint. */
13598 HOST_WIDE_INT min_address = fix->address - fix->backwards;
13599 Mnode *mp;
13600
13601 /* If we can't reach the current pool from this insn, or if we can't
13602 insert this entry at the end of the pool without pushing other
13603 fixes out of range, then we don't try. This ensures that we
13604 can't fail later on. */
13605 if (min_address >= minipool_barrier->address
13606 || (minipool_vector_tail->min_address + fix->fix_size
13607 >= minipool_barrier->address))
13608 return NULL;
13609
13610 /* Scan the pool to see if a constant with the same value has
13611 already been added. While we are doing this, also note the
13612 location where we must insert the constant if it doesn't already
13613 exist. */
13614 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13615 {
13616 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13617 && fix->mode == mp->mode
13618 && (!LABEL_P (fix->value)
13619 || (CODE_LABEL_NUMBER (fix->value)
13620 == CODE_LABEL_NUMBER (mp->value)))
13621 && rtx_equal_p (fix->value, mp->value)
13622 /* Check that there is enough slack to move this entry to the
13623 end of the table (this is conservative). */
13624 && (mp->max_address
13625 > (minipool_barrier->address
13626 + minipool_vector_tail->offset
13627 + minipool_vector_tail->fix_size)))
13628 {
13629 mp->refcount++;
13630 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13631 }
13632
13633 if (min_mp != NULL)
13634 mp->min_address += fix->fix_size;
13635 else
13636 {
13637 /* Note the insertion point if necessary. */
13638 if (mp->min_address < min_address)
13639 {
13640 /* For now, we do not allow the insertion of nodes requiring
13641 8-byte alignment anywhere but at the start of the pool. */
13642 if (ARM_DOUBLEWORD_ALIGN
13643 && fix->fix_size >= 8 && mp->fix_size < 8)
13644 return NULL;
13645 else
13646 min_mp = mp;
13647 }
13648 else if (mp->max_address
13649 < minipool_barrier->address + mp->offset + fix->fix_size)
13650 {
13651 /* Inserting before this entry would push the fix beyond
13652 its maximum address (which can happen if we have
13653 re-located a forwards fix); force the new fix to come
13654 after it. */
13655 if (ARM_DOUBLEWORD_ALIGN
13656 && fix->fix_size >= 8 && mp->fix_size < 8)
13657 return NULL;
13658 else
13659 {
13660 min_mp = mp;
13661 min_address = mp->min_address + fix->fix_size;
13662 }
13663 }
13664 /* Do not insert a non-8-byte aligned quantity before 8-byte
13665 aligned quantities. */
13666 else if (ARM_DOUBLEWORD_ALIGN
13667 && fix->fix_size < 8
13668 && mp->fix_size >= 8)
13669 {
13670 min_mp = mp;
13671 min_address = mp->min_address + fix->fix_size;
13672 }
13673 }
13674 }
13675
13676 /* We need to create a new entry. */
13677 mp = XNEW (Mnode);
13678 mp->fix_size = fix->fix_size;
13679 mp->mode = fix->mode;
13680 mp->value = fix->value;
13681 mp->refcount = 1;
13682 mp->max_address = minipool_barrier->address + 65536;
13683
13684 mp->min_address = min_address;
13685
13686 if (min_mp == NULL)
13687 {
13688 mp->prev = NULL;
13689 mp->next = minipool_vector_head;
13690
13691 if (mp->next == NULL)
13692 {
13693 minipool_vector_tail = mp;
13694 minipool_vector_label = gen_label_rtx ();
13695 }
13696 else
13697 mp->next->prev = mp;
13698
13699 minipool_vector_head = mp;
13700 }
13701 else
13702 {
13703 mp->next = min_mp->next;
13704 mp->prev = min_mp;
13705 min_mp->next = mp;
13706
13707 if (mp->next != NULL)
13708 mp->next->prev = mp;
13709 else
13710 minipool_vector_tail = mp;
13711 }
13712
13713 /* Save the new entry. */
13714 min_mp = mp;
13715
13716 if (mp->prev)
13717 mp = mp->prev;
13718 else
13719 mp->offset = 0;
13720
13721 /* Scan over the following entries and adjust their offsets. */
13722 while (mp->next != NULL)
13723 {
13724 if (mp->next->min_address < mp->min_address + mp->fix_size)
13725 mp->next->min_address = mp->min_address + mp->fix_size;
13726
13727 if (mp->refcount)
13728 mp->next->offset = mp->offset + mp->fix_size;
13729 else
13730 mp->next->offset = mp->offset;
13731
13732 mp = mp->next;
13733 }
13734
13735 return min_mp;
13736 }
13737
13738 static void
13739 assign_minipool_offsets (Mfix *barrier)
13740 {
13741 HOST_WIDE_INT offset = 0;
13742 Mnode *mp;
13743
13744 minipool_barrier = barrier;
13745
13746 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13747 {
13748 mp->offset = offset;
13749
13750 if (mp->refcount > 0)
13751 offset += mp->fix_size;
13752 }
13753 }
13754
13755 /* Output the literal table. */
13756 static void
13757 dump_minipool (rtx scan)
13758 {
13759 Mnode * mp;
13760 Mnode * nmp;
13761 int align64 = 0;
13762
13763 if (ARM_DOUBLEWORD_ALIGN)
13764 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13765 if (mp->refcount > 0 && mp->fix_size >= 8)
13766 {
13767 align64 = 1;
13768 break;
13769 }
13770
13771 if (dump_file)
13772 fprintf (dump_file,
13773 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13774 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
13775
13776 scan = emit_label_after (gen_label_rtx (), scan);
13777 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13778 scan = emit_label_after (minipool_vector_label, scan);
13779
13780 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13781 {
13782 if (mp->refcount > 0)
13783 {
13784 if (dump_file)
13785 {
13786 fprintf (dump_file,
13787 ";; Offset %u, min %ld, max %ld ",
13788 (unsigned) mp->offset, (unsigned long) mp->min_address,
13789 (unsigned long) mp->max_address);
13790 arm_print_value (dump_file, mp->value);
13791 fputc ('\n', dump_file);
13792 }
13793
13794 switch (mp->fix_size)
13795 {
13796 #ifdef HAVE_consttable_1
13797 case 1:
13798 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13799 break;
13800
13801 #endif
13802 #ifdef HAVE_consttable_2
13803 case 2:
13804 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13805 break;
13806
13807 #endif
13808 #ifdef HAVE_consttable_4
13809 case 4:
13810 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13811 break;
13812
13813 #endif
13814 #ifdef HAVE_consttable_8
13815 case 8:
13816 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13817 break;
13818
13819 #endif
13820 #ifdef HAVE_consttable_16
13821 case 16:
13822 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13823 break;
13824
13825 #endif
13826 default:
13827 gcc_unreachable ();
13828 }
13829 }
13830
13831 nmp = mp->next;
13832 free (mp);
13833 }
13834
13835 minipool_vector_head = minipool_vector_tail = NULL;
13836 scan = emit_insn_after (gen_consttable_end (), scan);
13837 scan = emit_barrier_after (scan);
13838 }
13839
13840 /* Return the cost of forcibly inserting a barrier after INSN. */
13841 static int
13842 arm_barrier_cost (rtx insn)
13843 {
13844 /* Basing the location of the pool on the loop depth is preferable,
13845 but at the moment, the basic block information seems to be
13846 corrupt by this stage of the compilation. */
13847 int base_cost = 50;
13848 rtx next = next_nonnote_insn (insn);
13849
13850 if (next != NULL && LABEL_P (next))
13851 base_cost -= 20;
13852
13853 switch (GET_CODE (insn))
13854 {
13855 case CODE_LABEL:
13856 /* It will always be better to place the table before the label, rather
13857 than after it. */
13858 return 50;
13859
13860 case INSN:
13861 case CALL_INSN:
13862 return base_cost;
13863
13864 case JUMP_INSN:
13865 return base_cost - 10;
13866
13867 default:
13868 return base_cost + 10;
13869 }
13870 }
13871
13872 /* Find the best place in the insn stream in the range
13873 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13874 Create the barrier by inserting a jump and add a new fix entry for
13875 it. */
13876 static Mfix *
13877 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13878 {
13879 HOST_WIDE_INT count = 0;
13880 rtx barrier;
13881 rtx from = fix->insn;
13882 /* The instruction after which we will insert the jump. */
13883 rtx selected = NULL;
13884 int selected_cost;
13885 /* The address at which the jump instruction will be placed. */
13886 HOST_WIDE_INT selected_address;
13887 Mfix * new_fix;
13888 HOST_WIDE_INT max_count = max_address - fix->address;
13889 rtx label = gen_label_rtx ();
13890
13891 selected_cost = arm_barrier_cost (from);
13892 selected_address = fix->address;
13893
13894 while (from && count < max_count)
13895 {
13896 rtx tmp;
13897 int new_cost;
13898
13899 /* This code shouldn't have been called if there was a natural barrier
13900 within range. */
13901 gcc_assert (!BARRIER_P (from));
13902
13903 /* Count the length of this insn. This must stay in sync with the
13904 code that pushes minipool fixes. */
13905 if (LABEL_P (from))
13906 count += get_label_padding (from);
13907 else
13908 count += get_attr_length (from);
13909
13910 /* If there is a jump table, add its length. */
13911 tmp = is_jump_table (from);
13912 if (tmp != NULL)
13913 {
13914 count += get_jump_table_size (tmp);
13915
13916 /* Jump tables aren't in a basic block, so base the cost on
13917 the dispatch insn. If we select this location, we will
13918 still put the pool after the table. */
13919 new_cost = arm_barrier_cost (from);
13920
13921 if (count < max_count
13922 && (!selected || new_cost <= selected_cost))
13923 {
13924 selected = tmp;
13925 selected_cost = new_cost;
13926 selected_address = fix->address + count;
13927 }
13928
13929 /* Continue after the dispatch table. */
13930 from = NEXT_INSN (tmp);
13931 continue;
13932 }
13933
13934 new_cost = arm_barrier_cost (from);
13935
13936 if (count < max_count
13937 && (!selected || new_cost <= selected_cost))
13938 {
13939 selected = from;
13940 selected_cost = new_cost;
13941 selected_address = fix->address + count;
13942 }
13943
13944 from = NEXT_INSN (from);
13945 }
13946
13947 /* Make sure that we found a place to insert the jump. */
13948 gcc_assert (selected);
13949
13950 /* Make sure we do not split a call and its corresponding
13951 CALL_ARG_LOCATION note. */
13952 if (CALL_P (selected))
13953 {
13954 rtx next = NEXT_INSN (selected);
13955 if (next && NOTE_P (next)
13956 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13957 selected = next;
13958 }
13959
13960 /* Create a new JUMP_INSN that branches around a barrier. */
13961 from = emit_jump_insn_after (gen_jump (label), selected);
13962 JUMP_LABEL (from) = label;
13963 barrier = emit_barrier_after (from);
13964 emit_label_after (label, barrier);
13965
13966 /* Create a minipool barrier entry for the new barrier. */
13967 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13968 new_fix->insn = barrier;
13969 new_fix->address = selected_address;
13970 new_fix->next = fix->next;
13971 fix->next = new_fix;
13972
13973 return new_fix;
13974 }
13975
13976 /* Record that there is a natural barrier in the insn stream at
13977 ADDRESS. */
13978 static void
13979 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13980 {
13981 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13982
13983 fix->insn = insn;
13984 fix->address = address;
13985
13986 fix->next = NULL;
13987 if (minipool_fix_head != NULL)
13988 minipool_fix_tail->next = fix;
13989 else
13990 minipool_fix_head = fix;
13991
13992 minipool_fix_tail = fix;
13993 }
13994
13995 /* Record INSN, which will need fixing up to load a value from the
13996 minipool. ADDRESS is the offset of the insn since the start of the
13997 function; LOC is a pointer to the part of the insn which requires
13998 fixing; VALUE is the constant that must be loaded, which is of type
13999 MODE. */
14000 static void
14001 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
14002 enum machine_mode mode, rtx value)
14003 {
14004 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
14005
14006 fix->insn = insn;
14007 fix->address = address;
14008 fix->loc = loc;
14009 fix->mode = mode;
14010 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
14011 fix->value = value;
14012 fix->forwards = get_attr_pool_range (insn);
14013 fix->backwards = get_attr_neg_pool_range (insn);
14014 fix->minipool = NULL;
14015
14016 /* If an insn doesn't have a range defined for it, then it isn't
14017 expecting to be reworked by this code. Better to stop now than
14018 to generate duff assembly code. */
14019 gcc_assert (fix->forwards || fix->backwards);
14020
14021 /* If an entry requires 8-byte alignment then assume all constant pools
14022 require 4 bytes of padding. Trying to do this later on a per-pool
14023 basis is awkward because existing pool entries have to be modified. */
14024 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
14025 minipool_pad = 4;
14026
14027 if (dump_file)
14028 {
14029 fprintf (dump_file,
14030 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
14031 GET_MODE_NAME (mode),
14032 INSN_UID (insn), (unsigned long) address,
14033 -1 * (long)fix->backwards, (long)fix->forwards);
14034 arm_print_value (dump_file, fix->value);
14035 fprintf (dump_file, "\n");
14036 }
14037
14038 /* Add it to the chain of fixes. */
14039 fix->next = NULL;
14040
14041 if (minipool_fix_head != NULL)
14042 minipool_fix_tail->next = fix;
14043 else
14044 minipool_fix_head = fix;
14045
14046 minipool_fix_tail = fix;
14047 }
14048
14049 /* Return the cost of synthesizing a 64-bit constant VAL inline.
14050 Returns the number of insns needed, or 99 if we don't know how to
14051 do it. */
14052 int
14053 arm_const_double_inline_cost (rtx val)
14054 {
14055 rtx lowpart, highpart;
14056 enum machine_mode mode;
14057
14058 mode = GET_MODE (val);
14059
14060 if (mode == VOIDmode)
14061 mode = DImode;
14062
14063 gcc_assert (GET_MODE_SIZE (mode) == 8);
14064
14065 lowpart = gen_lowpart (SImode, val);
14066 highpart = gen_highpart_mode (SImode, mode, val);
14067
14068 gcc_assert (CONST_INT_P (lowpart));
14069 gcc_assert (CONST_INT_P (highpart));
14070
14071 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
14072 NULL_RTX, NULL_RTX, 0, 0)
14073 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
14074 NULL_RTX, NULL_RTX, 0, 0));
14075 }
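/* For example (illustrative): a DImode value whose low and high words are
   both valid ARM immediates (say 0x100 and 0x1) costs 2, one SImode SET per
   half, whereas awkward halves can push the total much higher.  */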
14076
14077 /* Return true if it is worthwhile to split a 64-bit constant into two
14078 32-bit operations. This is the case if optimizing for size, or
14079 if we have load delay slots, or if one 32-bit part can be done with
14080 a single data operation. */
14081 bool
14082 arm_const_double_by_parts (rtx val)
14083 {
14084 enum machine_mode mode = GET_MODE (val);
14085 rtx part;
14086
14087 if (optimize_size || arm_ld_sched)
14088 return true;
14089
14090 if (mode == VOIDmode)
14091 mode = DImode;
14092
14093 part = gen_highpart_mode (SImode, mode, val);
14094
14095 gcc_assert (CONST_INT_P (part));
14096
14097 if (const_ok_for_arm (INTVAL (part))
14098 || const_ok_for_arm (~INTVAL (part)))
14099 return true;
14100
14101 part = gen_lowpart (SImode, val);
14102
14103 gcc_assert (CONST_INT_P (part));
14104
14105 if (const_ok_for_arm (INTVAL (part))
14106 || const_ok_for_arm (~INTVAL (part)))
14107 return true;
14108
14109 return false;
14110 }
14111
14112 /* Return true if it is possible to inline both the high and low parts
14113 of a 64-bit constant into 32-bit data processing instructions. */
14114 bool
14115 arm_const_double_by_immediates (rtx val)
14116 {
14117 enum machine_mode mode = GET_MODE (val);
14118 rtx part;
14119
14120 if (mode == VOIDmode)
14121 mode = DImode;
14122
14123 part = gen_highpart_mode (SImode, mode, val);
14124
14125 gcc_assert (CONST_INT_P (part));
14126
14127 if (!const_ok_for_arm (INTVAL (part)))
14128 return false;
14129
14130 part = gen_lowpart (SImode, val);
14131
14132 gcc_assert (CONST_INT_P (part));
14133
14134 if (!const_ok_for_arm (INTVAL (part)))
14135 return false;
14136
14137 return true;
14138 }
14139
14140 /* Scan INSN and note any of its operands that need fixing.
14141 If DO_PUSHES is false we do not actually push any of the fixups
14142 needed. */
14143 static void
14144 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
14145 {
14146 int opno;
14147
14148 extract_insn (insn);
14149
14150 if (!constrain_operands (1))
14151 fatal_insn_not_found (insn);
14152
14153 if (recog_data.n_alternatives == 0)
14154 return;
14155
14156 /* Fill in recog_op_alt with information about the constraints of
14157 this insn. */
14158 preprocess_constraints ();
14159
14160 for (opno = 0; opno < recog_data.n_operands; opno++)
14161 {
14162 /* Things we need to fix can only occur in inputs. */
14163 if (recog_data.operand_type[opno] != OP_IN)
14164 continue;
14165
14166 /* If this alternative is a memory reference, then any mention
14167 of constants in this alternative is really to fool reload
14168 into allowing us to accept one there. We need to fix them up
14169 now so that we output the right code. */
14170 if (recog_op_alt[opno][which_alternative].memory_ok)
14171 {
14172 rtx op = recog_data.operand[opno];
14173
14174 if (CONSTANT_P (op))
14175 {
14176 if (do_pushes)
14177 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
14178 recog_data.operand_mode[opno], op);
14179 }
14180 else if (MEM_P (op)
14181 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
14182 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
14183 {
14184 if (do_pushes)
14185 {
14186 rtx cop = avoid_constant_pool_reference (op);
14187
14188 /* Casting the address of something to a mode narrower
14189 than a word can cause avoid_constant_pool_reference()
14190 to return the pool reference itself. That's no good to
14191 us here. Let's just hope that we can use the
14192 constant pool value directly. */
14193 if (op == cop)
14194 cop = get_pool_constant (XEXP (op, 0));
14195
14196 push_minipool_fix (insn, address,
14197 recog_data.operand_loc[opno],
14198 recog_data.operand_mode[opno], cop);
14199 }
14200
14201 }
14202 }
14203 }
14204
14205 return;
14206 }
14207
14208 /* Rewrite a move insn into a subtract of 0 if the condition codes will
14209 be useful in the next conditional jump insn. */
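/* For example (illustrative): a "mov r1, r0" feeding a later
   "cmp r0, #0; beq ..." can be rewritten as "subs r1, r0, #0", which sets
   the flags, and the cbranch is then changed to test r1 directly.  */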
14210
14211 static void
14212 thumb1_reorg (void)
14213 {
14214 basic_block bb;
14215
14216 FOR_EACH_BB (bb)
14217 {
14218 rtx set, dest, src;
14219 rtx pat, op0;
14220 rtx prev, insn = BB_END (bb);
14221
14222 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
14223 insn = PREV_INSN (insn);
14224
14225 /* Find the last cbranchsi4_insn in basic block BB. */
14226 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
14227 continue;
14228
14229 /* Find the first non-note insn before INSN in basic block BB. */
14230 gcc_assert (insn != BB_HEAD (bb));
14231 prev = PREV_INSN (insn);
14232 while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
14233 prev = PREV_INSN (prev);
14234
14235 set = single_set (prev);
14236 if (!set)
14237 continue;
14238
14239 dest = SET_DEST (set);
14240 src = SET_SRC (set);
14241 if (!low_register_operand (dest, SImode)
14242 || !low_register_operand (src, SImode))
14243 continue;
14244
14245 pat = PATTERN (insn);
14246 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
14247 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
14248 in INSN. Don't need to check dest since cprop_hardreg pass propagates
14249 src into INSN. */
14250 if (REGNO (op0) == REGNO (src))
14251 {
14252 dest = copy_rtx (dest);
14253 src = copy_rtx (src);
14254 src = gen_rtx_MINUS (SImode, src, const0_rtx);
14255 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
14256 INSN_CODE (prev) = -1;
14257 /* Set test register in INSN to dest. */
14258 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
14259 INSN_CODE (insn) = -1;
14260 }
14261 }
14262 }
14263
14264 /* Convert instructions to their cc-clobbering variant if possible, since
14265 that allows us to use smaller encodings. */
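/* For example (illustrative): "add r0, r0, #1" needs a 32-bit Thumb-2
   encoding when the flags must be preserved, but the flag-setting form
   "adds r0, r0, #1" fits in 16 bits, which is why we add the CC clobber
   when the flags are known to be dead.  */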
14266
14267 static void
14268 thumb2_reorg (void)
14269 {
14270 basic_block bb;
14271 regset_head live;
14272
14273 INIT_REG_SET (&live);
14274
14275 /* We are freeing block_for_insn in the toplev to keep compatibility
14276 with old MDEP_REORGS that are not CFG based. Recompute it now. */
14277 compute_bb_for_insn ();
14278 df_analyze ();
14279
14280 FOR_EACH_BB (bb)
14281 {
14282 rtx insn;
14283
14284 COPY_REG_SET (&live, DF_LR_OUT (bb));
14285 df_simulate_initialize_backwards (bb, &live);
14286 FOR_BB_INSNS_REVERSE (bb, insn)
14287 {
14288 if (NONJUMP_INSN_P (insn)
14289 && !REGNO_REG_SET_P (&live, CC_REGNUM)
14290 && GET_CODE (PATTERN (insn)) == SET)
14291 {
14292 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
14293 rtx pat = PATTERN (insn);
14294 rtx dst = XEXP (pat, 0);
14295 rtx src = XEXP (pat, 1);
14296 rtx op0 = NULL_RTX, op1 = NULL_RTX;
14297
14298 if (!OBJECT_P (src))
14299 op0 = XEXP (src, 0);
14300
14301 if (BINARY_P (src))
14302 op1 = XEXP (src, 1);
14303
14304 if (low_register_operand (dst, SImode))
14305 {
14306 switch (GET_CODE (src))
14307 {
14308 case PLUS:
14309 /* Adding two registers and storing the result
14310 in the first source is already a 16-bit
14311 operation. */
14312 if (rtx_equal_p (dst, op0)
14313 && register_operand (op1, SImode))
14314 break;
14315
14316 if (low_register_operand (op0, SImode))
14317 {
14318 /* ADDS <Rd>,<Rn>,<Rm> */
14319 if (low_register_operand (op1, SImode))
14320 action = CONV;
14321 /* ADDS <Rdn>,#<imm8> */
14322 /* SUBS <Rdn>,#<imm8> */
14323 else if (rtx_equal_p (dst, op0)
14324 && CONST_INT_P (op1)
14325 && IN_RANGE (INTVAL (op1), -255, 255))
14326 action = CONV;
14327 /* ADDS <Rd>,<Rn>,#<imm3> */
14328 /* SUBS <Rd>,<Rn>,#<imm3> */
14329 else if (CONST_INT_P (op1)
14330 && IN_RANGE (INTVAL (op1), -7, 7))
14331 action = CONV;
14332 }
14333 break;
14334
14335 case MINUS:
14336 /* RSBS <Rd>,<Rn>,#0
14337 Not handled here: see NEG below. */
14338 /* SUBS <Rd>,<Rn>,#<imm3>
14339 SUBS <Rdn>,#<imm8>
14340 Not handled here: see PLUS above. */
14341 /* SUBS <Rd>,<Rn>,<Rm> */
14342 if (low_register_operand (op0, SImode)
14343 && low_register_operand (op1, SImode))
14344 action = CONV;
14345 break;
14346
14347 case MULT:
14348 /* MULS <Rdm>,<Rn>,<Rdm>
14349 As an exception to the rule, this is only used
14350 when optimizing for size since MULS is slow on all
14351 known implementations. We do not even want to use
14352 MULS in cold code, if optimizing for speed, so we
14353 test the global flag here. */
14354 if (!optimize_size)
14355 break;
14356 /* else fall through. */
14357 case AND:
14358 case IOR:
14359 case XOR:
14360 /* ANDS <Rdn>,<Rm> */
14361 if (rtx_equal_p (dst, op0)
14362 && low_register_operand (op1, SImode))
14363 action = CONV;
14364 else if (rtx_equal_p (dst, op1)
14365 && low_register_operand (op0, SImode))
14366 action = SWAP_CONV;
14367 break;
14368
14369 case ASHIFTRT:
14370 case ASHIFT:
14371 case LSHIFTRT:
14372 /* ASRS <Rdn>,<Rm> */
14373 /* LSRS <Rdn>,<Rm> */
14374 /* LSLS <Rdn>,<Rm> */
14375 if (rtx_equal_p (dst, op0)
14376 && low_register_operand (op1, SImode))
14377 action = CONV;
14378 /* ASRS <Rd>,<Rm>,#<imm5> */
14379 /* LSRS <Rd>,<Rm>,#<imm5> */
14380 /* LSLS <Rd>,<Rm>,#<imm5> */
14381 else if (low_register_operand (op0, SImode)
14382 && CONST_INT_P (op1)
14383 && IN_RANGE (INTVAL (op1), 0, 31))
14384 action = CONV;
14385 break;
14386
14387 case ROTATERT:
14388 /* RORS <Rdn>,<Rm> */
14389 if (rtx_equal_p (dst, op0)
14390 && low_register_operand (op1, SImode))
14391 action = CONV;
14392 break;
14393
14394 case NOT:
14395 case NEG:
14396 /* MVNS <Rd>,<Rm> */
14397 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
14398 if (low_register_operand (op0, SImode))
14399 action = CONV;
14400 break;
14401
14402 case CONST_INT:
14403 /* MOVS <Rd>,#<imm8> */
14404 if (CONST_INT_P (src)
14405 && IN_RANGE (INTVAL (src), 0, 255))
14406 action = CONV;
14407 break;
14408
14409 case REG:
14410 /* MOVS and MOV<c> with registers have different
14411 encodings, so are not relevant here. */
14412 break;
14413
14414 default:
14415 break;
14416 }
14417 }
14418
14419 if (action != SKIP)
14420 {
14421 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
14422 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
14423 rtvec vec;
14424
14425 if (action == SWAP_CONV)
14426 {
14427 src = copy_rtx (src);
14428 XEXP (src, 0) = op1;
14429 XEXP (src, 1) = op0;
14430 pat = gen_rtx_SET (VOIDmode, dst, src);
14431 vec = gen_rtvec (2, pat, clobber);
14432 }
14433 else /* action == CONV */
14434 vec = gen_rtvec (2, pat, clobber);
14435
14436 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
14437 INSN_CODE (insn) = -1;
14438 }
14439 }
14440
14441 if (NONDEBUG_INSN_P (insn))
14442 df_simulate_one_insn_backwards (bb, insn, &live);
14443 }
14444 }
14445
14446 CLEAR_REG_SET (&live);
14447 }
14448
14449 /* Gcc puts the pool in the wrong place for ARM, since we can only
14450 load addresses a limited distance around the pc. We do some
14451 special munging to move the constant pool values to the correct
14452 point in the code. */
14453 static void
14454 arm_reorg (void)
14455 {
14456 rtx insn;
14457 HOST_WIDE_INT address = 0;
14458 Mfix * fix;
14459
14460 if (TARGET_THUMB1)
14461 thumb1_reorg ();
14462 else if (TARGET_THUMB2)
14463 thumb2_reorg ();
14464
14465 /* Ensure all insns that must be split have been split at this point.
14466 Otherwise, the pool placement code below may compute incorrect
14467 insn lengths. Note that when optimizing, all insns have already
14468 been split at this point. */
14469 if (!optimize)
14470 split_all_insns_noflow ();
14471
14472 minipool_fix_head = minipool_fix_tail = NULL;
14473
14474 /* The first insn must always be a note, or the code below won't
14475 scan it properly. */
14476 insn = get_insns ();
14477 gcc_assert (NOTE_P (insn));
14478 minipool_pad = 0;
14479
14480 /* Scan all the insns and record the operands that will need fixing. */
14481 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
14482 {
14483 if (BARRIER_P (insn))
14484 push_minipool_barrier (insn, address);
14485 else if (INSN_P (insn))
14486 {
14487 rtx table;
14488
14489 note_invalid_constants (insn, address, true);
14490 address += get_attr_length (insn);
14491
14492 /* If the insn is a vector jump, add the size of the table
14493 and skip the table. */
14494 if ((table = is_jump_table (insn)) != NULL)
14495 {
14496 address += get_jump_table_size (table);
14497 insn = table;
14498 }
14499 }
14500 else if (LABEL_P (insn))
14501 /* Add the worst-case padding due to alignment. We don't add
14502 the _current_ padding because the minipool insertions
14503 themselves might change it. */
14504 address += get_label_padding (insn);
14505 }
14506
14507 fix = minipool_fix_head;
14508
14509 /* Now scan the fixups and perform the required changes. */
14510 while (fix)
14511 {
14512 Mfix * ftmp;
14513 Mfix * fdel;
14514 Mfix * last_added_fix;
14515 Mfix * last_barrier = NULL;
14516 Mfix * this_fix;
14517
14518 /* Skip any further barriers before the next fix. */
14519 while (fix && BARRIER_P (fix->insn))
14520 fix = fix->next;
14521
14522 /* No more fixes. */
14523 if (fix == NULL)
14524 break;
14525
14526 last_added_fix = NULL;
14527
14528 for (ftmp = fix; ftmp; ftmp = ftmp->next)
14529 {
14530 if (BARRIER_P (ftmp->insn))
14531 {
14532 if (ftmp->address >= minipool_vector_head->max_address)
14533 break;
14534
14535 last_barrier = ftmp;
14536 }
14537 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14538 break;
14539
14540 last_added_fix = ftmp; /* Keep track of the last fix added. */
14541 }
14542
14543 /* If we found a barrier, drop back to that; any fixes that we
14544 could have reached but come after the barrier will now go in
14545 the next mini-pool. */
14546 if (last_barrier != NULL)
14547 {
14548 /* Reduce the refcount for those fixes that won't go into this
14549 pool after all. */
14550 for (fdel = last_barrier->next;
14551 fdel && fdel != ftmp;
14552 fdel = fdel->next)
14553 {
14554 fdel->minipool->refcount--;
14555 fdel->minipool = NULL;
14556 }
14557
14558 ftmp = last_barrier;
14559 }
14560 else
14561 {
14562 /* ftmp is the first fix that we can't fit into this pool and
14563 there are no natural barriers that we could use. Insert a
14564 new barrier in the code somewhere between the previous
14565 fix and this one, and arrange to jump around it. */
14566 HOST_WIDE_INT max_address;
14567
14568 /* The last item on the list of fixes must be a barrier, so
14569 we can never run off the end of the list of fixes without
14570 last_barrier being set. */
14571 gcc_assert (ftmp);
14572
14573 max_address = minipool_vector_head->max_address;
14574 /* Check that there isn't another fix that is in range that
14575 we couldn't fit into this pool because the pool was
14576 already too large: we need to put the pool before such an
14577 instruction. The pool itself may come just after the
14578 fix because create_fix_barrier also allows space for a
14579 jump instruction. */
14580 if (ftmp->address < max_address)
14581 max_address = ftmp->address + 1;
14582
14583 last_barrier = create_fix_barrier (last_added_fix, max_address);
14584 }
14585
14586 assign_minipool_offsets (last_barrier);
14587
14588 while (ftmp)
14589 {
14590 if (!BARRIER_P (ftmp->insn)
14591 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14592 == NULL))
14593 break;
14594
14595 ftmp = ftmp->next;
14596 }
14597
14598 /* Scan over the fixes we have identified for this pool, fixing them
14599 up and adding the constants to the pool itself. */
14600 for (this_fix = fix; this_fix && ftmp != this_fix;
14601 this_fix = this_fix->next)
14602 if (!BARRIER_P (this_fix->insn))
14603 {
14604 rtx addr
14605 = plus_constant (Pmode,
14606 gen_rtx_LABEL_REF (VOIDmode,
14607 minipool_vector_label),
14608 this_fix->minipool->offset);
14609 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14610 }
14611
14612 dump_minipool (last_barrier->insn);
14613 fix = ftmp;
14614 }
14615
14616 /* From now on we must synthesize any constants that we can't handle
14617 directly. This can happen if the RTL gets split during final
14618 instruction generation. */
14619 after_arm_reorg = 1;
14620
14621 /* Free the minipool memory. */
14622 obstack_free (&minipool_obstack, minipool_startobj);
14623 }
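
/* Rough illustration (ranges are approximate and depend on the instruction
   set): a constant that cannot be built with data-processing instructions
   is recorded by note_invalid_constants and later loaded pc-relatively
   from a minipool dumped within the referencing load's addressing range;
   when no existing barrier falls inside that range, create_fix_barrier
   inserts a branch around the newly dumped pool.  */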
14624 \f
14625 /* Routines to output assembly language. */
14626
14627 /* If the rtx is the correct value then return its string representation.
14628 In this way we can ensure that valid double constants are generated even
14629 when cross compiling. */
14630 const char *
14631 fp_immediate_constant (rtx x)
14632 {
14633 REAL_VALUE_TYPE r;
14634
14635 if (!fp_consts_inited)
14636 init_fp_table ();
14637
14638 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14639
14640 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14641 return "0";
14642 }
14643
14644 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
14645 static const char *
14646 fp_const_from_val (REAL_VALUE_TYPE *r)
14647 {
14648 if (!fp_consts_inited)
14649 init_fp_table ();
14650
14651 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14652 return "0";
14653 }
14654
14655 /* OPERANDS[0] is the entire list of insns that constitute the pop,
14656 OPERANDS[1] is the base register, RETURN_PC is true iff the return
14657 insn is in the list, UPDATE is true iff the list contains an explicit
14658 update of the base register. */
14659 void
14660 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14661 bool update)
14662 {
14663 int i;
14664 char pattern[100];
14665 int offset;
14666 const char *conditional;
14667 int num_saves = XVECLEN (operands[0], 0);
14668 unsigned int regno;
14669 unsigned int regno_base = REGNO (operands[1]);
14670
14671 offset = 0;
14672 offset += update ? 1 : 0;
14673 offset += return_pc ? 1 : 0;
14674
14675 /* Is the base register in the list? */
14676 for (i = offset; i < num_saves; i++)
14677 {
14678 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14679 /* If SP is in the list, then the base register must be SP. */
14680 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14681 /* If the base register is in the list, there must be no explicit update. */
14682 if (regno == regno_base)
14683 gcc_assert (!update);
14684 }
14685
14686 conditional = reverse ? "%?%D0" : "%?%d0";
14687 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14688 {
14689 /* Output pop (not ldmfd) because it has a shorter encoding. */
14690 gcc_assert (update);
14691 sprintf (pattern, "pop%s\t{", conditional);
14692 }
14693 else
14694 {
14695 /* Output ldmfd when the base register is SP, otherwise output ldmia.
14696 It's just a convention; their semantics are identical. */
14697 if (regno_base == SP_REGNUM)
14698 sprintf (pattern, "ldm%sfd\t", conditional);
14699 else if (TARGET_UNIFIED_ASM)
14700 sprintf (pattern, "ldmia%s\t", conditional);
14701 else
14702 sprintf (pattern, "ldm%sia\t", conditional);
14703
14704 strcat (pattern, reg_names[regno_base]);
14705 if (update)
14706 strcat (pattern, "!, {");
14707 else
14708 strcat (pattern, ", {");
14709 }
14710
14711 /* Output the first destination register. */
14712 strcat (pattern,
14713 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14714
14715 /* Output the rest of the destination registers. */
14716 for (i = offset + 1; i < num_saves; i++)
14717 {
14718 strcat (pattern, ", ");
14719 strcat (pattern,
14720 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14721 }
14722
14723 strcat (pattern, "}");
14724
14725 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14726 strcat (pattern, "^");
14727
14728 output_asm_insn (pattern, &cond);
14729 }
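
/* For illustration (assuming a typical operand set): popping {r4, r5, pc}
   through the stack pointer with writeback under unified assembly emits a
   "pop {r4, r5, pc}"; when pop cannot be used, the equivalent ldmfd/ldmia
   form is emitted instead.  For an interrupt return that includes the PC a
   trailing "^" is appended so the SPSR is restored as well.  */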
14730
14731
14732 /* Output the assembly for a store multiple. */
14733
14734 const char *
14735 vfp_output_fstmd (rtx * operands)
14736 {
14737 char pattern[100];
14738 int p;
14739 int base;
14740 int i;
14741
14742 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14743 p = strlen (pattern);
14744
14745 gcc_assert (REG_P (operands[1]));
14746
14747 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14748 for (i = 1; i < XVECLEN (operands[2], 0); i++)
14749 {
14750 p += sprintf (&pattern[p], ", d%d", base + i);
14751 }
14752 strcpy (&pattern[p], "}");
14753
14754 output_asm_insn (pattern, operands);
14755 return "";
14756 }
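
/* For illustration (assuming operands describing d8-d10): the pattern built
   above is "fstmfdd%?\t%m0!, {%P1, d9, d10}", which typically assembles to
   "fstmfdd sp!, {d8, d9, d10}".  */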
14757
14758
14759 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
14760 number of bytes pushed. */
14761
14762 static int
14763 vfp_emit_fstmd (int base_reg, int count)
14764 {
14765 rtx par;
14766 rtx dwarf;
14767 rtx tmp, reg;
14768 int i;
14769
14770 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
14771 register pairs are stored by a store multiple insn. We avoid this
14772 by pushing an extra pair. */
14773 if (count == 2 && !arm_arch6)
14774 {
14775 if (base_reg == LAST_VFP_REGNUM - 3)
14776 base_reg -= 2;
14777 count++;
14778 }
14779
14780 /* FSTMD may not store more than 16 doubleword registers at once. Split
14781 larger stores into multiple parts (up to a maximum of two, in
14782 practice). */
14783 if (count > 16)
14784 {
14785 int saved;
14786 /* NOTE: base_reg is an internal register number, so each D register
14787 counts as 2. */
14788 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14789 saved += vfp_emit_fstmd (base_reg, 16);
14790 return saved;
14791 }
14792
14793 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14794 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14795
14796 reg = gen_rtx_REG (DFmode, base_reg);
14797 base_reg += 2;
14798
14799 XVECEXP (par, 0, 0)
14800 = gen_rtx_SET (VOIDmode,
14801 gen_frame_mem
14802 (BLKmode,
14803 gen_rtx_PRE_MODIFY (Pmode,
14804 stack_pointer_rtx,
14805 plus_constant
14806 (Pmode, stack_pointer_rtx,
14807 - (count * 8)))
14808 ),
14809 gen_rtx_UNSPEC (BLKmode,
14810 gen_rtvec (1, reg),
14811 UNSPEC_PUSH_MULT));
14812
14813 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14814 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14815 RTX_FRAME_RELATED_P (tmp) = 1;
14816 XVECEXP (dwarf, 0, 0) = tmp;
14817
14818 tmp = gen_rtx_SET (VOIDmode,
14819 gen_frame_mem (DFmode, stack_pointer_rtx),
14820 reg);
14821 RTX_FRAME_RELATED_P (tmp) = 1;
14822 XVECEXP (dwarf, 0, 1) = tmp;
14823
14824 for (i = 1; i < count; i++)
14825 {
14826 reg = gen_rtx_REG (DFmode, base_reg);
14827 base_reg += 2;
14828 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14829
14830 tmp = gen_rtx_SET (VOIDmode,
14831 gen_frame_mem (DFmode,
14832 plus_constant (Pmode,
14833 stack_pointer_rtx,
14834 i * 8)),
14835 reg);
14836 RTX_FRAME_RELATED_P (tmp) = 1;
14837 XVECEXP (dwarf, 0, i + 1) = tmp;
14838 }
14839
14840 par = emit_insn (par);
14841 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14842 RTX_FRAME_RELATED_P (par) = 1;
14843
14844 return count * 8;
14845 }
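
/* Two illustrative cases (a sketch of the logic above): pushing d8-d9 on a
   core without arm_arch6 is widened to a push of three register pairs, so
   24 rather than 16 bytes are reserved; pushing 20 pairs is split into one
   store of the highest-numbered 4 pairs followed by one store of the
   remaining 16.  */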
14846
14847 /* Emit a call instruction with pattern PAT. ADDR is the address of
14848 the call target. */
14849
14850 void
14851 arm_emit_call_insn (rtx pat, rtx addr)
14852 {
14853 rtx insn;
14854
14855 insn = emit_call_insn (pat);
14856
14857 /* The PIC register is live on entry to VxWorks PIC PLT entries.
14858 If the call might use such an entry, add a use of the PIC register
14859 to the instruction's CALL_INSN_FUNCTION_USAGE. */
14860 if (TARGET_VXWORKS_RTP
14861 && flag_pic
14862 && GET_CODE (addr) == SYMBOL_REF
14863 && (SYMBOL_REF_DECL (addr)
14864 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14865 : !SYMBOL_REF_LOCAL_P (addr)))
14866 {
14867 require_pic_register ();
14868 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14869 }
14870 }
14871
14872 /* Output a 'call' insn. */
14873 const char *
14874 output_call (rtx *operands)
14875 {
14876 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
14877
14878 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
14879 if (REGNO (operands[0]) == LR_REGNUM)
14880 {
14881 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14882 output_asm_insn ("mov%?\t%0, %|lr", operands);
14883 }
14884
14885 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14886
14887 if (TARGET_INTERWORK || arm_arch4t)
14888 output_asm_insn ("bx%?\t%0", operands);
14889 else
14890 output_asm_insn ("mov%?\t%|pc, %0", operands);
14891
14892 return "";
14893 }
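
/* For illustration: on a pre-v5 target an indirect call through r2 becomes
   "mov lr, pc" followed by "bx r2" (with interworking or ARMv4T) or
   "mov pc, r2" otherwise; when the call target is in lr it is first copied
   to ip, since the "mov lr, pc" would otherwise destroy it.  */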
14894
14895 /* Output a 'call' insn that is a reference in memory. This is
14896 disabled for ARMv5 and later, where we prefer a blx instead because
14897 otherwise there's a significant performance overhead. */
14898 const char *
14899 output_call_mem (rtx *operands)
14900 {
14901 gcc_assert (!arm_arch5);
14902 if (TARGET_INTERWORK)
14903 {
14904 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14905 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14906 output_asm_insn ("bx%?\t%|ip", operands);
14907 }
14908 else if (regno_use_in (LR_REGNUM, operands[0]))
14909 {
14910 /* LR is used in the memory address. We load the address in the
14911 first instruction. It's safe to use IP as the target of the
14912 load since the call will kill it anyway. */
14913 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14914 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14915 if (arm_arch4t)
14916 output_asm_insn ("bx%?\t%|ip", operands);
14917 else
14918 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14919 }
14920 else
14921 {
14922 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14923 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14924 }
14925
14926 return "";
14927 }
14928
14929
14930 /* Output a move from arm registers to arm registers of a long double.
14931 OPERANDS[0] is the destination.
14932 OPERANDS[1] is the source. */
14933 const char *
14934 output_mov_long_double_arm_from_arm (rtx *operands)
14935 {
14936 /* We have to be careful here because the two might overlap. */
14937 int dest_start = REGNO (operands[0]);
14938 int src_start = REGNO (operands[1]);
14939 rtx ops[2];
14940 int i;
14941
14942 if (dest_start < src_start)
14943 {
14944 for (i = 0; i < 3; i++)
14945 {
14946 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14947 ops[1] = gen_rtx_REG (SImode, src_start + i);
14948 output_asm_insn ("mov%?\t%0, %1", ops);
14949 }
14950 }
14951 else
14952 {
14953 for (i = 2; i >= 0; i--)
14954 {
14955 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14956 ops[1] = gen_rtx_REG (SImode, src_start + i);
14957 output_asm_insn ("mov%?\t%0, %1", ops);
14958 }
14959 }
14960
14961 return "";
14962 }
14963
14964 void
14965 arm_emit_movpair (rtx dest, rtx src)
14966 {
14967 /* If the src is an immediate, simplify it. */
14968 if (CONST_INT_P (src))
14969 {
14970 HOST_WIDE_INT val = INTVAL (src);
14971 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14972 if ((val >> 16) & 0x0000ffff)
14973 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14974 GEN_INT (16)),
14975 GEN_INT ((val >> 16) & 0x0000ffff));
14976 return;
14977 }
14978 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14979 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14980 }
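
/* For illustration: an immediate such as 0x12345678 is emitted as a set of
   the low half-word (0x5678) followed by a 16-bit insertion of the high
   half-word (0x1234), which typically maps to a movw/movt pair on cores
   that have those instructions; a symbolic source instead becomes the
   HIGH/LO_SUM pair above.  */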
14981
14982 /* Output a move between double words. It must be REG<-MEM
14983 or MEM<-REG. */
14984 const char *
14985 output_move_double (rtx *operands, bool emit, int *count)
14986 {
14987 enum rtx_code code0 = GET_CODE (operands[0]);
14988 enum rtx_code code1 = GET_CODE (operands[1]);
14989 rtx otherops[3];
14990 if (count)
14991 *count = 1;
14992
14993 /* The only case when this might happen is when
14994 you are looking at the length of a DImode instruction
14995 that has an invalid constant in it. */
14996 if (code0 == REG && code1 != MEM)
14997 {
14998 gcc_assert (!emit);
14999 *count = 2;
15000 return "";
15001 }
15002
15003 if (code0 == REG)
15004 {
15005 unsigned int reg0 = REGNO (operands[0]);
15006
15007 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
15008
15009 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
15010
15011 switch (GET_CODE (XEXP (operands[1], 0)))
15012 {
15013 case REG:
15014
15015 if (emit)
15016 {
15017 if (TARGET_LDRD
15018 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
15019 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
15020 else
15021 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15022 }
15023 break;
15024
15025 case PRE_INC:
15026 gcc_assert (TARGET_LDRD);
15027 if (emit)
15028 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
15029 break;
15030
15031 case PRE_DEC:
15032 if (emit)
15033 {
15034 if (TARGET_LDRD)
15035 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
15036 else
15037 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
15038 }
15039 break;
15040
15041 case POST_INC:
15042 if (emit)
15043 {
15044 if (TARGET_LDRD)
15045 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
15046 else
15047 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
15048 }
15049 break;
15050
15051 case POST_DEC:
15052 gcc_assert (TARGET_LDRD);
15053 if (emit)
15054 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
15055 break;
15056
15057 case PRE_MODIFY:
15058 case POST_MODIFY:
15059 /* Autoincrement addressing modes should never have overlapping
15060 base and destination registers, and overlapping index registers
15061 are already prohibited, so this doesn't need to worry about
15062 fix_cm3_ldrd. */
15063 otherops[0] = operands[0];
15064 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
15065 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
15066
15067 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
15068 {
15069 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
15070 {
15071 /* Registers overlap so split out the increment. */
15072 if (emit)
15073 {
15074 output_asm_insn ("add%?\t%1, %1, %2", otherops);
15075 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
15076 }
15077 if (count)
15078 *count = 2;
15079 }
15080 else
15081 {
15082 /* Use a single insn if we can.
15083 FIXME: IWMMXT allows offsets larger than ldrd can
15084 handle, fix these up with a pair of ldr. */
15085 if (TARGET_THUMB2
15086 || !CONST_INT_P (otherops[2])
15087 || (INTVAL (otherops[2]) > -256
15088 && INTVAL (otherops[2]) < 256))
15089 {
15090 if (emit)
15091 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
15092 }
15093 else
15094 {
15095 if (emit)
15096 {
15097 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
15098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15099 }
15100 if (count)
15101 *count = 2;
15102
15103 }
15104 }
15105 }
15106 else
15107 {
15108 /* Use a single insn if we can.
15109 FIXME: IWMMXT allows offsets larger than ldrd can handle,
15110 fix these up with a pair of ldr. */
15111 if (TARGET_THUMB2
15112 || !CONST_INT_P (otherops[2])
15113 || (INTVAL (otherops[2]) > -256
15114 && INTVAL (otherops[2]) < 256))
15115 {
15116 if (emit)
15117 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
15118 }
15119 else
15120 {
15121 if (emit)
15122 {
15123 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15124 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
15125 }
15126 if (count)
15127 *count = 2;
15128 }
15129 }
15130 break;
15131
15132 case LABEL_REF:
15133 case CONST:
15134 /* We might be able to use ldrd %0, %1 here. However, the range is
15135 different to ldr/adr, and it is broken on some ARMv7-M
15136 implementations. */
15137 /* Use the second register of the pair to avoid problematic
15138 overlap. */
15139 otherops[1] = operands[1];
15140 if (emit)
15141 output_asm_insn ("adr%?\t%0, %1", otherops);
15142 operands[1] = otherops[0];
15143 if (emit)
15144 {
15145 if (TARGET_LDRD)
15146 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15147 else
15148 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
15149 }
15150
15151 if (count)
15152 *count = 2;
15153 break;
15154
15155 /* ??? This needs checking for thumb2. */
15156 default:
15157 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
15158 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
15159 {
15160 otherops[0] = operands[0];
15161 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
15162 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
15163
15164 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
15165 {
15166 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15167 {
15168 switch ((int) INTVAL (otherops[2]))
15169 {
15170 case -8:
15171 if (emit)
15172 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
15173 return "";
15174 case -4:
15175 if (TARGET_THUMB2)
15176 break;
15177 if (emit)
15178 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
15179 return "";
15180 case 4:
15181 if (TARGET_THUMB2)
15182 break;
15183 if (emit)
15184 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
15185 return "";
15186 }
15187 }
15188 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
15189 operands[1] = otherops[0];
15190 if (TARGET_LDRD
15191 && (REG_P (otherops[2])
15192 || TARGET_THUMB2
15193 || (CONST_INT_P (otherops[2])
15194 && INTVAL (otherops[2]) > -256
15195 && INTVAL (otherops[2]) < 256)))
15196 {
15197 if (reg_overlap_mentioned_p (operands[0],
15198 otherops[2]))
15199 {
15200 rtx tmp;
15201 /* Swap base and index registers over to
15202 avoid a conflict. */
15203 tmp = otherops[1];
15204 otherops[1] = otherops[2];
15205 otherops[2] = tmp;
15206 }
15207 /* If both registers conflict, it will usually
15208 have been fixed by a splitter. */
15209 if (reg_overlap_mentioned_p (operands[0], otherops[2])
15210 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
15211 {
15212 if (emit)
15213 {
15214 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15215 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15216 }
15217 if (count)
15218 *count = 2;
15219 }
15220 else
15221 {
15222 otherops[0] = operands[0];
15223 if (emit)
15224 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
15225 }
15226 return "";
15227 }
15228
15229 if (CONST_INT_P (otherops[2]))
15230 {
15231 if (emit)
15232 {
15233 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
15234 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
15235 else
15236 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15237 }
15238 }
15239 else
15240 {
15241 if (emit)
15242 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15243 }
15244 }
15245 else
15246 {
15247 if (emit)
15248 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
15249 }
15250
15251 if (count)
15252 *count = 2;
15253
15254 if (TARGET_LDRD)
15255 return "ldr%(d%)\t%0, [%1]";
15256
15257 return "ldm%(ia%)\t%1, %M0";
15258 }
15259 else
15260 {
15261 otherops[1] = adjust_address (operands[1], SImode, 4);
15262 /* Take care of overlapping base/data reg. */
15263 if (reg_mentioned_p (operands[0], operands[1]))
15264 {
15265 if (emit)
15266 {
15267 output_asm_insn ("ldr%?\t%0, %1", otherops);
15268 output_asm_insn ("ldr%?\t%0, %1", operands);
15269 }
15270 if (count)
15271 *count = 2;
15272
15273 }
15274 else
15275 {
15276 if (emit)
15277 {
15278 output_asm_insn ("ldr%?\t%0, %1", operands);
15279 output_asm_insn ("ldr%?\t%0, %1", otherops);
15280 }
15281 if (count)
15282 *count = 2;
15283 }
15284 }
15285 }
15286 }
15287 else
15288 {
15289 /* Constraints should ensure this. */
15290 gcc_assert (code0 == MEM && code1 == REG);
15291 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
15292 || (TARGET_ARM && TARGET_LDRD));
15293
15294 switch (GET_CODE (XEXP (operands[0], 0)))
15295 {
15296 case REG:
15297 if (emit)
15298 {
15299 if (TARGET_LDRD)
15300 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
15301 else
15302 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15303 }
15304 break;
15305
15306 case PRE_INC:
15307 gcc_assert (TARGET_LDRD);
15308 if (emit)
15309 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
15310 break;
15311
15312 case PRE_DEC:
15313 if (emit)
15314 {
15315 if (TARGET_LDRD)
15316 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
15317 else
15318 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
15319 }
15320 break;
15321
15322 case POST_INC:
15323 if (emit)
15324 {
15325 if (TARGET_LDRD)
15326 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
15327 else
15328 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
15329 }
15330 break;
15331
15332 case POST_DEC:
15333 gcc_assert (TARGET_LDRD);
15334 if (emit)
15335 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
15336 break;
15337
15338 case PRE_MODIFY:
15339 case POST_MODIFY:
15340 otherops[0] = operands[1];
15341 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
15342 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
15343
15344 /* IWMMXT allows offsets larger than strd can handle,
15345 fix these up with a pair of str. */
15346 if (!TARGET_THUMB2
15347 && CONST_INT_P (otherops[2])
15348 && (INTVAL(otherops[2]) <= -256
15349 || INTVAL(otherops[2]) >= 256))
15350 {
15351 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15352 {
15353 if (emit)
15354 {
15355 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
15356 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15357 }
15358 if (count)
15359 *count = 2;
15360 }
15361 else
15362 {
15363 if (emit)
15364 {
15365 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15366 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
15367 }
15368 if (count)
15369 *count = 2;
15370 }
15371 }
15372 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15373 {
15374 if (emit)
15375 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
15376 }
15377 else
15378 {
15379 if (emit)
15380 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
15381 }
15382 break;
15383
15384 case PLUS:
15385 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
15386 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15387 {
15388 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
15389 {
15390 case -8:
15391 if (emit)
15392 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
15393 return "";
15394
15395 case -4:
15396 if (TARGET_THUMB2)
15397 break;
15398 if (emit)
15399 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
15400 return "";
15401
15402 case 4:
15403 if (TARGET_THUMB2)
15404 break;
15405 if (emit)
15406 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
15407 return "";
15408 }
15409 }
15410 if (TARGET_LDRD
15411 && (REG_P (otherops[2])
15412 || TARGET_THUMB2
15413 || (CONST_INT_P (otherops[2])
15414 && INTVAL (otherops[2]) > -256
15415 && INTVAL (otherops[2]) < 256)))
15416 {
15417 otherops[0] = operands[1];
15418 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
15419 if (emit)
15420 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
15421 return "";
15422 }
15423 /* Fall through */
15424
15425 default:
15426 otherops[0] = adjust_address (operands[0], SImode, 4);
15427 otherops[1] = operands[1];
15428 if (emit)
15429 {
15430 output_asm_insn ("str%?\t%1, %0", operands);
15431 output_asm_insn ("str%?\t%H1, %0", otherops);
15432 }
15433 if (count)
15434 *count = 2;
15435 }
15436 }
15437
15438 return "";
15439 }
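
/* For illustration (assuming r4/r5 as the destination pair): a DImode load
   from a plain register address emits "ldrd r4, [r1]" when TARGET_LDRD, or
   "ldmia r1, {r4, r5}" otherwise; the pre/post-modify and PLUS cases above
   fall back to a pair of ldr/str instructions whenever a constant offset
   lies outside the ldrd/strd range.  */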
15440
15441 /* Output a move, load or store for quad-word vectors in ARM registers. Only
15442 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
15443
15444 const char *
15445 output_move_quad (rtx *operands)
15446 {
15447 if (REG_P (operands[0]))
15448 {
15449 /* Load, or reg->reg move. */
15450
15451 if (MEM_P (operands[1]))
15452 {
15453 switch (GET_CODE (XEXP (operands[1], 0)))
15454 {
15455 case REG:
15456 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15457 break;
15458
15459 case LABEL_REF:
15460 case CONST:
15461 output_asm_insn ("adr%?\t%0, %1", operands);
15462 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
15463 break;
15464
15465 default:
15466 gcc_unreachable ();
15467 }
15468 }
15469 else
15470 {
15471 rtx ops[2];
15472 int dest, src, i;
15473
15474 gcc_assert (REG_P (operands[1]));
15475
15476 dest = REGNO (operands[0]);
15477 src = REGNO (operands[1]);
15478
15479 /* This seems pretty dumb, but hopefully GCC won't try to do it
15480 very often. */
15481 if (dest < src)
15482 for (i = 0; i < 4; i++)
15483 {
15484 ops[0] = gen_rtx_REG (SImode, dest + i);
15485 ops[1] = gen_rtx_REG (SImode, src + i);
15486 output_asm_insn ("mov%?\t%0, %1", ops);
15487 }
15488 else
15489 for (i = 3; i >= 0; i--)
15490 {
15491 ops[0] = gen_rtx_REG (SImode, dest + i);
15492 ops[1] = gen_rtx_REG (SImode, src + i);
15493 output_asm_insn ("mov%?\t%0, %1", ops);
15494 }
15495 }
15496 }
15497 else
15498 {
15499 gcc_assert (MEM_P (operands[0]));
15500 gcc_assert (REG_P (operands[1]));
15501 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15502
15503 switch (GET_CODE (XEXP (operands[0], 0)))
15504 {
15505 case REG:
15506 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15507 break;
15508
15509 default:
15510 gcc_unreachable ();
15511 }
15512 }
15513
15514 return "";
15515 }
15516
15517 /* Output a VFP load or store instruction. */
15518
15519 const char *
15520 output_move_vfp (rtx *operands)
15521 {
15522 rtx reg, mem, addr, ops[2];
15523 int load = REG_P (operands[0]);
15524 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15525 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15526 const char *templ;
15527 char buff[50];
15528 enum machine_mode mode;
15529
15530 reg = operands[!load];
15531 mem = operands[load];
15532
15533 mode = GET_MODE (reg);
15534
15535 gcc_assert (REG_P (reg));
15536 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15537 gcc_assert (mode == SFmode
15538 || mode == DFmode
15539 || mode == SImode
15540 || mode == DImode
15541 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15542 gcc_assert (MEM_P (mem));
15543
15544 addr = XEXP (mem, 0);
15545
15546 switch (GET_CODE (addr))
15547 {
15548 case PRE_DEC:
15549 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15550 ops[0] = XEXP (addr, 0);
15551 ops[1] = reg;
15552 break;
15553
15554 case POST_INC:
15555 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15556 ops[0] = XEXP (addr, 0);
15557 ops[1] = reg;
15558 break;
15559
15560 default:
15561 templ = "f%s%c%%?\t%%%s0, %%1%s";
15562 ops[0] = reg;
15563 ops[1] = mem;
15564 break;
15565 }
15566
15567 sprintf (buff, templ,
15568 load ? "ld" : "st",
15569 dp ? 'd' : 's',
15570 dp ? "P" : "",
15571 integer_p ? "\t%@ int" : "");
15572 output_asm_insn (buff, ops);
15573
15574 return "";
15575 }
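
/* For illustration: a DFmode load with a simple base-plus-offset address
   expands the template to "fldd%?\t%P0, %1", giving e.g.
   "fldd d8, [r0, #8]"; an SFmode store with a POST_INC address instead
   becomes "fstmias%?\t%0!, {%1}".  */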
15576
15577 /* Output a Neon double-word or quad-word load or store, or a load
15578 or store for larger structure modes.
15579
15580 WARNING: The ordering of elements is weird in big-endian mode,
15581 because the EABI requires that vectors stored in memory appear
15582 as though they were stored by a VSTM instruction.
15583 GCC RTL defines element ordering based on in-memory order.
15584 This can be different from the architectural ordering of elements
15585 within a NEON register. The intrinsics defined in arm_neon.h use the
15586 NEON register element ordering, not the GCC RTL element ordering.
15587
15588 For example, the in-memory ordering of a big-endian quadword
15589 vector with 16-bit elements when stored from register pair {d0,d1}
15590 will be (lowest address first, d0[N] is NEON register element N):
15591
15592 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15593
15594 When necessary, quadword registers (dN, dN+1) are moved to ARM
15595 registers starting at rN, in the order:
15596
15597 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15598
15599 This ensures that STM/LDM can be used on vectors in ARM registers,
15600 and that the same memory layout results as if VSTM/VLDM were used.
15601
15602 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15603 possible, which allows use of appropriate alignment tags.
15604 Note that the choice of "64" is independent of the actual vector
15605 element size; this size simply ensures that the behavior is
15606 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15607
15608 Due to limitations of those instructions, use of VST1.64/VLD1.64
15609 is not possible if:
15610 - the address contains PRE_DEC, or
15611 - the mode refers to more than 4 double-word registers
15612
15613 In those cases, it would be possible to replace VSTM/VLDM by a
15614 sequence of instructions; this is not currently implemented since
15615 this is not certain to actually improve performance. */
15616
15617 const char *
15618 output_move_neon (rtx *operands)
15619 {
15620 rtx reg, mem, addr, ops[2];
15621 int regno, nregs, load = REG_P (operands[0]);
15622 const char *templ;
15623 char buff[50];
15624 enum machine_mode mode;
15625
15626 reg = operands[!load];
15627 mem = operands[load];
15628
15629 mode = GET_MODE (reg);
15630
15631 gcc_assert (REG_P (reg));
15632 regno = REGNO (reg);
15633 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
15634 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
15635 || NEON_REGNO_OK_FOR_QUAD (regno));
15636 gcc_assert (VALID_NEON_DREG_MODE (mode)
15637 || VALID_NEON_QREG_MODE (mode)
15638 || VALID_NEON_STRUCT_MODE (mode));
15639 gcc_assert (MEM_P (mem));
15640
15641 addr = XEXP (mem, 0);
15642
15643 /* Strip off const from addresses like (const (plus (...))). */
15644 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15645 addr = XEXP (addr, 0);
15646
15647 switch (GET_CODE (addr))
15648 {
15649 case POST_INC:
15650 /* We have to use vldm / vstm for too-large modes. */
15651 if (nregs > 4)
15652 {
15653 templ = "v%smia%%?\t%%0!, %%h1";
15654 ops[0] = XEXP (addr, 0);
15655 }
15656 else
15657 {
15658 templ = "v%s1.64\t%%h1, %%A0";
15659 ops[0] = mem;
15660 }
15661 ops[1] = reg;
15662 break;
15663
15664 case PRE_DEC:
15665 /* We have to use vldm / vstm in this case, since there is no
15666 pre-decrement form of the vld1 / vst1 instructions. */
15667 templ = "v%smdb%%?\t%%0!, %%h1";
15668 ops[0] = XEXP (addr, 0);
15669 ops[1] = reg;
15670 break;
15671
15672 case POST_MODIFY:
15673 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
15674 gcc_unreachable ();
15675
15676 case LABEL_REF:
15677 case PLUS:
15678 {
15679 int i;
15680 int overlap = -1;
15681 for (i = 0; i < nregs; i++)
15682 {
15683 /* We're only using DImode here because it's a convenient size. */
15684 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
15685 ops[1] = adjust_address (mem, DImode, 8 * i);
15686 if (reg_overlap_mentioned_p (ops[0], mem))
15687 {
15688 gcc_assert (overlap == -1);
15689 overlap = i;
15690 }
15691 else
15692 {
15693 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15694 output_asm_insn (buff, ops);
15695 }
15696 }
15697 if (overlap != -1)
15698 {
15699 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
15700 ops[1] = adjust_address (mem, SImode, 8 * overlap);
15701 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15702 output_asm_insn (buff, ops);
15703 }
15704
15705 return "";
15706 }
15707
15708 default:
15709 /* We have to use vldm / vstm for too-large modes. */
15710 if (nregs > 4)
15711 templ = "v%smia%%?\t%%m0, %%h1";
15712 else
15713 templ = "v%s1.64\t%%h1, %%A0";
15714
15715 ops[0] = mem;
15716 ops[1] = reg;
15717 }
15718
15719 sprintf (buff, templ, load ? "ld" : "st");
15720 output_asm_insn (buff, ops);
15721
15722 return "";
15723 }
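
/* For illustration: a quad-word (two D-register) load with a POST_INC
   address uses the vld1.64 form, giving something like
   "vld1.64 {d0, d1}, [r0]!", while a mode spanning more than four D
   registers falls back to vldmia; the PLUS/LABEL_REF case is split into
   one vldr/vstr per D register, with any register that overlaps the
   address handled last.  */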
15724
15725 /* Compute and return the length of neon_mov<mode>, where <mode> is
15726 one of VSTRUCT modes: EI, OI, CI or XI. */
15727 int
15728 arm_attr_length_move_neon (rtx insn)
15729 {
15730 rtx reg, mem, addr;
15731 int load;
15732 enum machine_mode mode;
15733
15734 extract_insn_cached (insn);
15735
15736 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
15737 {
15738 mode = GET_MODE (recog_data.operand[0]);
15739 switch (mode)
15740 {
15741 case EImode:
15742 case OImode:
15743 return 8;
15744 case CImode:
15745 return 12;
15746 case XImode:
15747 return 16;
15748 default:
15749 gcc_unreachable ();
15750 }
15751 }
15752
15753 load = REG_P (recog_data.operand[0]);
15754 reg = recog_data.operand[!load];
15755 mem = recog_data.operand[load];
15756
15757 gcc_assert (MEM_P (mem));
15758
15759 mode = GET_MODE (reg);
15760 addr = XEXP (mem, 0);
15761
15762 /* Strip off const from addresses like (const (plus (...))). */
15763 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15764 addr = XEXP (addr, 0);
15765
15766 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
15767 {
15768 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
15769 return insns * 4;
15770 }
15771 else
15772 return 4;
15773 }
15774
15775 /* Return nonzero if the offset in the address is an immediate. Otherwise,
15776 return zero. */
15777
15778 int
15779 arm_address_offset_is_imm (rtx insn)
15780 {
15781 rtx mem, addr;
15782
15783 extract_insn_cached (insn);
15784
15785 if (REG_P (recog_data.operand[0]))
15786 return 0;
15787
15788 mem = recog_data.operand[0];
15789
15790 gcc_assert (MEM_P (mem));
15791
15792 addr = XEXP (mem, 0);
15793
15794 if (REG_P (addr)
15795 || (GET_CODE (addr) == PLUS
15796 && REG_P (XEXP (addr, 0))
15797 && CONST_INT_P (XEXP (addr, 1))))
15798 return 1;
15799 else
15800 return 0;
15801 }
15802
15803 /* Output an ADD r, s, #n where n may be too big for one instruction.
15804 If the constant is zero and the source and destination registers are the same, output nothing. */
15805 const char *
15806 output_add_immediate (rtx *operands)
15807 {
15808 HOST_WIDE_INT n = INTVAL (operands[2]);
15809
15810 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
15811 {
15812 if (n < 0)
15813 output_multi_immediate (operands,
15814 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
15815 -n);
15816 else
15817 output_multi_immediate (operands,
15818 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
15819 n);
15820 }
15821
15822 return "";
15823 }
15824
15825 /* Output a multiple immediate operation.
15826 OPERANDS is the vector of operands referred to in the output patterns.
15827 INSTR1 is the output pattern to use for the first constant.
15828 INSTR2 is the output pattern to use for subsequent constants.
15829 IMMED_OP is the index of the constant slot in OPERANDS.
15830 N is the constant value. */
15831 static const char *
15832 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
15833 int immed_op, HOST_WIDE_INT n)
15834 {
15835 #if HOST_BITS_PER_WIDE_INT > 32
15836 n &= 0xffffffff;
15837 #endif
15838
15839 if (n == 0)
15840 {
15841 /* Quick and easy output. */
15842 operands[immed_op] = const0_rtx;
15843 output_asm_insn (instr1, operands);
15844 }
15845 else
15846 {
15847 int i;
15848 const char * instr = instr1;
15849
15850 /* Note that n is never zero here (which would give no output). */
15851 for (i = 0; i < 32; i += 2)
15852 {
15853 if (n & (3 << i))
15854 {
15855 operands[immed_op] = GEN_INT (n & (255 << i));
15856 output_asm_insn (instr, operands);
15857 instr = instr2;
15858 i += 6;
15859 }
15860 }
15861 }
15862
15863 return "";
15864 }
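
/* Worked example (a sketch): output_add_immediate with n = 0x00f000f0
   cannot use a single add, so the loop above emits one instruction for the
   chunk 0x000000f0 and a second for the chunk 0x00f00000, each chunk being
   eight contiguous bits starting at an even position and therefore a valid
   ARM immediate.  */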
15865
15866 /* Return the name of a shifter operation. */
15867 static const char *
15868 arm_shift_nmem(enum rtx_code code)
15869 {
15870 switch (code)
15871 {
15872 case ASHIFT:
15873 return ARM_LSL_NAME;
15874
15875 case ASHIFTRT:
15876 return "asr";
15877
15878 case LSHIFTRT:
15879 return "lsr";
15880
15881 case ROTATERT:
15882 return "ror";
15883
15884 default:
15885 abort();
15886 }
15887 }
15888
15889 /* Return the appropriate ARM instruction for the operation code.
15890 The returned result should not be overwritten. OP is the rtx of the
15891 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15892 was shifted. */
15893 const char *
15894 arithmetic_instr (rtx op, int shift_first_arg)
15895 {
15896 switch (GET_CODE (op))
15897 {
15898 case PLUS:
15899 return "add";
15900
15901 case MINUS:
15902 return shift_first_arg ? "rsb" : "sub";
15903
15904 case IOR:
15905 return "orr";
15906
15907 case XOR:
15908 return "eor";
15909
15910 case AND:
15911 return "and";
15912
15913 case ASHIFT:
15914 case ASHIFTRT:
15915 case LSHIFTRT:
15916 case ROTATERT:
15917 return arm_shift_nmem(GET_CODE(op));
15918
15919 default:
15920 gcc_unreachable ();
15921 }
15922 }
15923
15924 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15925 for the operation code. The returned result should not be overwritten.
15926 OP is the rtx of the shift.
15927 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
15928 will hold the constant shift amount. */
15929 static const char *
15930 shift_op (rtx op, HOST_WIDE_INT *amountp)
15931 {
15932 const char * mnem;
15933 enum rtx_code code = GET_CODE (op);
15934
15935 switch (code)
15936 {
15937 case ROTATE:
15938 if (!CONST_INT_P (XEXP (op, 1)))
15939 {
15940 output_operand_lossage ("invalid shift operand");
15941 return NULL;
15942 }
15943
15944 code = ROTATERT;
15945 *amountp = 32 - INTVAL (XEXP (op, 1));
15946 mnem = "ror";
15947 break;
15948
15949 case ASHIFT:
15950 case ASHIFTRT:
15951 case LSHIFTRT:
15952 case ROTATERT:
15953 mnem = arm_shift_nmem(code);
15954 if (CONST_INT_P (XEXP (op, 1)))
15955 {
15956 *amountp = INTVAL (XEXP (op, 1));
15957 }
15958 else if (REG_P (XEXP (op, 1)))
15959 {
15960 *amountp = -1;
15961 return mnem;
15962 }
15963 else
15964 {
15965 output_operand_lossage ("invalid shift operand");
15966 return NULL;
15967 }
15968 break;
15969
15970 case MULT:
15971 /* We never have to worry about the amount being other than a
15972 power of 2, since this case can never be reloaded from a reg. */
15973 if (!CONST_INT_P (XEXP (op, 1)))
15974 {
15975 output_operand_lossage ("invalid shift operand");
15976 return NULL;
15977 }
15978
15979 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
15980
15981 /* Amount must be a power of two. */
15982 if (*amountp & (*amountp - 1))
15983 {
15984 output_operand_lossage ("invalid shift operand");
15985 return NULL;
15986 }
15987
15988 *amountp = int_log2 (*amountp);
15989 return ARM_LSL_NAME;
15990
15991 default:
15992 output_operand_lossage ("invalid shift operand");
15993 return NULL;
15994 }
15995
15996 /* This is not 100% correct, but follows from the desire to merge
15997 multiplication by a power of 2 with the recognizer for a
15998 shift. >=32 is not a valid shift for "lsl", so we must try to
15999 output a shift that produces the correct arithmetical result.
16000 Using lsr #32 is identical except for the fact that the carry bit
16001 is not set correctly if we set the flags; but we never use the
16002 carry bit from such an operation, so we can ignore that. */
16003 if (code == ROTATERT)
16004 /* Rotate is just modulo 32. */
16005 *amountp &= 31;
16006 else if (*amountp != (*amountp & 31))
16007 {
16008 if (code == ASHIFT)
16009 mnem = "lsr";
16010 *amountp = 32;
16011 }
16012
16013 /* Shifts of 0 are no-ops. */
16014 if (*amountp == 0)
16015 return NULL;
16016
16017 return mnem;
16018 }
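
/* For illustration: (mult x 8) is printed as an "lsl" with *AMOUNTP set to
   3; an ASHIFT by 34 is out of range and becomes "lsr" with *AMOUNTP
   forced to 32, preserving the arithmetic result of zero; a shift amount
   of 0 makes the function return NULL so no shifter operand is printed.  */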
16019
16020 /* Obtain the shift count corresponding to the power of two POWER. */
16021
16022 static HOST_WIDE_INT
16023 int_log2 (HOST_WIDE_INT power)
16024 {
16025 HOST_WIDE_INT shift = 0;
16026
16027 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
16028 {
16029 gcc_assert (shift <= 31);
16030 shift++;
16031 }
16032
16033 return shift;
16034 }
16035
16036 /* Output a .ascii pseudo-op, keeping track of lengths. This is
16037 because /bin/as is horribly restrictive. The judgement about
16038 whether or not each character is 'printable' (and can be output as
16039 is) or not (and must be printed with an octal escape) must be made
16040 with reference to the *host* character set -- the situation is
16041 similar to that discussed in the comments above pp_c_char in
16042 c-pretty-print.c. */
16043
16044 #define MAX_ASCII_LEN 51
16045
16046 void
16047 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
16048 {
16049 int i;
16050 int len_so_far = 0;
16051
16052 fputs ("\t.ascii\t\"", stream);
16053
16054 for (i = 0; i < len; i++)
16055 {
16056 int c = p[i];
16057
16058 if (len_so_far >= MAX_ASCII_LEN)
16059 {
16060 fputs ("\"\n\t.ascii\t\"", stream);
16061 len_so_far = 0;
16062 }
16063
16064 if (ISPRINT (c))
16065 {
16066 if (c == '\\' || c == '\"')
16067 {
16068 putc ('\\', stream);
16069 len_so_far++;
16070 }
16071 putc (c, stream);
16072 len_so_far++;
16073 }
16074 else
16075 {
16076 fprintf (stream, "\\%03o", c);
16077 len_so_far += 4;
16078 }
16079 }
16080
16081 fputs ("\"\n", stream);
16082 }
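
/* For illustration: the three bytes 'a', '"', '\n' are emitted as
	.ascii	"a\"\012"
   with the quote escaped and the non-printable newline written as an octal
   escape; longer strings are broken across several .ascii directives once
   MAX_ASCII_LEN characters have been written.  */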
16083 \f
16084 /* Compute the register save mask for registers 0 through 12
16085 inclusive. This code is used by arm_compute_save_reg_mask. */
16086
16087 static unsigned long
16088 arm_compute_save_reg0_reg12_mask (void)
16089 {
16090 unsigned long func_type = arm_current_func_type ();
16091 unsigned long save_reg_mask = 0;
16092 unsigned int reg;
16093
16094 if (IS_INTERRUPT (func_type))
16095 {
16096 unsigned int max_reg;
16097 /* Interrupt functions must not corrupt any registers,
16098 even call clobbered ones. If this is a leaf function
16099 we can just examine the registers used by the RTL, but
16100 otherwise we have to assume that whatever function is
16101 called might clobber anything, and so we have to save
16102 all the call-clobbered registers as well. */
16103 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
16104 /* FIQ handlers have registers r8 - r12 banked, so
16105 we only need to check r0 - r7. Normal ISRs only
16106 bank r14 and r15, so we must check up to r12.
16107 r13 is the stack pointer which is always preserved,
16108 so we do not need to consider it here. */
16109 max_reg = 7;
16110 else
16111 max_reg = 12;
16112
16113 for (reg = 0; reg <= max_reg; reg++)
16114 if (df_regs_ever_live_p (reg)
16115 || (! crtl->is_leaf && call_used_regs[reg]))
16116 save_reg_mask |= (1 << reg);
16117
16118 /* Also save the pic base register if necessary. */
16119 if (flag_pic
16120 && !TARGET_SINGLE_PIC_BASE
16121 && arm_pic_register != INVALID_REGNUM
16122 && crtl->uses_pic_offset_table)
16123 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16124 }
16125 else if (IS_VOLATILE(func_type))
16126 {
16127 /* For noreturn functions we historically omitted register saves
16128 altogether. However, this really messes up debugging. As a
16129 compromise, save just the frame pointers. Combined with the link
16130 register saved elsewhere this should be sufficient to get
16131 a backtrace. */
16132 if (frame_pointer_needed)
16133 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
16134 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
16135 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
16136 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
16137 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
16138 }
16139 else
16140 {
16141 /* In the normal case we only need to save those registers
16142 which are call saved and which are used by this function. */
16143 for (reg = 0; reg <= 11; reg++)
16144 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16145 save_reg_mask |= (1 << reg);
16146
16147 /* Handle the frame pointer as a special case. */
16148 if (frame_pointer_needed)
16149 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
16150
16151 /* If we aren't loading the PIC register,
16152 don't stack it even though it may be live. */
16153 if (flag_pic
16154 && !TARGET_SINGLE_PIC_BASE
16155 && arm_pic_register != INVALID_REGNUM
16156 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
16157 || crtl->uses_pic_offset_table))
16158 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16159
16160 /* The prologue will copy SP into R0, so save it. */
16161 if (IS_STACKALIGN (func_type))
16162 save_reg_mask |= 1;
16163 }
16164
16165 /* Save registers so the exception handler can modify them. */
16166 if (crtl->calls_eh_return)
16167 {
16168 unsigned int i;
16169
16170 for (i = 0; ; i++)
16171 {
16172 reg = EH_RETURN_DATA_REGNO (i);
16173 if (reg == INVALID_REGNUM)
16174 break;
16175 save_reg_mask |= 1 << reg;
16176 }
16177 }
16178
16179 return save_reg_mask;
16180 }
16181
16182 /* Return true if r3 is live at the start of the function. */
16183
16184 static bool
16185 arm_r3_live_at_start_p (void)
16186 {
16187 /* Just look at cfg info, which is still close enough to correct at this
16188 point. This gives false positives for broken functions that might use
16189 uninitialized data that happens to be allocated in r3, but who cares? */
16190 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3);
16191 }
16192
16193 /* Compute the number of bytes used to store the static chain register on the
16194 stack, above the stack frame. We need to know this accurately to get the
16195 alignment of the rest of the stack frame correct. */
16196
16197 static int
16198 arm_compute_static_chain_stack_bytes (void)
16199 {
16200 /* See the defining assertion in arm_expand_prologue. */
16201 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
16202 && IS_NESTED (arm_current_func_type ())
16203 && arm_r3_live_at_start_p ()
16204 && crtl->args.pretend_args_size == 0)
16205 return 4;
16206
16207 return 0;
16208 }
16209
16210 /* Compute a bit mask of which registers need to be
16211 saved on the stack for the current function.
16212 This is used by arm_get_frame_offsets, which may add extra registers. */
16213
16214 static unsigned long
16215 arm_compute_save_reg_mask (void)
16216 {
16217 unsigned int save_reg_mask = 0;
16218 unsigned long func_type = arm_current_func_type ();
16219 unsigned int reg;
16220
16221 if (IS_NAKED (func_type))
16222 /* This should never really happen. */
16223 return 0;
16224
16225 /* If we are creating a stack frame, then we must save the frame pointer,
16226 IP (which will hold the old stack pointer), LR and the PC. */
16227 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16228 save_reg_mask |=
16229 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
16230 | (1 << IP_REGNUM)
16231 | (1 << LR_REGNUM)
16232 | (1 << PC_REGNUM);
16233
16234 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
16235
16236 /* Decide if we need to save the link register.
16237 Interrupt routines have their own banked link register,
16238 so they never need to save it.
16239 Otherwise if we do not use the link register we do not need to save
16240 it. If we are pushing other registers onto the stack however, we
16241 can save an instruction in the epilogue by pushing the link register
16242 now and then popping it back into the PC. This incurs extra memory
16243 accesses though, so we only do it when optimizing for size, and only
16244 if we know that we will not need a fancy return sequence. */
16245 if (df_regs_ever_live_p (LR_REGNUM)
16246 || (save_reg_mask
16247 && optimize_size
16248 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
16249 && !crtl->calls_eh_return))
16250 save_reg_mask |= 1 << LR_REGNUM;
16251
16252 if (cfun->machine->lr_save_eliminated)
16253 save_reg_mask &= ~ (1 << LR_REGNUM);
16254
16255 if (TARGET_REALLY_IWMMXT
16256 && ((bit_count (save_reg_mask)
16257 + ARM_NUM_INTS (crtl->args.pretend_args_size +
16258 arm_compute_static_chain_stack_bytes())
16259 ) % 2) != 0)
16260 {
16261 /* The total number of registers that are going to be pushed
16262 onto the stack is odd. We need to ensure that the stack
16263 is 64-bit aligned before we start to save iWMMXt registers,
16264 and also before we start to create locals. (A local variable
16265 might be a double or long long which we will load/store using
16266 an iWMMXt instruction). Therefore we need to push another
16267 ARM register, so that the stack will be 64-bit aligned. We
16268 try to avoid using the arg registers (r0 - r3) as they might be
16269 used to pass values in a tail call. */
16270 for (reg = 4; reg <= 12; reg++)
16271 if ((save_reg_mask & (1 << reg)) == 0)
16272 break;
16273
16274 if (reg <= 12)
16275 save_reg_mask |= (1 << reg);
16276 else
16277 {
16278 cfun->machine->sibcall_blocked = 1;
16279 save_reg_mask |= (1 << 3);
16280 }
16281 }
16282
16283 /* We may need to push an additional register for use initializing the
16284 PIC base register. */
16285 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
16286 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
16287 {
16288 reg = thumb_find_work_register (1 << 4);
16289 if (!call_used_regs[reg])
16290 save_reg_mask |= (1 << reg);
16291 }
16292
16293 return save_reg_mask;
16294 }
16295
16296
16297 /* Compute a bit mask of which registers need to be
16298 saved on the stack for the current function. */
16299 static unsigned long
16300 thumb1_compute_save_reg_mask (void)
16301 {
16302 unsigned long mask;
16303 unsigned reg;
16304
16305 mask = 0;
16306 for (reg = 0; reg < 12; reg ++)
16307 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16308 mask |= 1 << reg;
16309
16310 if (flag_pic
16311 && !TARGET_SINGLE_PIC_BASE
16312 && arm_pic_register != INVALID_REGNUM
16313 && crtl->uses_pic_offset_table)
16314 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16315
16316 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
16317 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16318 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
16319
16320 /* LR will also be pushed if any lo regs are pushed. */
16321 if (mask & 0xff || thumb_force_lr_save ())
16322 mask |= (1 << LR_REGNUM);
16323
16324 /* Make sure we have a low work register if we need one.
16325 We will need one if we are going to push a high register,
16326 but we are not currently intending to push a low register. */
16327 if ((mask & 0xff) == 0
16328 && ((mask & 0x0f00) || TARGET_BACKTRACE))
16329 {
16330 /* Use thumb_find_work_register to choose which register
16331 we will use. If the register is live then we will
16332 have to push it. Use LAST_LO_REGNUM as our fallback
16333 choice for the register to select. */
16334 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
16335 /* Make sure the register returned by thumb_find_work_register is
16336 not part of the return value. */
16337 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
16338 reg = LAST_LO_REGNUM;
16339
16340 if (! call_used_regs[reg])
16341 mask |= 1 << reg;
16342 }
16343
16344 /* The 504 below is 8 bytes less than 512 because there are two possible
16345 alignment words. We can't tell here if they will be present or not so we
16346 have to play it safe and assume that they are. */
16347 if ((CALLER_INTERWORKING_SLOT_SIZE +
16348 ROUND_UP_WORD (get_frame_size ()) +
16349 crtl->outgoing_args_size) >= 504)
16350 {
16351 /* This is the same as the code in thumb1_expand_prologue() which
16352 determines which register to use for stack decrement. */
16353 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
16354 if (mask & (1 << reg))
16355 break;
16356
16357 if (reg > LAST_LO_REGNUM)
16358 {
16359 /* Make sure we have a register available for stack decrement. */
16360 mask |= 1 << LAST_LO_REGNUM;
16361 }
16362 }
16363
16364 return mask;
16365 }
16366
16367
16368 /* Return the number of bytes required to save VFP registers. */
16369 static int
16370 arm_get_vfp_saved_size (void)
16371 {
16372 unsigned int regno;
16373 int count;
16374 int saved;
16375
16376 saved = 0;
16377 /* Space for saved VFP registers. */
16378 if (TARGET_HARD_FLOAT && TARGET_VFP)
16379 {
16380 count = 0;
16381 for (regno = FIRST_VFP_REGNUM;
16382 regno < LAST_VFP_REGNUM;
16383 regno += 2)
16384 {
16385 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
16386 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
16387 {
16388 if (count > 0)
16389 {
16390 /* Workaround ARM10 VFPr1 bug. */
16391 if (count == 2 && !arm_arch6)
16392 count++;
16393 saved += count * 8;
16394 }
16395 count = 0;
16396 }
16397 else
16398 count++;
16399 }
16400 if (count > 0)
16401 {
16402 if (count == 2 && !arm_arch6)
16403 count++;
16404 saved += count * 8;
16405 }
16406 }
16407 return saved;
16408 }
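
/* For illustration: if d8-d11 are live the result is 32 bytes; on a core
   without arm_arch6 with only d8-d9 live the ARM10 VFPr1 workaround counts
   an extra pair, giving 24 bytes, matching what vfp_emit_fstmd will
   actually push.  */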
16409
16410
16411 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
16412 everything bar the final return instruction. If SIMPLE_RETURN is true,
16413 then do not output the epilogue, because it has already been emitted in RTL. */
16414 const char *
16415 output_return_instruction (rtx operand, bool really_return, bool reverse,
16416 bool simple_return)
16417 {
16418 char conditional[10];
16419 char instr[100];
16420 unsigned reg;
16421 unsigned long live_regs_mask;
16422 unsigned long func_type;
16423 arm_stack_offsets *offsets;
16424
16425 func_type = arm_current_func_type ();
16426
16427 if (IS_NAKED (func_type))
16428 return "";
16429
16430 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
16431 {
16432 /* If this function was declared non-returning, and we have
16433 found a tail call, then we have to trust that the called
16434 function won't return. */
16435 if (really_return)
16436 {
16437 rtx ops[2];
16438
16439 /* Otherwise, trap an attempted return by aborting. */
16440 ops[0] = operand;
16441 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
16442 : "abort");
16443 assemble_external_libcall (ops[1]);
16444 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
16445 }
16446
16447 return "";
16448 }
16449
16450 gcc_assert (!cfun->calls_alloca || really_return);
16451
16452 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
16453
16454 cfun->machine->return_used_this_function = 1;
16455
16456 offsets = arm_get_frame_offsets ();
16457 live_regs_mask = offsets->saved_regs_mask;
16458
16459 if (!simple_return && live_regs_mask)
16460 {
16461 const char * return_reg;
16462
16463 /* If we do not have any special requirements for function exit
16464 (e.g. interworking) then we can load the return address
16465 directly into the PC. Otherwise we must load it into LR. */
16466 if (really_return
16467 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
16468 return_reg = reg_names[PC_REGNUM];
16469 else
16470 return_reg = reg_names[LR_REGNUM];
16471
16472 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
16473 {
16474 /* There are three possible reasons for the IP register
16475 being saved. 1) a stack frame was created, in which case
16476 IP contains the old stack pointer, or 2) an ISR routine
16477 corrupted it, or 3) it was saved to align the stack on
16478 iWMMXt. In case 1, restore IP into SP, otherwise just
16479 restore IP. */
16480 if (frame_pointer_needed)
16481 {
16482 live_regs_mask &= ~ (1 << IP_REGNUM);
16483 live_regs_mask |= (1 << SP_REGNUM);
16484 }
16485 else
16486 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
16487 }
16488
16489 /* On some ARM architectures it is faster to use LDR rather than
16490 LDM to load a single register. On other architectures, the
16491 cost is the same. In 26 bit mode, or for exception handlers,
16492 we have to use LDM to load the PC so that the CPSR is also
16493 restored. */
16494 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
16495 if (live_regs_mask == (1U << reg))
16496 break;
16497
16498 if (reg <= LAST_ARM_REGNUM
16499 && (reg != LR_REGNUM
16500 || ! really_return
16501 || ! IS_INTERRUPT (func_type)))
16502 {
16503 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
16504 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
16505 }
16506 else
16507 {
16508 char *p;
16509 int first = 1;
16510
16511 /* Generate the load multiple instruction to restore the
16512 registers. Note we can get here, even if
16513 frame_pointer_needed is true, but only if sp already
16514 points to the base of the saved core registers. */
16515 if (live_regs_mask & (1 << SP_REGNUM))
16516 {
16517 unsigned HOST_WIDE_INT stack_adjust;
16518
16519 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16520 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
16521
16522 if (stack_adjust && arm_arch5 && TARGET_ARM)
16523 if (TARGET_UNIFIED_ASM)
16524 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
16525 else
16526 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
16527 else
16528 {
16529 /* If we can't use ldmib (SA110 bug),
16530 then try to pop r3 instead. */
16531 if (stack_adjust)
16532 live_regs_mask |= 1 << 3;
16533
16534 if (TARGET_UNIFIED_ASM)
16535 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
16536 else
16537 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
16538 }
16539 }
16540 else
16541 if (TARGET_UNIFIED_ASM)
16542 sprintf (instr, "pop%s\t{", conditional);
16543 else
16544 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
16545
16546 p = instr + strlen (instr);
16547
16548 for (reg = 0; reg <= SP_REGNUM; reg++)
16549 if (live_regs_mask & (1 << reg))
16550 {
16551 int l = strlen (reg_names[reg]);
16552
16553 if (first)
16554 first = 0;
16555 else
16556 {
16557 memcpy (p, ", ", 2);
16558 p += 2;
16559 }
16560
16561 memcpy (p, "%|", 2);
16562 memcpy (p + 2, reg_names[reg], l);
16563 p += l + 2;
16564 }
16565
16566 if (live_regs_mask & (1 << LR_REGNUM))
16567 {
16568 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
16569 /* If returning from an interrupt, restore the CPSR. */
16570 if (IS_INTERRUPT (func_type))
16571 strcat (p, "^");
16572 }
16573 else
16574 strcpy (p, "}");
16575 }
16576
16577 output_asm_insn (instr, & operand);
16578
16579 /* See if we need to generate an extra instruction to
16580 perform the actual function return. */
16581 if (really_return
16582 && func_type != ARM_FT_INTERWORKED
16583 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
16584 {
16585 /* The return has already been handled
16586 by loading the LR into the PC. */
16587 return "";
16588 }
16589 }
16590
16591 if (really_return)
16592 {
16593 switch ((int) ARM_FUNC_TYPE (func_type))
16594 {
16595 case ARM_FT_ISR:
16596 case ARM_FT_FIQ:
16597 /* ??? This is wrong for unified assembly syntax. */
16598 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
16599 break;
16600
16601 case ARM_FT_INTERWORKED:
16602 sprintf (instr, "bx%s\t%%|lr", conditional);
16603 break;
16604
16605 case ARM_FT_EXCEPTION:
16606 /* ??? This is wrong for unified assembly syntax. */
16607 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
16608 break;
16609
16610 default:
16611 /* Use bx if it's available. */
16612 if (arm_arch5 || arm_arch4t)
16613 sprintf (instr, "bx%s\t%%|lr", conditional);
16614 else
16615 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
16616 break;
16617 }
16618
16619 output_asm_insn (instr, & operand);
16620 }
16621
16622 return "";
16623 }
16624
16625 /* Write the function name into the code section, directly preceding
16626 the function prologue.
16627
16628 Code will be output similar to this:
16629 t0
16630 .ascii "arm_poke_function_name", 0
16631 .align
16632 t1
16633 .word 0xff000000 + (t1 - t0)
16634 arm_poke_function_name
16635 mov ip, sp
16636 stmfd sp!, {fp, ip, lr, pc}
16637 sub fp, ip, #4
16638
16639 When performing a stack backtrace, code can inspect the value
16640 of 'pc' stored at 'fp' + 0. If the trace function then looks
16641 at location pc - 12 and the top 8 bits are set, then we know
16642 that there is a function name embedded immediately preceding this
16643 location, whose length is given by ((pc[-3]) & ~0xff000000).
16644
16645 We assume that pc is declared as a pointer to an unsigned long.
16646
16647 It is of no benefit to output the function name if we are assembling
16648 a leaf function. These function types will not contain a stack
16649 backtrace structure, therefore it is not possible to determine the
16650 function name. */
16651 void
16652 arm_poke_function_name (FILE *stream, const char *name)
16653 {
16654 unsigned long alignlength;
16655 unsigned long length;
16656 rtx x;
16657
16658 length = strlen (name) + 1;
16659 alignlength = ROUND_UP_WORD (length);
16660
16661 ASM_OUTPUT_ASCII (stream, name, length);
16662 ASM_OUTPUT_ALIGN (stream, 2);
16663 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
16664 assemble_aligned_integer (UNITS_PER_WORD, x);
16665 }
16666
16667 /* Place some comments into the assembler stream
16668 describing the current function. */
16669 static void
16670 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
16671 {
16672 unsigned long func_type;
16673
16674 /* ??? Do we want to print some of the below anyway? */
16675 if (TARGET_THUMB1)
16676 return;
16677
16678 /* Sanity check. */
16679 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
16680
16681 func_type = arm_current_func_type ();
16682
16683 switch ((int) ARM_FUNC_TYPE (func_type))
16684 {
16685 default:
16686 case ARM_FT_NORMAL:
16687 break;
16688 case ARM_FT_INTERWORKED:
16689 asm_fprintf (f, "\t%@ Function supports interworking.\n");
16690 break;
16691 case ARM_FT_ISR:
16692 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
16693 break;
16694 case ARM_FT_FIQ:
16695 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
16696 break;
16697 case ARM_FT_EXCEPTION:
16698 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
16699 break;
16700 }
16701
16702 if (IS_NAKED (func_type))
16703 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
16704
16705 if (IS_VOLATILE (func_type))
16706 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
16707
16708 if (IS_NESTED (func_type))
16709 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
16710 if (IS_STACKALIGN (func_type))
16711 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
16712
16713 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
16714 crtl->args.size,
16715 crtl->args.pretend_args_size, frame_size);
16716
16717 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
16718 frame_pointer_needed,
16719 cfun->machine->uses_anonymous_args);
16720
16721 if (cfun->machine->lr_save_eliminated)
16722 asm_fprintf (f, "\t%@ link register save eliminated.\n");
16723
16724 if (crtl->calls_eh_return)
16725 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
16726
16727 }
16728
16729 static void
16730 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16731 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16732 {
16733 arm_stack_offsets *offsets;
16734
16735 if (TARGET_THUMB1)
16736 {
16737 int regno;
16738
16739 /* Emit any call-via-reg trampolines that are needed for v4t support
16740 of call_reg and call_value_reg type insns. */
16741 for (regno = 0; regno < LR_REGNUM; regno++)
16742 {
16743 rtx label = cfun->machine->call_via[regno];
16744
16745 if (label != NULL)
16746 {
16747 switch_to_section (function_section (current_function_decl));
16748 targetm.asm_out.internal_label (asm_out_file, "L",
16749 CODE_LABEL_NUMBER (label));
16750 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16751 }
16752 }
16753
16754 /* ??? Probably not safe to set this here, since it assumes that a
16755 function will be emitted as assembly immediately after we generate
16756 RTL for it. This does not happen for inline functions. */
16757 cfun->machine->return_used_this_function = 0;
16758 }
16759 else /* TARGET_32BIT */
16760 {
16761 /* We need to take into account any stack-frame rounding. */
16762 offsets = arm_get_frame_offsets ();
16763
16764 gcc_assert (!use_return_insn (FALSE, NULL)
16765 || (cfun->machine->return_used_this_function != 0)
16766 || offsets->saved_regs == offsets->outgoing_args
16767 || frame_pointer_needed);
16768
16769 /* Reset the ARM-specific per-function variables. */
16770 after_arm_reorg = 0;
16771 }
16772 }
16773
16774 /* Generate and emit a pattern that will be recognized as an STRD pattern. If an
16775 even number of registers is being pushed, multiple STRD patterns are created
16776 for all register pairs. If an odd number of registers is pushed, emit a
16777 combination of STRDs and an STR for the prologue saves. */
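/* For illustration: with SAVED_REGS_MASK covering {r4, r5, r6, lr}
   the expansion below is roughly equivalent to
	sub	sp, sp, #16
	strd	r4, r5, [sp]
	strd	r6, lr, [sp, #8]
   with the attached DWARF note describing the SP adjustment and the
   four individual word stores.  */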
16778 static void
16779 thumb2_emit_strd_push (unsigned long saved_regs_mask)
16780 {
16781 int num_regs = 0;
16782 int i, j;
16783 rtx par = NULL_RTX;
16784 rtx insn = NULL_RTX;
16785 rtx dwarf = NULL_RTX;
16786 rtx tmp, reg, tmp1;
16787
16788 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16789 if (saved_regs_mask & (1 << i))
16790 num_regs++;
16791
16792 gcc_assert (num_regs && num_regs <= 16);
16793
16794 /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
16795 registers to push. */
16796 tmp = gen_rtx_SET (VOIDmode,
16797 stack_pointer_rtx,
16798 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16799 RTX_FRAME_RELATED_P (tmp) = 1;
16800 insn = emit_insn (tmp);
16801
16802 /* Create sequence for DWARF info. */
16803 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16804
16805 /* RTLs cannot be shared, hence create new copy for dwarf. */
16806 tmp1 = gen_rtx_SET (VOIDmode,
16807 stack_pointer_rtx,
16808 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16809 RTX_FRAME_RELATED_P (tmp1) = 1;
16810 XVECEXP (dwarf, 0, 0) = tmp1;
16811
16812 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16813 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16814
16815 /* Var j iterates over all the registers to gather the registers in
16816 saved_regs_mask. Var i gives the index of register R_j in the stack frame.
16817 A PARALLEL RTX for a register pair is created here, so that the pattern for
16818 STRD can be matched. If num_regs is odd, the first register will be pushed
16819 using STR and the remaining registers will be pushed with STRD in pairs.
16820 If num_regs is even, all registers are pushed with STRD in pairs.
16821 Hence, skip the first element for odd num_regs. */
16822 for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
16823 if (saved_regs_mask & (1 << j))
16824 {
16825 /* Create RTX for store. New RTX is created for dwarf as
16826 they are not sharable. */
16827 reg = gen_rtx_REG (SImode, j);
16828 tmp = gen_rtx_SET (SImode,
16829 gen_frame_mem
16830 (SImode,
16831 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16832 reg);
16833
16834 tmp1 = gen_rtx_SET (SImode,
16835 gen_frame_mem
16836 (SImode,
16837 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16838 reg);
16839 RTX_FRAME_RELATED_P (tmp) = 1;
16840 RTX_FRAME_RELATED_P (tmp1) = 1;
16841
16842 if (((i - (num_regs % 2)) % 2) == 1)
16843 /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
16844 be created. Hence create it first. The STRD pattern we are
16845 generating is :
16846 [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
16847 (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
16848 where the target registers need not be consecutive. */
16849 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16850
16851 /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is
16852 even, the reg_j is added as 0th element and if it is odd, reg_i is
16853 added as 1st element of STRD pattern shown above. */
16854 XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
16855 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16856
16857 if (((i - (num_regs % 2)) % 2) == 0)
16858 /* When (i - (num_regs % 2)) is even, RTXs for both the registers
16859 to be loaded are generated in above given STRD pattern, and the
16860 pattern can be emitted now. */
16861 emit_insn (par);
16862
16863 i--;
16864 }
16865
16866 if ((num_regs % 2) == 1)
16867 {
16868 /* If odd number of registers are pushed, generate STR pattern to store
16869 lone register. */
16870 for (; (saved_regs_mask & (1 << j)) == 0; j--);
16871
16872 tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
16873 stack_pointer_rtx, 4 * i));
16874 reg = gen_rtx_REG (SImode, j);
16875 tmp = gen_rtx_SET (SImode, tmp1, reg);
16876 RTX_FRAME_RELATED_P (tmp) = 1;
16877
16878 emit_insn (tmp);
16879
16880 tmp1 = gen_rtx_SET (SImode,
16881 gen_frame_mem
16882 (SImode,
16883 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16884 reg);
16885 RTX_FRAME_RELATED_P (tmp1) = 1;
16886 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16887 }
16888
16889 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16890 RTX_FRAME_RELATED_P (insn) = 1;
16891 return;
16892 }
16893
16894 /* STRD in ARM mode requires consecutive registers. This function emits STRD
16895 whenever possible, otherwise it emits single-word stores. The first store
16896 also allocates stack space for all saved registers, using writeback with
16897 pre-indexed addressing. All other stores use offset addressing. If no STRD
16898 can be emitted, this function emits a sequence of single-word stores,
16899 and not an STM as before, because single-word stores provide more scheduling
16900 freedom and can be turned into an STM by peephole optimizations. */
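/* For illustration: with SAVED_REGS_MASK covering {r4, r5, r7, lr}
   (only r4/r5 form a consecutive even/odd pair) the stores emitted
   below are roughly equivalent to
	strd	r4, r5, [sp, #-16]!
	str	r7, [sp, #8]
	str	lr, [sp, #12]
   where the first store performs the writeback that allocates the
   whole 16-byte save area.  */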
16901 static void
16902 arm_emit_strd_push (unsigned long saved_regs_mask)
16903 {
16904 int num_regs = 0;
16905 int i, j, dwarf_index = 0;
16906 int offset = 0;
16907 rtx dwarf = NULL_RTX;
16908 rtx insn = NULL_RTX;
16909 rtx tmp, mem;
16910
16911 /* TODO: A more efficient code can be emitted by changing the
16912 layout, e.g., first push all pairs that can use STRD to keep the
16913 stack aligned, and then push all other registers. */
16914 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16915 if (saved_regs_mask & (1 << i))
16916 num_regs++;
16917
16918 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16919 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16920 gcc_assert (num_regs > 0);
16921
16922 /* Create sequence for DWARF info. */
16923 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16924
16925 /* For dwarf info, we generate explicit stack update. */
16926 tmp = gen_rtx_SET (VOIDmode,
16927 stack_pointer_rtx,
16928 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16929 RTX_FRAME_RELATED_P (tmp) = 1;
16930 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
16931
16932 /* Save registers. */
16933 offset = - 4 * num_regs;
16934 j = 0;
16935 while (j <= LAST_ARM_REGNUM)
16936 if (saved_regs_mask & (1 << j))
16937 {
16938 if ((j % 2 == 0)
16939 && (saved_regs_mask & (1 << (j + 1))))
16940 {
16941 /* Current register and previous register form register pair for
16942 which STRD can be generated. */
16943 if (offset < 0)
16944 {
16945 /* Allocate stack space for all saved registers. */
16946 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
16947 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
16948 mem = gen_frame_mem (DImode, tmp);
16949 offset = 0;
16950 }
16951 else if (offset > 0)
16952 mem = gen_frame_mem (DImode,
16953 plus_constant (Pmode,
16954 stack_pointer_rtx,
16955 offset));
16956 else
16957 mem = gen_frame_mem (DImode, stack_pointer_rtx);
16958
16959 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
16960 RTX_FRAME_RELATED_P (tmp) = 1;
16961 tmp = emit_insn (tmp);
16962
16963 /* Record the first store insn. */
16964 if (dwarf_index == 1)
16965 insn = tmp;
16966
16967 /* Generate dwarf info. */
16968 mem = gen_frame_mem (SImode,
16969 plus_constant (Pmode,
16970 stack_pointer_rtx,
16971 offset));
16972 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
16973 RTX_FRAME_RELATED_P (tmp) = 1;
16974 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
16975
16976 mem = gen_frame_mem (SImode,
16977 plus_constant (Pmode,
16978 stack_pointer_rtx,
16979 offset + 4));
16980 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
16981 RTX_FRAME_RELATED_P (tmp) = 1;
16982 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
16983
16984 offset += 8;
16985 j += 2;
16986 }
16987 else
16988 {
16989 /* Emit a single word store. */
16990 if (offset < 0)
16991 {
16992 /* Allocate stack space for all saved registers. */
16993 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
16994 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
16995 mem = gen_frame_mem (SImode, tmp);
16996 offset = 0;
16997 }
16998 else if (offset > 0)
16999 mem = gen_frame_mem (SImode,
17000 plus_constant (Pmode,
17001 stack_pointer_rtx,
17002 offset));
17003 else
17004 mem = gen_frame_mem (SImode, stack_pointer_rtx);
17005
17006 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17007 RTX_FRAME_RELATED_P (tmp) = 1;
17008 tmp = emit_insn (tmp);
17009
17010 /* Record the first store insn. */
17011 if (dwarf_index == 1)
17012 insn = tmp;
17013
17014 /* Generate dwarf info. */
17015 mem = gen_frame_mem (SImode,
17016 plus_constant (Pmode,
17017 stack_pointer_rtx,
17018 offset));
17019 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17020 RTX_FRAME_RELATED_P (tmp) = 1;
17021 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17022
17023 offset += 4;
17024 j += 1;
17025 }
17026 }
17027 else
17028 j++;
17029
17030 /* Attach dwarf info to the first insn we generate. */
17031 gcc_assert (insn != NULL_RTX);
17032 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17033 RTX_FRAME_RELATED_P (insn) = 1;
17034 }
17035
17036 /* Generate and emit an insn that we will recognize as a push_multi.
17037 Unfortunately, since this insn does not reflect very well the actual
17038 semantics of the operation, we need to annotate the insn for the benefit
17039 of DWARF2 frame unwind information. */
17040 static rtx
17041 emit_multi_reg_push (unsigned long mask)
17042 {
17043 int num_regs = 0;
17044 int num_dwarf_regs;
17045 int i, j;
17046 rtx par;
17047 rtx dwarf;
17048 int dwarf_par_index;
17049 rtx tmp, reg;
17050
17051 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17052 if (mask & (1 << i))
17053 num_regs++;
17054
17055 gcc_assert (num_regs && num_regs <= 16);
17056
17057 /* We don't record the PC in the dwarf frame information. */
17058 num_dwarf_regs = num_regs;
17059 if (mask & (1 << PC_REGNUM))
17060 num_dwarf_regs--;
17061
17062 /* For the body of the insn we are going to generate an UNSPEC in
17063 parallel with several USEs. This allows the insn to be recognized
17064 by the push_multi pattern in the arm.md file.
17065
17066 The body of the insn looks something like this:
17067
17068 (parallel [
17069 (set (mem:BLK (pre_modify:SI (reg:SI sp)
17070 (const_int:SI <num>)))
17071 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
17072 (use (reg:SI XX))
17073 (use (reg:SI YY))
17074 ...
17075 ])
17076
17077 For the frame note however, we try to be more explicit and actually
17078 show each register being stored into the stack frame, plus a (single)
17079 decrement of the stack pointer. We do it this way in order to be
17080 friendly to the stack unwinding code, which only wants to see a single
17081 stack decrement per instruction. The RTL we generate for the note looks
17082 something like this:
17083
17084 (sequence [
17085 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
17086 (set (mem:SI (reg:SI sp)) (reg:SI r4))
17087 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
17088 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
17089 ...
17090 ])
17091
17092 FIXME:: In an ideal world the PRE_MODIFY would not exist and
17093 instead we'd have a parallel expression detailing all
17094 the stores to the various memory addresses so that debug
17095 information is more up-to-date. Remember however while writing
17096 this to take care of the constraints with the push instruction.
17097
17098 Note also that this has to be taken care of for the VFP registers.
17099
17100 For more see PR43399. */
17101
17102 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
17103 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
17104 dwarf_par_index = 1;
17105
17106 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17107 {
17108 if (mask & (1 << i))
17109 {
17110 reg = gen_rtx_REG (SImode, i);
17111
17112 XVECEXP (par, 0, 0)
17113 = gen_rtx_SET (VOIDmode,
17114 gen_frame_mem
17115 (BLKmode,
17116 gen_rtx_PRE_MODIFY (Pmode,
17117 stack_pointer_rtx,
17118 plus_constant
17119 (Pmode, stack_pointer_rtx,
17120 -4 * num_regs))
17121 ),
17122 gen_rtx_UNSPEC (BLKmode,
17123 gen_rtvec (1, reg),
17124 UNSPEC_PUSH_MULT));
17125
17126 if (i != PC_REGNUM)
17127 {
17128 tmp = gen_rtx_SET (VOIDmode,
17129 gen_frame_mem (SImode, stack_pointer_rtx),
17130 reg);
17131 RTX_FRAME_RELATED_P (tmp) = 1;
17132 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
17133 dwarf_par_index++;
17134 }
17135
17136 break;
17137 }
17138 }
17139
17140 for (j = 1, i++; j < num_regs; i++)
17141 {
17142 if (mask & (1 << i))
17143 {
17144 reg = gen_rtx_REG (SImode, i);
17145
17146 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
17147
17148 if (i != PC_REGNUM)
17149 {
17150 tmp
17151 = gen_rtx_SET (VOIDmode,
17152 gen_frame_mem
17153 (SImode,
17154 plus_constant (Pmode, stack_pointer_rtx,
17155 4 * j)),
17156 reg);
17157 RTX_FRAME_RELATED_P (tmp) = 1;
17158 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
17159 }
17160
17161 j++;
17162 }
17163 }
17164
17165 par = emit_insn (par);
17166
17167 tmp = gen_rtx_SET (VOIDmode,
17168 stack_pointer_rtx,
17169 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
17170 RTX_FRAME_RELATED_P (tmp) = 1;
17171 XVECEXP (dwarf, 0, 0) = tmp;
17172
17173 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17174
17175 return par;
17176 }
17177
17178 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
17179 SIZE is the offset to be adjusted.
17180 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
17181 static void
17182 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
17183 {
17184 rtx dwarf;
17185
17186 RTX_FRAME_RELATED_P (insn) = 1;
17187 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
17188 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
17189 }
17190
17191 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
17192 SAVED_REGS_MASK shows which registers need to be restored.
17193
17194 Unfortunately, since this insn does not reflect very well the actual
17195 semantics of the operation, we need to annotate the insn for the benefit
17196 of DWARF2 frame unwind information. */
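/* For illustration: with SAVED_REGS_MASK covering {r4, r5, pc} the
   parallel emitted below is roughly equivalent to
	pop	{r4, r5, pc}
   i.e. a pop that also returns, with REG_CFA_RESTORE notes attached
   for r4 and r5 (the PC is skipped in the DWARF info).  */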
17197 static void
17198 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
17199 {
17200 int num_regs = 0;
17201 int i, j;
17202 rtx par;
17203 rtx dwarf = NULL_RTX;
17204 rtx tmp, reg;
17205 bool return_in_pc;
17206 int offset_adj;
17207 int emit_update;
17208
17209 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
17210 offset_adj = return_in_pc ? 1 : 0;
17211 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17212 if (saved_regs_mask & (1 << i))
17213 num_regs++;
17214
17215 gcc_assert (num_regs && num_regs <= 16);
17216
17217 /* If SP is in reglist, then we don't emit SP update insn. */
17218 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
17219
17220 /* The parallel needs to hold num_regs SETs
17221 and one SET for the stack update. */
17222 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
17223
17224 if (return_in_pc)
17225 {
17226 tmp = ret_rtx;
17227 XVECEXP (par, 0, 0) = tmp;
17228 }
17229
17230 if (emit_update)
17231 {
17232 /* Increment the stack pointer, based on there being
17233 num_regs 4-byte registers to restore. */
17234 tmp = gen_rtx_SET (VOIDmode,
17235 stack_pointer_rtx,
17236 plus_constant (Pmode,
17237 stack_pointer_rtx,
17238 4 * num_regs));
17239 RTX_FRAME_RELATED_P (tmp) = 1;
17240 XVECEXP (par, 0, offset_adj) = tmp;
17241 }
17242
17243 /* Now restore every reg, which may include PC. */
17244 for (j = 0, i = 0; j < num_regs; i++)
17245 if (saved_regs_mask & (1 << i))
17246 {
17247 reg = gen_rtx_REG (SImode, i);
17248 if ((num_regs == 1) && emit_update && !return_in_pc)
17249 {
17250 /* Emit single load with writeback. */
17251 tmp = gen_frame_mem (SImode,
17252 gen_rtx_POST_INC (Pmode,
17253 stack_pointer_rtx));
17254 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
17255 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17256 return;
17257 }
17258
17259 tmp = gen_rtx_SET (VOIDmode,
17260 reg,
17261 gen_frame_mem
17262 (SImode,
17263 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
17264 RTX_FRAME_RELATED_P (tmp) = 1;
17265 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
17266
17267 /* We need to maintain a sequence for DWARF info too. As dwarf info
17268 should not have PC, skip PC. */
17269 if (i != PC_REGNUM)
17270 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17271
17272 j++;
17273 }
17274
17275 if (return_in_pc)
17276 par = emit_jump_insn (par);
17277 else
17278 par = emit_insn (par);
17279
17280 REG_NOTES (par) = dwarf;
17281 if (!return_in_pc)
17282 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
17283 stack_pointer_rtx, stack_pointer_rtx);
17284 }
17285
17286 /* Generate and emit an insn pattern that we will recognize as a pop_multi
17287 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
17288
17289 Unfortunately, since this insn does not reflect very well the actual
17290 semantics of the operation, we need to annotate the insn for the benefit
17291 of DWARF2 frame unwind information. */
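/* For illustration: with NUM_REGS == 3 the parallel emitted below
   holds BASE_REG := BASE_REG + 24 plus three DFmode loads at offsets
   0, 8 and 16, which corresponds roughly to a single
	vldmia	<base>!, {d<n>, d<n+1>, d<n+2>}
   for three consecutive D registers starting at FIRST_REG.  */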
17292 static void
17293 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
17294 {
17295 int i, j;
17296 rtx par;
17297 rtx dwarf = NULL_RTX;
17298 rtx tmp, reg;
17299
17300 gcc_assert (num_regs && num_regs <= 32);
17301
17302 /* Workaround ARM10 VFPr1 bug. */
17303 if (num_regs == 2 && !arm_arch6)
17304 {
17305 if (first_reg == 15)
17306 first_reg--;
17307
17308 num_regs++;
17309 }
17310
17311 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
17312 there could be up to 32 D-registers to restore.
17313 If there are more than 16 D-registers, make two recursive calls,
17314 each of which emits one pop_multi instruction. */
17315 if (num_regs > 16)
17316 {
17317 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
17318 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
17319 return;
17320 }
17321
17322 /* The parallel needs to hold num_regs SETs
17323 and one SET for the stack update. */
17324 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
17325
17326 /* Increment the stack pointer, based on there being
17327 num_regs 8-byte registers to restore. */
17328 tmp = gen_rtx_SET (VOIDmode,
17329 base_reg,
17330 plus_constant (Pmode, base_reg, 8 * num_regs));
17331 RTX_FRAME_RELATED_P (tmp) = 1;
17332 XVECEXP (par, 0, 0) = tmp;
17333
17334 /* Now show every reg that will be restored, using a SET for each. */
17335 for (j = 0, i=first_reg; j < num_regs; i += 2)
17336 {
17337 reg = gen_rtx_REG (DFmode, i);
17338
17339 tmp = gen_rtx_SET (VOIDmode,
17340 reg,
17341 gen_frame_mem
17342 (DFmode,
17343 plus_constant (Pmode, base_reg, 8 * j)));
17344 RTX_FRAME_RELATED_P (tmp) = 1;
17345 XVECEXP (par, 0, j + 1) = tmp;
17346
17347 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17348
17349 j++;
17350 }
17351
17352 par = emit_insn (par);
17353 REG_NOTES (par) = dwarf;
17354
17355 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
17356 base_reg, base_reg);
17357 }
17358
17359 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
17360 even number of registers is being popped, multiple LDRD patterns are created
17361 for all register pairs. If an odd number of registers is popped, the last
17362 register is loaded using an LDR pattern. */
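/* For illustration: with SAVED_REGS_MASK covering {r4, r5, r6, lr}
   the expansion below is roughly equivalent to
	ldrd	r4, r5, [sp]
	ldrd	r6, lr, [sp, #8]
	add	sp, sp, #16
   where the stack update is emitted as a separate insn after the
   register loads.  */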
17363 static void
17364 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
17365 {
17366 int num_regs = 0;
17367 int i, j;
17368 rtx par = NULL_RTX;
17369 rtx dwarf = NULL_RTX;
17370 rtx tmp, reg, tmp1;
17371 bool return_in_pc;
17372
17373 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
17374 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17375 if (saved_regs_mask & (1 << i))
17376 num_regs++;
17377
17378 gcc_assert (num_regs && num_regs <= 16);
17379
17380 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
17381 to be popped. So, if num_regs is even, now it will become odd,
17382 and we can generate pop with PC. If num_regs is odd, it will be
17383 even now, and ldr with return can be generated for PC. */
17384 if (return_in_pc)
17385 num_regs--;
17386
17387 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
17388
17389 /* Var j iterates over all the registers to gather the registers in
17390 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
17391 A PARALLEL RTX for a register pair is created here, so that the pattern for
17392 LDRD can be matched. As PC is always the last register to be popped, and
17393 we have already decremented num_regs if PC is in the mask, we don't have to
17394 worry about PC in this loop. */
17395 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
17396 if (saved_regs_mask & (1 << j))
17397 {
17398 /* Create RTX for memory load. */
17399 reg = gen_rtx_REG (SImode, j);
17400 tmp = gen_rtx_SET (SImode,
17401 reg,
17402 gen_frame_mem (SImode,
17403 plus_constant (Pmode,
17404 stack_pointer_rtx, 4 * i)));
17405 RTX_FRAME_RELATED_P (tmp) = 1;
17406
17407 if (i % 2 == 0)
17408 {
17409 /* When saved-register index (i) is even, the RTX to be emitted is
17410 yet to be created. Hence create it first. The LDRD pattern we
17411 are generating is :
17412 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
17413 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
17414 where target registers need not be consecutive. */
17415 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17416 dwarf = NULL_RTX;
17417 }
17418
17419 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
17420 added as 0th element and if i is odd, reg_i is added as 1st element
17421 of LDRD pattern shown above. */
17422 XVECEXP (par, 0, (i % 2)) = tmp;
17423 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17424
17425 if ((i % 2) == 1)
17426 {
17427 /* When saved-register index (i) is odd, RTXs for both the registers
17428 to be loaded are generated in above given LDRD pattern, and the
17429 pattern can be emitted now. */
17430 par = emit_insn (par);
17431 REG_NOTES (par) = dwarf;
17432 RTX_FRAME_RELATED_P (par) = 1;
17433 }
17434
17435 i++;
17436 }
17437
17438 /* If the number of registers pushed is odd AND return_in_pc is false, OR
17439 the number of registers is even AND return_in_pc is true, the last register
17440 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
17441 then do the LDR with post-increment. */
17442
17443 /* Increment the stack pointer, based on there being
17444 num_regs 4-byte registers to restore. */
17445 tmp = gen_rtx_SET (VOIDmode,
17446 stack_pointer_rtx,
17447 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
17448 RTX_FRAME_RELATED_P (tmp) = 1;
17449 tmp = emit_insn (tmp);
17450 if (!return_in_pc)
17451 {
17452 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
17453 stack_pointer_rtx, stack_pointer_rtx);
17454 }
17455
17456 dwarf = NULL_RTX;
17457
17458 if (((num_regs % 2) == 1 && !return_in_pc)
17459 || ((num_regs % 2) == 0 && return_in_pc))
17460 {
17461 /* Scan for the single register to be popped. Skip until the saved
17462 register is found. */
17463 for (; (saved_regs_mask & (1 << j)) == 0; j++);
17464
17465 /* Gen LDR with post increment here. */
17466 tmp1 = gen_rtx_MEM (SImode,
17467 gen_rtx_POST_INC (SImode,
17468 stack_pointer_rtx));
17469 set_mem_alias_set (tmp1, get_frame_alias_set ());
17470
17471 reg = gen_rtx_REG (SImode, j);
17472 tmp = gen_rtx_SET (SImode, reg, tmp1);
17473 RTX_FRAME_RELATED_P (tmp) = 1;
17474 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17475
17476 if (return_in_pc)
17477 {
17478 /* If return_in_pc, j must be PC_REGNUM. */
17479 gcc_assert (j == PC_REGNUM);
17480 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17481 XVECEXP (par, 0, 0) = ret_rtx;
17482 XVECEXP (par, 0, 1) = tmp;
17483 par = emit_jump_insn (par);
17484 }
17485 else
17486 {
17487 par = emit_insn (tmp);
17488 REG_NOTES (par) = dwarf;
17489 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
17490 stack_pointer_rtx, stack_pointer_rtx);
17491 }
17492
17493 }
17494 else if ((num_regs % 2) == 1 && return_in_pc)
17495 {
17496 /* There are 2 registers to be popped. So, generate the pattern
17497 pop_multiple_with_stack_update_and_return to pop in PC. */
17498 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
17499 }
17500
17501 return;
17502 }
17503
17504 /* LDRD in ARM mode needs consecutive registers as operands. This function
17505 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
17506 offset addressing and then generates one separate stack update. This provides
17507 more scheduling freedom, compared to writeback on every load. However,
17508 if the function returns using load into PC directly
17509 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
17510 before the last load. TODO: Add a peephole optimization to recognize
17511 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
17512 peephole optimization to merge the load at stack-offset zero
17513 with the stack update instruction using load with writeback
17514 in post-index addressing mode. */
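/* For illustration: with SAVED_REGS_MASK covering {r4, r5, r7, lr}
   (only r4/r5 form a consecutive even/odd pair) the expansion below
   is roughly equivalent to
	ldrd	r4, r5, [sp]
	ldr	r7, [sp, #8]
	ldr	lr, [sp, #12]
	add	sp, sp, #16
   with one separate stack update at the end.  */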
17515 static void
17516 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
17517 {
17518 int j = 0;
17519 int offset = 0;
17520 rtx par = NULL_RTX;
17521 rtx dwarf = NULL_RTX;
17522 rtx tmp, mem;
17523
17524 /* Restore saved registers. */
17525 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
17526 j = 0;
17527 while (j <= LAST_ARM_REGNUM)
17528 if (saved_regs_mask & (1 << j))
17529 {
17530 if ((j % 2) == 0
17531 && (saved_regs_mask & (1 << (j + 1)))
17532 && (j + 1) != PC_REGNUM)
17533 {
17534 /* Current register and next register form register pair for which
17535 LDRD can be generated. PC is always the last register popped, and
17536 we handle it separately. */
17537 if (offset > 0)
17538 mem = gen_frame_mem (DImode,
17539 plus_constant (Pmode,
17540 stack_pointer_rtx,
17541 offset));
17542 else
17543 mem = gen_frame_mem (DImode, stack_pointer_rtx);
17544
17545 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
17546 RTX_FRAME_RELATED_P (tmp) = 1;
17547 tmp = emit_insn (tmp);
17548
17549 /* Generate dwarf info. */
17550
17551 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17552 gen_rtx_REG (SImode, j),
17553 NULL_RTX);
17554 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17555 gen_rtx_REG (SImode, j + 1),
17556 dwarf);
17557
17558 REG_NOTES (tmp) = dwarf;
17559
17560 offset += 8;
17561 j += 2;
17562 }
17563 else if (j != PC_REGNUM)
17564 {
17565 /* Emit a single word load. */
17566 if (offset > 0)
17567 mem = gen_frame_mem (SImode,
17568 plus_constant (Pmode,
17569 stack_pointer_rtx,
17570 offset));
17571 else
17572 mem = gen_frame_mem (SImode, stack_pointer_rtx);
17573
17574 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
17575 RTX_FRAME_RELATED_P (tmp) = 1;
17576 tmp = emit_insn (tmp);
17577
17578 /* Generate dwarf info. */
17579 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
17580 gen_rtx_REG (SImode, j),
17581 NULL_RTX);
17582
17583 offset += 4;
17584 j += 1;
17585 }
17586 else /* j == PC_REGNUM */
17587 j++;
17588 }
17589 else
17590 j++;
17591
17592 /* Update the stack. */
17593 if (offset > 0)
17594 {
17595 tmp = gen_rtx_SET (Pmode,
17596 stack_pointer_rtx,
17597 plus_constant (Pmode,
17598 stack_pointer_rtx,
17599 offset));
17600 RTX_FRAME_RELATED_P (tmp) = 1;
17601 emit_insn (tmp);
17602 offset = 0;
17603 }
17604
17605 if (saved_regs_mask & (1 << PC_REGNUM))
17606 {
17607 /* Only PC is to be popped. */
17608 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17609 XVECEXP (par, 0, 0) = ret_rtx;
17610 tmp = gen_rtx_SET (SImode,
17611 gen_rtx_REG (SImode, PC_REGNUM),
17612 gen_frame_mem (SImode,
17613 gen_rtx_POST_INC (SImode,
17614 stack_pointer_rtx)));
17615 RTX_FRAME_RELATED_P (tmp) = 1;
17616 XVECEXP (par, 0, 1) = tmp;
17617 par = emit_jump_insn (par);
17618
17619 /* Generate dwarf info. */
17620 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17621 gen_rtx_REG (SImode, PC_REGNUM),
17622 NULL_RTX);
17623 REG_NOTES (par) = dwarf;
17624 }
17625 }
17626
17627 /* Calculate the size of the return value that is passed in registers. */
17628 static unsigned
17629 arm_size_return_regs (void)
17630 {
17631 enum machine_mode mode;
17632
17633 if (crtl->return_rtx != 0)
17634 mode = GET_MODE (crtl->return_rtx);
17635 else
17636 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17637
17638 return GET_MODE_SIZE (mode);
17639 }
17640
17641 /* Return true if the current function needs to save/restore LR. */
17642 static bool
17643 thumb_force_lr_save (void)
17644 {
17645 return !cfun->machine->lr_save_eliminated
17646 && (!leaf_function_p ()
17647 || thumb_far_jump_used_p ()
17648 || df_regs_ever_live_p (LR_REGNUM));
17649 }
17650
17651 /* We do not know whether r3 will be available, because
17652 an indirect tail call may be happening in this
17653 particular case. */
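/* A tail call is treated as indirect when the call address is held in
   a register, i.e. when the call rtx looks roughly like
	(call (mem:SI (reg:SI <Rn>)) ...)
   rather than referencing a symbol_ref directly.  */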
17654 static bool
17655 is_indirect_tailcall_p (rtx call)
17656 {
17657 rtx pat = PATTERN (call);
17658
17659 /* Indirect tail call. */
17660 pat = XVECEXP (pat, 0, 0);
17661 if (GET_CODE (pat) == SET)
17662 pat = SET_SRC (pat);
17663
17664 pat = XEXP (XEXP (pat, 0), 0);
17665 return REG_P (pat);
17666 }
17667
17668 /* Return true if r3 is used by any of the tail call insns in the
17669 current function. */
17670 static bool
17671 any_sibcall_could_use_r3 (void)
17672 {
17673 edge_iterator ei;
17674 edge e;
17675
17676 if (!crtl->tail_call_emit)
17677 return false;
17678 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17679 if (e->flags & EDGE_SIBCALL)
17680 {
17681 rtx call = BB_END (e->src);
17682 if (!CALL_P (call))
17683 call = prev_nonnote_nondebug_insn (call);
17684 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
17685 if (find_regno_fusage (call, USE, 3)
17686 || is_indirect_tailcall_p (call))
17687 return true;
17688 }
17689 return false;
17690 }
17691
17692
17693 /* Compute the distance from register FROM to register TO.
17694 These can be the arg pointer (26), the soft frame pointer (25),
17695 the stack pointer (13) or the hard frame pointer (11).
17696 In thumb mode r7 is used as the soft frame pointer, if needed.
17697 Typical stack layout looks like this:
17698
17699 old stack pointer -> | |
17700 ----
17701 | | \
17702 | | saved arguments for
17703 | | vararg functions
17704 | | /
17705 --
17706 hard FP & arg pointer -> | | \
17707 | | stack
17708 | | frame
17709 | | /
17710 --
17711 | | \
17712 | | call saved
17713 | | registers
17714 soft frame pointer -> | | /
17715 --
17716 | | \
17717 | | local
17718 | | variables
17719 locals base pointer -> | | /
17720 --
17721 | | \
17722 | | outgoing
17723 | | arguments
17724 current stack pointer -> | | /
17725 --
17726
17727 For a given function some or all of these stack components
17728 may not be needed, giving rise to the possibility of
17729 eliminating some of the registers.
17730
17731 The values returned by this function must reflect the behavior
17732 of arm_expand_prologue() and arm_compute_save_reg_mask().
17733
17734 The sign of the number returned reflects the direction of stack
17735 growth, so the values are positive for all eliminations except
17736 from the soft frame pointer to the hard frame pointer.
17737
17738 SFP may point just inside the local variables block to ensure correct
17739 alignment. */
17740
17741
17742 /* Calculate stack offsets. These are used to calculate register elimination
17743 offsets and in prologue/epilogue code. Also calculates which registers
17744 should be saved. */
17745
17746 static arm_stack_offsets *
17747 arm_get_frame_offsets (void)
17748 {
17749 struct arm_stack_offsets *offsets;
17750 unsigned long func_type;
17751 int leaf;
17752 int saved;
17753 int core_saved;
17754 HOST_WIDE_INT frame_size;
17755 int i;
17756
17757 offsets = &cfun->machine->stack_offsets;
17758
17759 /* We need to know if we are a leaf function. Unfortunately, it
17760 is possible to be called after start_sequence has been called,
17761 which causes get_insns to return the insns for the sequence,
17762 not the function, which will cause leaf_function_p to return
17763 the incorrect result.
17764
17765 Fortunately, we do not need to know about leaf functions once reload has completed, and the
17766 frame size cannot be changed after that time, so we can safely
17767 use the cached value. */
17768
17769 if (reload_completed)
17770 return offsets;
17771
17772 /* Initially this is the size of the local variables. It will be translated
17773 into an offset once we have determined the size of preceding data. */
17774 frame_size = ROUND_UP_WORD (get_frame_size ());
17775
17776 leaf = leaf_function_p ();
17777
17778 /* Space for variadic functions. */
17779 offsets->saved_args = crtl->args.pretend_args_size;
17780
17781 /* In Thumb mode this is incorrect, but never used. */
17782 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
17783 arm_compute_static_chain_stack_bytes();
17784
17785 if (TARGET_32BIT)
17786 {
17787 unsigned int regno;
17788
17789 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
17790 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17791 saved = core_saved;
17792
17793 /* We know that SP will be doubleword aligned on entry, and we must
17794 preserve that condition at any subroutine call. We also require the
17795 soft frame pointer to be doubleword aligned. */
17796
17797 if (TARGET_REALLY_IWMMXT)
17798 {
17799 /* Check for the call-saved iWMMXt registers. */
17800 for (regno = FIRST_IWMMXT_REGNUM;
17801 regno <= LAST_IWMMXT_REGNUM;
17802 regno++)
17803 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
17804 saved += 8;
17805 }
17806
17807 func_type = arm_current_func_type ();
17808 /* Space for saved VFP registers. */
17809 if (! IS_VOLATILE (func_type)
17810 && TARGET_HARD_FLOAT && TARGET_VFP)
17811 saved += arm_get_vfp_saved_size ();
17812 }
17813 else /* TARGET_THUMB1 */
17814 {
17815 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
17816 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17817 saved = core_saved;
17818 if (TARGET_BACKTRACE)
17819 saved += 16;
17820 }
17821
17822 /* Saved registers include the stack frame. */
17823 offsets->saved_regs = offsets->saved_args + saved +
17824 arm_compute_static_chain_stack_bytes();
17825 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
17826 /* A leaf function does not need any stack alignment if it has nothing
17827 on the stack. */
17828 if (leaf && frame_size == 0
17829 /* However if it calls alloca(), we have a dynamically allocated
17830 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
17831 && ! cfun->calls_alloca)
17832 {
17833 offsets->outgoing_args = offsets->soft_frame;
17834 offsets->locals_base = offsets->soft_frame;
17835 return offsets;
17836 }
17837
17838 /* Ensure SFP has the correct alignment. */
17839 if (ARM_DOUBLEWORD_ALIGN
17840 && (offsets->soft_frame & 7))
17841 {
17842 offsets->soft_frame += 4;
17843 /* Try to align stack by pushing an extra reg. Don't bother doing this
17844 when there is a stack frame as the alignment will be rolled into
17845 the normal stack adjustment. */
17846 if (frame_size + crtl->outgoing_args_size == 0)
17847 {
17848 int reg = -1;
17849
17850 /* If it is safe to use r3, then do so. This sometimes
17851 generates better code on Thumb-2 by avoiding the need to
17852 use 32-bit push/pop instructions. */
17853 if (! any_sibcall_could_use_r3 ()
17854 && arm_size_return_regs () <= 12
17855 && (offsets->saved_regs_mask & (1 << 3)) == 0
17856 && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
17857 {
17858 reg = 3;
17859 }
17860 else
17861 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17862 {
17863 /* Avoid fixed registers; they may be changed at
17864 arbitrary times so it's unsafe to restore them
17865 during the epilogue. */
17866 if (!fixed_regs[i]
17867 && (offsets->saved_regs_mask & (1 << i)) == 0)
17868 {
17869 reg = i;
17870 break;
17871 }
17872 }
17873
17874 if (reg != -1)
17875 {
17876 offsets->saved_regs += 4;
17877 offsets->saved_regs_mask |= (1 << reg);
17878 }
17879 }
17880 }
17881
17882 offsets->locals_base = offsets->soft_frame + frame_size;
17883 offsets->outgoing_args = (offsets->locals_base
17884 + crtl->outgoing_args_size);
17885
17886 if (ARM_DOUBLEWORD_ALIGN)
17887 {
17888 /* Ensure SP remains doubleword aligned. */
17889 if (offsets->outgoing_args & 7)
17890 offsets->outgoing_args += 4;
17891 gcc_assert (!(offsets->outgoing_args & 7));
17892 }
17893
17894 return offsets;
17895 }
17896
17897
17898 /* Calculate the relative offsets for the different stack pointers. Positive
17899 offsets are in the direction of stack growth. */
17900
17901 HOST_WIDE_INT
17902 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17903 {
17904 arm_stack_offsets *offsets;
17905
17906 offsets = arm_get_frame_offsets ();
17907
17908 /* OK, now we have enough information to compute the distances.
17909 There must be an entry in these switch tables for each pair
17910 of registers in ELIMINABLE_REGS, even if some of the entries
17911 seem to be redundant or useless. */
17912 switch (from)
17913 {
17914 case ARG_POINTER_REGNUM:
17915 switch (to)
17916 {
17917 case THUMB_HARD_FRAME_POINTER_REGNUM:
17918 return 0;
17919
17920 case FRAME_POINTER_REGNUM:
17921 /* This is the reverse of the soft frame pointer
17922 to hard frame pointer elimination below. */
17923 return offsets->soft_frame - offsets->saved_args;
17924
17925 case ARM_HARD_FRAME_POINTER_REGNUM:
17926 /* This is only non-zero in the case where the static chain register
17927 is stored above the frame. */
17928 return offsets->frame - offsets->saved_args - 4;
17929
17930 case STACK_POINTER_REGNUM:
17931 /* If nothing has been pushed on the stack at all
17932 then this will return -4. This *is* correct! */
17933 return offsets->outgoing_args - (offsets->saved_args + 4);
17934
17935 default:
17936 gcc_unreachable ();
17937 }
17938 gcc_unreachable ();
17939
17940 case FRAME_POINTER_REGNUM:
17941 switch (to)
17942 {
17943 case THUMB_HARD_FRAME_POINTER_REGNUM:
17944 return 0;
17945
17946 case ARM_HARD_FRAME_POINTER_REGNUM:
17947 /* The hard frame pointer points to the top entry in the
17948 stack frame. The soft frame pointer to the bottom entry
17949 in the stack frame. If there is no stack frame at all,
17950 then they are identical. */
17951
17952 return offsets->frame - offsets->soft_frame;
17953
17954 case STACK_POINTER_REGNUM:
17955 return offsets->outgoing_args - offsets->soft_frame;
17956
17957 default:
17958 gcc_unreachable ();
17959 }
17960 gcc_unreachable ();
17961
17962 default:
17963 /* You cannot eliminate from the stack pointer.
17964 In theory you could eliminate from the hard frame
17965 pointer to the stack pointer, but this will never
17966 happen, since if a stack frame is not needed the
17967 hard frame pointer will never be used. */
17968 gcc_unreachable ();
17969 }
17970 }
17971
17972 /* Given FROM and TO register numbers, say whether this elimination is
17973 allowed. Frame pointer elimination is automatically handled.
17974
17975 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
17976 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
17977 pointer, we must eliminate FRAME_POINTER_REGNUM into
17978 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
17979 ARG_POINTER_REGNUM. */
17980
17981 bool
17982 arm_can_eliminate (const int from, const int to)
17983 {
17984 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
17985 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
17986 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
17987 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
17988 true);
17989 }
17990
17991 /* Emit RTL to save coprocessor registers on function entry. Returns the
17992 number of bytes pushed. */
17993
17994 static int
17995 arm_save_coproc_regs(void)
17996 {
17997 int saved_size = 0;
17998 unsigned reg;
17999 unsigned start_reg;
18000 rtx insn;
18001
18002 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
18003 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18004 {
18005 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
18006 insn = gen_rtx_MEM (V2SImode, insn);
18007 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
18008 RTX_FRAME_RELATED_P (insn) = 1;
18009 saved_size += 8;
18010 }
18011
18012 if (TARGET_HARD_FLOAT && TARGET_VFP)
18013 {
18014 start_reg = FIRST_VFP_REGNUM;
18015
18016 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
18017 {
18018 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
18019 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
18020 {
18021 if (start_reg != reg)
18022 saved_size += vfp_emit_fstmd (start_reg,
18023 (reg - start_reg) / 2);
18024 start_reg = reg + 2;
18025 }
18026 }
18027 if (start_reg != reg)
18028 saved_size += vfp_emit_fstmd (start_reg,
18029 (reg - start_reg) / 2);
18030 }
18031 return saved_size;
18032 }
18033
18034
18035 /* Set the Thumb frame pointer from the stack pointer. */
18036
18037 static void
18038 thumb_set_frame_pointer (arm_stack_offsets *offsets)
18039 {
18040 HOST_WIDE_INT amount;
18041 rtx insn, dwarf;
18042
18043 amount = offsets->outgoing_args - offsets->locals_base;
18044 if (amount < 1024)
18045 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18046 stack_pointer_rtx, GEN_INT (amount)));
18047 else
18048 {
18049 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
18050 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
18051 expects the first two operands to be the same. */
18052 if (TARGET_THUMB2)
18053 {
18054 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18055 stack_pointer_rtx,
18056 hard_frame_pointer_rtx));
18057 }
18058 else
18059 {
18060 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18061 hard_frame_pointer_rtx,
18062 stack_pointer_rtx));
18063 }
18064 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
18065 plus_constant (Pmode, stack_pointer_rtx, amount));
18066 RTX_FRAME_RELATED_P (dwarf) = 1;
18067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18068 }
18069
18070 RTX_FRAME_RELATED_P (insn) = 1;
18071 }
18072
18073 /* Generate the prologue instructions for entry into an ARM or Thumb-2
18074 function. */
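/* For illustration: for a typical -mapcs-frame function that needs a
   frame pointer and saves no other registers, the code below emits a
   prologue roughly equivalent to
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
   (compare the example shown above for arm_poke_function_name).  */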
18075 void
18076 arm_expand_prologue (void)
18077 {
18078 rtx amount;
18079 rtx insn;
18080 rtx ip_rtx;
18081 unsigned long live_regs_mask;
18082 unsigned long func_type;
18083 int fp_offset = 0;
18084 int saved_pretend_args = 0;
18085 int saved_regs = 0;
18086 unsigned HOST_WIDE_INT args_to_push;
18087 arm_stack_offsets *offsets;
18088
18089 func_type = arm_current_func_type ();
18090
18091 /* Naked functions don't have prologues. */
18092 if (IS_NAKED (func_type))
18093 return;
18094
18095 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
18096 args_to_push = crtl->args.pretend_args_size;
18097
18098 /* Compute which register we will have to save onto the stack. */
18099 offsets = arm_get_frame_offsets ();
18100 live_regs_mask = offsets->saved_regs_mask;
18101
18102 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
18103
18104 if (IS_STACKALIGN (func_type))
18105 {
18106 rtx r0, r1;
18107
18108 /* Handle a word-aligned stack pointer. We generate the following:
18109
18110 mov r0, sp
18111 bic r1, r0, #7
18112 mov sp, r1
18113 <save and restore r0 in normal prologue/epilogue>
18114 mov sp, r0
18115 bx lr
18116
18117 The unwinder doesn't need to know about the stack realignment.
18118 Just tell it we saved SP in r0. */
18119 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
18120
18121 r0 = gen_rtx_REG (SImode, 0);
18122 r1 = gen_rtx_REG (SImode, 1);
18123
18124 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
18125 RTX_FRAME_RELATED_P (insn) = 1;
18126 add_reg_note (insn, REG_CFA_REGISTER, NULL);
18127
18128 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
18129
18130 /* ??? The CFA changes here, which may cause GDB to conclude that it
18131 has entered a different function. That said, the unwind info is
18132 correct, individually, before and after this instruction because
18133 we've described the save of SP, which will override the default
18134 handling of SP as restoring from the CFA. */
18135 emit_insn (gen_movsi (stack_pointer_rtx, r1));
18136 }
18137
18138 /* For APCS frames, if IP register is clobbered
18139 when creating frame, save that register in a special
18140 way. */
18141 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18142 {
18143 if (IS_INTERRUPT (func_type))
18144 {
18145 /* Interrupt functions must not corrupt any registers.
18146 Creating a frame pointer however, corrupts the IP
18147 register, so we must push it first. */
18148 emit_multi_reg_push (1 << IP_REGNUM);
18149
18150 /* Do not set RTX_FRAME_RELATED_P on this insn.
18151 The dwarf stack unwinding code only wants to see one
18152 stack decrement per function, and this is not it. If
18153 this instruction is labeled as being part of the frame
18154 creation sequence then dwarf2out_frame_debug_expr will
18155 die when it encounters the assignment of IP to FP
18156 later on, since the use of SP here establishes SP as
18157 the CFA register and not IP.
18158
18159 Anyway this instruction is not really part of the stack
18160 frame creation although it is part of the prologue. */
18161 }
18162 else if (IS_NESTED (func_type))
18163 {
18164 /* The static chain register is the same as the IP register
18165 used as a scratch register during stack frame creation.
18166 To get around this need to find somewhere to store IP
18167 whilst the frame is being created. We try the following
18168 places in order:
18169
18170 1. The last argument register r3.
18171 2. A slot on the stack above the frame. (This only
18172 works if the function is not a varargs function).
18173 3. Register r3 again, after pushing the argument registers
18174 onto the stack.
18175
18176 Note - we only need to tell the dwarf2 backend about the SP
18177 adjustment in the second variant; the static chain register
18178 doesn't need to be unwound, as it doesn't contain a value
18179 inherited from the caller. */
18180
18181 if (!arm_r3_live_at_start_p ())
18182 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18183 else if (args_to_push == 0)
18184 {
18185 rtx dwarf;
18186
18187 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
18188 saved_regs += 4;
18189
18190 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
18191 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
18192 fp_offset = 4;
18193
18194 /* Just tell the dwarf backend that we adjusted SP. */
18195 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18196 plus_constant (Pmode, stack_pointer_rtx,
18197 -fp_offset));
18198 RTX_FRAME_RELATED_P (insn) = 1;
18199 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18200 }
18201 else
18202 {
18203 /* Store the args on the stack. */
18204 if (cfun->machine->uses_anonymous_args)
18205 insn = emit_multi_reg_push
18206 ((0xf0 >> (args_to_push / 4)) & 0xf);
18207 else
18208 insn = emit_insn
18209 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18210 GEN_INT (- args_to_push)));
18211
18212 RTX_FRAME_RELATED_P (insn) = 1;
18213
18214 saved_pretend_args = 1;
18215 fp_offset = args_to_push;
18216 args_to_push = 0;
18217
18218 /* Now reuse r3 to preserve IP. */
18219 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18220 }
18221 }
18222
18223 insn = emit_set_insn (ip_rtx,
18224 plus_constant (Pmode, stack_pointer_rtx,
18225 fp_offset));
18226 RTX_FRAME_RELATED_P (insn) = 1;
18227 }
18228
18229 if (args_to_push)
18230 {
18231 /* Push the argument registers, or reserve space for them. */
18232 if (cfun->machine->uses_anonymous_args)
18233 insn = emit_multi_reg_push
18234 ((0xf0 >> (args_to_push / 4)) & 0xf);
18235 else
18236 insn = emit_insn
18237 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18238 GEN_INT (- args_to_push)));
18239 RTX_FRAME_RELATED_P (insn) = 1;
18240 }
18241
18242 /* If this is an interrupt service routine, and the link register
18243 is going to be pushed, and we're not generating the extra
18244 push of IP (needed when using an APCS frame with a frame pointer),
18245 subtracting four from LR now will mean that the function return
18246 can be done with a single instruction. */
18247 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
18248 && (live_regs_mask & (1 << LR_REGNUM)) != 0
18249 && !(frame_pointer_needed && TARGET_APCS_FRAME)
18250 && TARGET_ARM)
18251 {
18252 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
18253
18254 emit_set_insn (lr, plus_constant (SImode, lr, -4));
18255 }
18256
18257 if (live_regs_mask)
18258 {
18259 saved_regs += bit_count (live_regs_mask) * 4;
18260 if (optimize_size && !frame_pointer_needed
18261 && saved_regs == offsets->saved_regs - offsets->saved_args)
18262 {
18263 /* If no coprocessor registers are being pushed and we don't have
18264 to worry about a frame pointer, then push extra registers to
18265 create the stack frame. This is done in a way that does not
18266 alter the frame layout, so it is independent of the epilogue. */
18267 int n;
18268 int frame;
18269 n = 0;
18270 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
18271 n++;
18272 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
18273 if (frame && n * 4 >= frame)
18274 {
18275 n = frame / 4;
18276 live_regs_mask |= (1 << n) - 1;
18277 saved_regs += frame;
18278 }
18279 }
18280
18281 if (current_tune->prefer_ldrd_strd
18282 && !optimize_function_for_size_p (cfun))
18283 {
18284 if (TARGET_THUMB2)
18285 {
18286 thumb2_emit_strd_push (live_regs_mask);
18287 }
18288 else if (TARGET_ARM
18289 && !TARGET_APCS_FRAME
18290 && !IS_INTERRUPT (func_type))
18291 {
18292 arm_emit_strd_push (live_regs_mask);
18293 }
18294 else
18295 {
18296 insn = emit_multi_reg_push (live_regs_mask);
18297 RTX_FRAME_RELATED_P (insn) = 1;
18298 }
18299 }
18300 else
18301 {
18302 insn = emit_multi_reg_push (live_regs_mask);
18303 RTX_FRAME_RELATED_P (insn) = 1;
18304 }
18305 }
18306
18307 if (! IS_VOLATILE (func_type))
18308 saved_regs += arm_save_coproc_regs ();
18309
18310 if (frame_pointer_needed && TARGET_ARM)
18311 {
18312 /* Create the new frame pointer. */
18313 if (TARGET_APCS_FRAME)
18314 {
18315 insn = GEN_INT (-(4 + args_to_push + fp_offset));
18316 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
18317 RTX_FRAME_RELATED_P (insn) = 1;
18318
18319 if (IS_NESTED (func_type))
18320 {
18321 /* Recover the static chain register. */
18322 if (!arm_r3_live_at_start_p () || saved_pretend_args)
18323 insn = gen_rtx_REG (SImode, 3);
18324 else /* if (crtl->args.pretend_args_size == 0) */
18325 {
18326 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
18327 insn = gen_frame_mem (SImode, insn);
18328 }
18329 emit_set_insn (ip_rtx, insn);
18330 /* Add a USE to stop propagate_one_insn() from barfing. */
18331 emit_insn (gen_force_register_use (ip_rtx));
18332 }
18333 }
18334 else
18335 {
18336 insn = GEN_INT (saved_regs - 4);
18337 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18338 stack_pointer_rtx, insn));
18339 RTX_FRAME_RELATED_P (insn) = 1;
18340 }
18341 }
18342
18343 if (flag_stack_usage_info)
18344 current_function_static_stack_size
18345 = offsets->outgoing_args - offsets->saved_args;
18346
18347 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
18348 {
18349 /* This add can produce multiple insns for a large constant, so we
18350 need to get tricky. */
18351 rtx last = get_last_insn ();
18352
18353 amount = GEN_INT (offsets->saved_args + saved_regs
18354 - offsets->outgoing_args);
18355
18356 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18357 amount));
18358 do
18359 {
18360 last = last ? NEXT_INSN (last) : get_insns ();
18361 RTX_FRAME_RELATED_P (last) = 1;
18362 }
18363 while (last != insn);
18364
18365 /* If the frame pointer is needed, emit a special barrier that
18366 will prevent the scheduler from moving stores to the frame
18367 before the stack adjustment. */
18368 if (frame_pointer_needed)
18369 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
18370 hard_frame_pointer_rtx));
18371 }
18372
18373
18374 if (frame_pointer_needed && TARGET_THUMB2)
18375 thumb_set_frame_pointer (offsets);
18376
18377 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18378 {
18379 unsigned long mask;
18380
18381 mask = live_regs_mask;
18382 mask &= THUMB2_WORK_REGS;
18383 if (!IS_NESTED (func_type))
18384 mask |= (1 << IP_REGNUM);
18385 arm_load_pic_register (mask);
18386 }
18387
18388 /* If we are profiling, make sure no instructions are scheduled before
18389 the call to mcount. Similarly if the user has requested no
18390 scheduling in the prolog. Similarly if we want non-call exceptions
18391 using the EABI unwinder, to prevent faulting instructions from being
18392 swapped with a stack adjustment. */
18393 if (crtl->profile || !TARGET_SCHED_PROLOG
18394 || (arm_except_unwind_info (&global_options) == UI_TARGET
18395 && cfun->can_throw_non_call_exceptions))
18396 emit_insn (gen_blockage ());
18397
18398 /* If the link register is being kept alive, with the return address in it,
18399 then make sure that it does not get reused by the ce2 pass. */
18400 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
18401 cfun->machine->lr_save_eliminated = 1;
18402 }
18403 \f
18404 /* Print condition code to STREAM. Helper function for arm_print_operand. */
18405 static void
18406 arm_print_condition (FILE *stream)
18407 {
18408 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
18409 {
18410 /* Branch conversion is not implemented for Thumb-2. */
18411 if (TARGET_THUMB)
18412 {
18413 output_operand_lossage ("predicated Thumb instruction");
18414 return;
18415 }
18416 if (current_insn_predicate != NULL)
18417 {
18418 output_operand_lossage
18419 ("predicated instruction in conditional sequence");
18420 return;
18421 }
18422
18423 fputs (arm_condition_codes[arm_current_cc], stream);
18424 }
18425 else if (current_insn_predicate)
18426 {
18427 enum arm_cond_code code;
18428
18429 if (TARGET_THUMB1)
18430 {
18431 output_operand_lossage ("predicated Thumb instruction");
18432 return;
18433 }
18434
18435 code = get_arm_condition_code (current_insn_predicate);
18436 fputs (arm_condition_codes[code], stream);
18437 }
18438 }
18439
18440
18441 /* If CODE is 'd', then X is a condition operand and the instruction
18442 should only be executed if the condition is true.
18443 If CODE is 'D', then X is a condition operand and the instruction
18444 should only be executed if the condition is false: however, if the mode
18445 of the comparison is CCFPEmode, then always execute the instruction -- we
18446 do this because in these circumstances !GE does not necessarily imply LT;
18447 in these cases the instruction pattern will take care to make sure that
18448 an instruction containing %d will follow, thereby undoing the effects of
18449 doing this instruction unconditionally.
18450 If CODE is 'N' then X is a floating point operand that must be negated
18451 before output.
18452 If CODE is 'B' then output a bitwise inverted value of X (a const int).
18453 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
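/* As a hedged illustration of %d/%D in use: a conditional-move
   alternative can have output templates along the lines of
   "mov%d3\t%0, %1" and "mov%D3\t%0, %2", so that exactly one of the
   two moves is emitted as a conditional instruction, selected by
   condition operand 3.  */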
18454 static void
18455 arm_print_operand (FILE *stream, rtx x, int code)
18456 {
18457 switch (code)
18458 {
18459 case '@':
18460 fputs (ASM_COMMENT_START, stream);
18461 return;
18462
18463 case '_':
18464 fputs (user_label_prefix, stream);
18465 return;
18466
18467 case '|':
18468 fputs (REGISTER_PREFIX, stream);
18469 return;
18470
18471 case '?':
18472 arm_print_condition (stream);
18473 return;
18474
18475 case '(':
18476 /* Nothing in unified syntax, otherwise the current condition code. */
18477 if (!TARGET_UNIFIED_ASM)
18478 arm_print_condition (stream);
18479 break;
18480
18481 case ')':
18482 /* The current condition code in unified syntax, otherwise nothing. */
18483 if (TARGET_UNIFIED_ASM)
18484 arm_print_condition (stream);
18485 break;
18486
18487 case '.':
18488 /* The current condition code for a condition code setting instruction.
18489 Preceded by 's' in unified syntax, otherwise followed by 's'. */
18490 if (TARGET_UNIFIED_ASM)
18491 {
18492 fputc ('s', stream);
18493 arm_print_condition (stream);
18494 }
18495 else
18496 {
18497 arm_print_condition (stream);
18498 fputc ('s', stream);
18499 }
18500 return;
18501
18502 case '!':
18503 /* If the instruction is conditionally executed then print
18504 the current condition code, otherwise print 's'. */
18505 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
18506 if (current_insn_predicate)
18507 arm_print_condition (stream);
18508 else
18509 fputc ('s', stream);
18510 break;
18511
18512 /* %# is a "break" sequence. It doesn't output anything, but is used to
18513 separate e.g. operand numbers from following text, if that text consists
18514 of further digits which we don't want to be part of the operand
18515 number. */
18516 case '#':
18517 return;
18518
18519 case 'N':
18520 {
18521 REAL_VALUE_TYPE r;
18522 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
18523 r = real_value_negate (&r);
18524 fprintf (stream, "%s", fp_const_from_val (&r));
18525 }
18526 return;
18527
18528 /* An integer or symbol address without a preceding # sign. */
18529 case 'c':
18530 switch (GET_CODE (x))
18531 {
18532 case CONST_INT:
18533 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18534 break;
18535
18536 case SYMBOL_REF:
18537 output_addr_const (stream, x);
18538 break;
18539
18540 case CONST:
18541 if (GET_CODE (XEXP (x, 0)) == PLUS
18542 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
18543 {
18544 output_addr_const (stream, x);
18545 break;
18546 }
18547 /* Fall through. */
18548
18549 default:
18550 output_operand_lossage ("Unsupported operand for code '%c'", code);
18551 }
18552 return;
18553
18554 /* An integer that we want to print in HEX. */
18555 case 'x':
18556 switch (GET_CODE (x))
18557 {
18558 case CONST_INT:
18559 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
18560 break;
18561
18562 default:
18563 output_operand_lossage ("Unsupported operand for code '%c'", code);
18564 }
18565 return;
18566
18567 case 'B':
18568 if (CONST_INT_P (x))
18569 {
18570 HOST_WIDE_INT val;
18571 val = ARM_SIGN_EXTEND (~INTVAL (x));
18572 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
18573 }
18574 else
18575 {
18576 putc ('~', stream);
18577 output_addr_const (stream, x);
18578 }
18579 return;
18580
18581 case 'L':
18582 /* The low 16 bits of an immediate constant. */
18583 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
18584 return;
18585
18586 case 'i':
18587 fprintf (stream, "%s", arithmetic_instr (x, 1));
18588 return;
18589
18590 case 'I':
18591 fprintf (stream, "%s", arithmetic_instr (x, 0));
18592 return;
18593
18594 case 'S':
18595 {
18596 HOST_WIDE_INT val;
18597 const char *shift;
18598
18599 shift = shift_op (x, &val);
18600
18601 if (shift)
18602 {
18603 fprintf (stream, ", %s ", shift);
18604 if (val == -1)
18605 arm_print_operand (stream, XEXP (x, 1), 0);
18606 else
18607 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
18608 }
18609 }
18610 return;
18611
18612 /* An explanation of the 'Q', 'R' and 'H' register operands:
18613
18614 In a pair of registers containing a DI or DF value the 'Q'
18615 operand returns the register number of the register containing
18616 the least significant part of the value. The 'R' operand returns
18617 the register number of the register containing the most
18618 significant part of the value.
18619
18620 The 'H' operand returns the higher of the two register numbers.
18621 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
18622 same as the 'Q' operand, since the most significant part of the
18623 value is held in the lower number register. The reverse is true
18624 on systems where WORDS_BIG_ENDIAN is false.
18625
18626 The purpose of these operands is to distinguish between cases
18627 where the endian-ness of the values is important (for example
18628 when they are added together), and cases where the endian-ness
18629 is irrelevant, but the order of register operations is important.
18630 For example when loading a value from memory into a register
18631 pair, the endian-ness does not matter. Provided that the value
18632 from the lower memory address is put into the lower numbered
18633 register, and the value from the higher address is put into the
18634 higher numbered register, the load will work regardless of whether
18635 the value being loaded is big-wordian or little-wordian. The
18636 order of the two register loads can matter however, if the address
18637 of the memory location is actually held in one of the registers
18638 being overwritten by the load.
18639
18640 The 'Q' and 'R' constraints are also available for 64-bit
18641 constants. */
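/* A worked example: for a DImode value held in the pair {r0, r1},
   %Q prints r0, %R prints r1 and %H prints r1 when WORDS_BIG_ENDIAN
   is false; when it is true, %Q prints r1 and %R prints r0, while %H
   still prints r1.  A doubleword addition can therefore be written
   endian-independently as "adds %Q0, %Q1, %Q2" followed by
   "adc %R0, %R1, %R2".  */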
18642 case 'Q':
18643 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18644 {
18645 rtx part = gen_lowpart (SImode, x);
18646 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18647 return;
18648 }
18649
18650 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18651 {
18652 output_operand_lossage ("invalid operand for code '%c'", code);
18653 return;
18654 }
18655
18656 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
18657 return;
18658
18659 case 'R':
18660 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18661 {
18662 enum machine_mode mode = GET_MODE (x);
18663 rtx part;
18664
18665 if (mode == VOIDmode)
18666 mode = DImode;
18667 part = gen_highpart_mode (SImode, mode, x);
18668 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18669 return;
18670 }
18671
18672 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18673 {
18674 output_operand_lossage ("invalid operand for code '%c'", code);
18675 return;
18676 }
18677
18678 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
18679 return;
18680
18681 case 'H':
18682 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18683 {
18684 output_operand_lossage ("invalid operand for code '%c'", code);
18685 return;
18686 }
18687
18688 asm_fprintf (stream, "%r", REGNO (x) + 1);
18689 return;
18690
18691 case 'J':
18692 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18693 {
18694 output_operand_lossage ("invalid operand for code '%c'", code);
18695 return;
18696 }
18697
18698 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
18699 return;
18700
18701 case 'K':
18702 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18703 {
18704 output_operand_lossage ("invalid operand for code '%c'", code);
18705 return;
18706 }
18707
18708 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
18709 return;
18710
18711 case 'm':
18712 asm_fprintf (stream, "%r",
18713 REG_P (XEXP (x, 0))
18714 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
18715 return;
18716
18717 case 'M':
18718 asm_fprintf (stream, "{%r-%r}",
18719 REGNO (x),
18720 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
18721 return;
18722
18723 /* Like 'M', but writing doubleword vector registers, for use by Neon
18724 insns. */
18725 case 'h':
18726 {
18727 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
18728 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
18729 if (numregs == 1)
18730 asm_fprintf (stream, "{d%d}", regno);
18731 else
18732 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
18733 }
18734 return;
18735
18736 case 'd':
18737 /* CONST_TRUE_RTX means always -- that's the default. */
18738 if (x == const_true_rtx)
18739 return;
18740
18741 if (!COMPARISON_P (x))
18742 {
18743 output_operand_lossage ("invalid operand for code '%c'", code);
18744 return;
18745 }
18746
18747 fputs (arm_condition_codes[get_arm_condition_code (x)],
18748 stream);
18749 return;
18750
18751 case 'D':
18752 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
18753 want to do that. */
18754 if (x == const_true_rtx)
18755 {
18756 output_operand_lossage ("instruction never executed");
18757 return;
18758 }
18759 if (!COMPARISON_P (x))
18760 {
18761 output_operand_lossage ("invalid operand for code '%c'", code);
18762 return;
18763 }
18764
18765 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
18766 (get_arm_condition_code (x))],
18767 stream);
18768 return;
18769
18770 case 's':
18771 case 'V':
18772 case 'W':
18773 case 'X':
18774 case 'Y':
18775 case 'Z':
18776 /* Former Maverick support, removed after GCC-4.7. */
18777 output_operand_lossage ("obsolete Maverick format code '%c'", code);
18778 return;
18779
18780 case 'U':
18781 if (!REG_P (x)
18782 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
18783 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
18784 /* Bad value for wCG register number. */
18785 {
18786 output_operand_lossage ("invalid operand for code '%c'", code);
18787 return;
18788 }
18789
18790 else
18791 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
18792 return;
18793
18794 /* Print an iWMMXt control register name. */
18795 case 'w':
18796 if (!CONST_INT_P (x)
18797 || INTVAL (x) < 0
18798 || INTVAL (x) >= 16)
18799 /* Bad value for wC register number. */
18800 {
18801 output_operand_lossage ("invalid operand for code '%c'", code);
18802 return;
18803 }
18804
18805 else
18806 {
18807 static const char * wc_reg_names [16] =
18808 {
18809 "wCID", "wCon", "wCSSF", "wCASF",
18810 "wC4", "wC5", "wC6", "wC7",
18811 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
18812 "wC12", "wC13", "wC14", "wC15"
18813 };
18814
18815 fputs (wc_reg_names [INTVAL (x)], stream);
18816 }
18817 return;
18818
18819 /* Print the high single-precision register of a VFP double-precision
18820 register. */
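/* For example, d3 occupies the pair s6/s7, so %p prints "s7".  */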
18821 case 'p':
18822 {
18823 int mode = GET_MODE (x);
18824 int regno;
18825
18826 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
18827 {
18828 output_operand_lossage ("invalid operand for code '%c'", code);
18829 return;
18830 }
18831
18832 regno = REGNO (x);
18833 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18834 {
18835 output_operand_lossage ("invalid operand for code '%c'", code);
18836 return;
18837 }
18838
18839 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
18840 }
18841 return;
18842
18843 /* Print a VFP/Neon double precision or quad precision register name. */
18844 case 'P':
18845 case 'q':
18846 {
18847 int mode = GET_MODE (x);
18848 int is_quad = (code == 'q');
18849 int regno;
18850
18851 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18852 {
18853 output_operand_lossage ("invalid operand for code '%c'", code);
18854 return;
18855 }
18856
18857 if (!REG_P (x)
18858 || !IS_VFP_REGNUM (REGNO (x)))
18859 {
18860 output_operand_lossage ("invalid operand for code '%c'", code);
18861 return;
18862 }
18863
18864 regno = REGNO (x);
18865 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18866 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18867 {
18868 output_operand_lossage ("invalid operand for code '%c'", code);
18869 return;
18870 }
18871
18872 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
18873 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
18874 }
18875 return;
18876
18877 /* These two codes print the low/high doubleword register of a Neon quad
18878 register, respectively. For pair-structure types, can also print
18879 low/high quadword registers. */
18880 case 'e':
18881 case 'f':
18882 {
18883 int mode = GET_MODE (x);
18884 int regno;
18885
18886 if ((GET_MODE_SIZE (mode) != 16
18887 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
18888 {
18889 output_operand_lossage ("invalid operand for code '%c'", code);
18890 return;
18891 }
18892
18893 regno = REGNO (x);
18894 if (!NEON_REGNO_OK_FOR_QUAD (regno))
18895 {
18896 output_operand_lossage ("invalid operand for code '%c'", code);
18897 return;
18898 }
18899
18900 if (GET_MODE_SIZE (mode) == 16)
18901 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
18902 + (code == 'f' ? 1 : 0));
18903 else
18904 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
18905 + (code == 'f' ? 1 : 0));
18906 }
18907 return;
18908
18909 /* Print a VFPv3 floating-point constant, represented as an integer
18910 index. */
18911 case 'G':
18912 {
18913 int index = vfp3_const_double_index (x);
18914 gcc_assert (index != -1);
18915 fprintf (stream, "%d", index);
18916 }
18917 return;
18918
18919 /* Print bits representing opcode features for Neon.
18920
18921 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
18922 and polynomials as unsigned.
18923
18924 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
18925
18926 Bit 2 is 1 for rounding functions, 0 otherwise. */
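/* For example, a bits value of 5 (binary 101) describes a signed
   integer operation with rounding: %T prints 's', %F prints 'i',
   %t prints 's' and %O prints 'r'.  */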
18927
18928 /* Identify the type as 's', 'u', 'p' or 'f'. */
18929 case 'T':
18930 {
18931 HOST_WIDE_INT bits = INTVAL (x);
18932 fputc ("uspf"[bits & 3], stream);
18933 }
18934 return;
18935
18936 /* Likewise, but signed and unsigned integers are both 'i'. */
18937 case 'F':
18938 {
18939 HOST_WIDE_INT bits = INTVAL (x);
18940 fputc ("iipf"[bits & 3], stream);
18941 }
18942 return;
18943
18944 /* As for 'T', but emit 'u' instead of 'p'. */
18945 case 't':
18946 {
18947 HOST_WIDE_INT bits = INTVAL (x);
18948 fputc ("usuf"[bits & 3], stream);
18949 }
18950 return;
18951
18952 /* Bit 2: rounding (vs none). */
18953 case 'O':
18954 {
18955 HOST_WIDE_INT bits = INTVAL (x);
18956 fputs ((bits & 4) != 0 ? "r" : "", stream);
18957 }
18958 return;
18959
18960 /* Memory operand for vld1/vst1 instruction. */
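/* For example, a 16-byte access through r0 that is known to be
   128-bit aligned and uses post-increment addressing prints as
   "[r0:128]!"; if no suitable alignment hint applies, just "[r0]"
   is emitted.  */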
18961 case 'A':
18962 {
18963 rtx addr;
18964 bool postinc = FALSE;
18965 unsigned align, memsize, align_bits;
18966
18967 gcc_assert (MEM_P (x));
18968 addr = XEXP (x, 0);
18969 if (GET_CODE (addr) == POST_INC)
18970 {
18971 postinc = TRUE;
18972 addr = XEXP (addr, 0);
18973 }
18974 asm_fprintf (stream, "[%r", REGNO (addr));
18975
18976 /* We know the alignment of this access, so we can emit a hint in the
18977 instruction (for some alignments) as an aid to the memory subsystem
18978 of the target. */
18979 align = MEM_ALIGN (x) >> 3;
18980 memsize = MEM_SIZE (x);
18981
18982 /* Only certain alignment specifiers are supported by the hardware. */
18983 if (memsize == 32 && (align % 32) == 0)
18984 align_bits = 256;
18985 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
18986 align_bits = 128;
18987 else if (memsize >= 8 && (align % 8) == 0)
18988 align_bits = 64;
18989 else
18990 align_bits = 0;
18991
18992 if (align_bits != 0)
18993 asm_fprintf (stream, ":%d", align_bits);
18994
18995 asm_fprintf (stream, "]");
18996
18997 if (postinc)
18998 fputs ("!", stream);
18999 }
19000 return;
19001
19002 case 'C':
19003 {
19004 rtx addr;
19005
19006 gcc_assert (MEM_P (x));
19007 addr = XEXP (x, 0);
19008 gcc_assert (REG_P (addr));
19009 asm_fprintf (stream, "[%r]", REGNO (addr));
19010 }
19011 return;
19012
19013 /* Translate an S register number into a D register number and element index. */
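/* For example, s5 prints as "d2[1]".  */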
19014 case 'y':
19015 {
19016 int mode = GET_MODE (x);
19017 int regno;
19018
19019 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
19020 {
19021 output_operand_lossage ("invalid operand for code '%c'", code);
19022 return;
19023 }
19024
19025 regno = REGNO (x);
19026 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19027 {
19028 output_operand_lossage ("invalid operand for code '%c'", code);
19029 return;
19030 }
19031
19032 regno = regno - FIRST_VFP_REGNUM;
19033 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
19034 }
19035 return;
19036
19037 case 'v':
19038 gcc_assert (CONST_DOUBLE_P (x));
19039 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
19040 return;
19041
19042 /* Register specifier for vld1.16/vst1.16. Translate the S register
19043 number into a D register number and element index. */
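/* For example, s5 prints as "d2[2]", i.e. the 16-bit lane at which
   the upper half of d2 starts.  */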
19044 case 'z':
19045 {
19046 int mode = GET_MODE (x);
19047 int regno;
19048
19049 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
19050 {
19051 output_operand_lossage ("invalid operand for code '%c'", code);
19052 return;
19053 }
19054
19055 regno = REGNO (x);
19056 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19057 {
19058 output_operand_lossage ("invalid operand for code '%c'", code);
19059 return;
19060 }
19061
19062 regno = regno - FIRST_VFP_REGNUM;
19063 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
19064 }
19065 return;
19066
19067 default:
19068 if (x == 0)
19069 {
19070 output_operand_lossage ("missing operand");
19071 return;
19072 }
19073
19074 switch (GET_CODE (x))
19075 {
19076 case REG:
19077 asm_fprintf (stream, "%r", REGNO (x));
19078 break;
19079
19080 case MEM:
19081 output_memory_reference_mode = GET_MODE (x);
19082 output_address (XEXP (x, 0));
19083 break;
19084
19085 case CONST_DOUBLE:
19086 if (TARGET_NEON)
19087 {
19088 char fpstr[20];
19089 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
19090 sizeof (fpstr), 0, 1);
19091 fprintf (stream, "#%s", fpstr);
19092 }
19093 else
19094 fprintf (stream, "#%s", fp_immediate_constant (x));
19095 break;
19096
19097 default:
19098 gcc_assert (GET_CODE (x) != NEG);
19099 fputc ('#', stream);
19100 if (GET_CODE (x) == HIGH)
19101 {
19102 fputs (":lower16:", stream);
19103 x = XEXP (x, 0);
19104 }
19105
19106 output_addr_const (stream, x);
19107 break;
19108 }
19109 }
19110 }
19111 \f
19112 /* Target hook for printing a memory address. */
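/* A few representative outputs in ARM/Thumb-2 state (an illustrative
   sketch, not an exhaustive list): (reg r0) prints as "[r0]";
   (plus (reg r0) (const_int 4)) as "[r0, #4]";
   (plus (reg r0) (reg r1)) as "[r0, r1]";
   (pre_dec (reg r0)) for an SImode access as "[r0, #-4]!";
   and (post_inc (reg r0)) as "[r0], #4".  */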
19113 static void
19114 arm_print_operand_address (FILE *stream, rtx x)
19115 {
19116 if (TARGET_32BIT)
19117 {
19118 int is_minus = GET_CODE (x) == MINUS;
19119
19120 if (REG_P (x))
19121 asm_fprintf (stream, "[%r]", REGNO (x));
19122 else if (GET_CODE (x) == PLUS || is_minus)
19123 {
19124 rtx base = XEXP (x, 0);
19125 rtx index = XEXP (x, 1);
19126 HOST_WIDE_INT offset = 0;
19127 if (!REG_P (base)
19128 || (REG_P (index) && REGNO (index) == SP_REGNUM))
19129 {
19130 /* Ensure that BASE is a register. */
19131 /* (One of them must be.) */
19132 /* Also ensure that the SP is not used as an index register. */
19133 rtx temp = base;
19134 base = index;
19135 index = temp;
19136 }
19137 switch (GET_CODE (index))
19138 {
19139 case CONST_INT:
19140 offset = INTVAL (index);
19141 if (is_minus)
19142 offset = -offset;
19143 asm_fprintf (stream, "[%r, #%wd]",
19144 REGNO (base), offset);
19145 break;
19146
19147 case REG:
19148 asm_fprintf (stream, "[%r, %s%r]",
19149 REGNO (base), is_minus ? "-" : "",
19150 REGNO (index));
19151 break;
19152
19153 case MULT:
19154 case ASHIFTRT:
19155 case LSHIFTRT:
19156 case ASHIFT:
19157 case ROTATERT:
19158 {
19159 asm_fprintf (stream, "[%r, %s%r",
19160 REGNO (base), is_minus ? "-" : "",
19161 REGNO (XEXP (index, 0)));
19162 arm_print_operand (stream, index, 'S');
19163 fputs ("]", stream);
19164 break;
19165 }
19166
19167 default:
19168 gcc_unreachable ();
19169 }
19170 }
19171 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
19172 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
19173 {
19174 extern enum machine_mode output_memory_reference_mode;
19175
19176 gcc_assert (REG_P (XEXP (x, 0)));
19177
19178 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
19179 asm_fprintf (stream, "[%r, #%s%d]!",
19180 REGNO (XEXP (x, 0)),
19181 GET_CODE (x) == PRE_DEC ? "-" : "",
19182 GET_MODE_SIZE (output_memory_reference_mode));
19183 else
19184 asm_fprintf (stream, "[%r], #%s%d",
19185 REGNO (XEXP (x, 0)),
19186 GET_CODE (x) == POST_DEC ? "-" : "",
19187 GET_MODE_SIZE (output_memory_reference_mode));
19188 }
19189 else if (GET_CODE (x) == PRE_MODIFY)
19190 {
19191 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
19192 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19193 asm_fprintf (stream, "#%wd]!",
19194 INTVAL (XEXP (XEXP (x, 1), 1)));
19195 else
19196 asm_fprintf (stream, "%r]!",
19197 REGNO (XEXP (XEXP (x, 1), 1)));
19198 }
19199 else if (GET_CODE (x) == POST_MODIFY)
19200 {
19201 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
19202 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19203 asm_fprintf (stream, "#%wd",
19204 INTVAL (XEXP (XEXP (x, 1), 1)));
19205 else
19206 asm_fprintf (stream, "%r",
19207 REGNO (XEXP (XEXP (x, 1), 1)));
19208 }
19209 else output_addr_const (stream, x);
19210 }
19211 else
19212 {
19213 if (REG_P (x))
19214 asm_fprintf (stream, "[%r]", REGNO (x));
19215 else if (GET_CODE (x) == POST_INC)
19216 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
19217 else if (GET_CODE (x) == PLUS)
19218 {
19219 gcc_assert (REG_P (XEXP (x, 0)));
19220 if (CONST_INT_P (XEXP (x, 1)))
19221 asm_fprintf (stream, "[%r, #%wd]",
19222 REGNO (XEXP (x, 0)),
19223 INTVAL (XEXP (x, 1)));
19224 else
19225 asm_fprintf (stream, "[%r, %r]",
19226 REGNO (XEXP (x, 0)),
19227 REGNO (XEXP (x, 1)));
19228 }
19229 else
19230 output_addr_const (stream, x);
19231 }
19232 }
19233 \f
19234 /* Target hook for indicating whether a punctuation character for
19235 TARGET_PRINT_OPERAND is valid. */
19236 static bool
19237 arm_print_operand_punct_valid_p (unsigned char code)
19238 {
19239 return (code == '@' || code == '|' || code == '.'
19240 || code == '(' || code == ')' || code == '#'
19241 || (TARGET_32BIT && (code == '?'))
19242 || (TARGET_THUMB2 && (code == '!'))
19243 || (TARGET_THUMB && (code == '_')));
19244 }
19245 \f
19246 /* Target hook for assembling integer objects. The ARM version needs to
19247 handle word-sized values specially. */
19248 static bool
19249 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
19250 {
19251 enum machine_mode mode;
19252
19253 if (size == UNITS_PER_WORD && aligned_p)
19254 {
19255 fputs ("\t.word\t", asm_out_file);
19256 output_addr_const (asm_out_file, x);
19257
19258 /* Mark symbols as position independent. We only do this in the
19259 .text segment, not in the .data segment. */
19260 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
19261 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
19262 {
19263 /* See legitimize_pic_address for an explanation of the
19264 TARGET_VXWORKS_RTP check. */
19265 if (TARGET_VXWORKS_RTP
19266 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
19267 fputs ("(GOT)", asm_out_file);
19268 else
19269 fputs ("(GOTOFF)", asm_out_file);
19270 }
19271 fputc ('\n', asm_out_file);
19272 return true;
19273 }
19274
19275 mode = GET_MODE (x);
19276
19277 if (arm_vector_mode_supported_p (mode))
19278 {
19279 int i, units;
19280
19281 gcc_assert (GET_CODE (x) == CONST_VECTOR);
19282
19283 units = CONST_VECTOR_NUNITS (x);
19284 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
19285
19286 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19287 for (i = 0; i < units; i++)
19288 {
19289 rtx elt = CONST_VECTOR_ELT (x, i);
19290 assemble_integer
19291 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
19292 }
19293 else
19294 for (i = 0; i < units; i++)
19295 {
19296 rtx elt = CONST_VECTOR_ELT (x, i);
19297 REAL_VALUE_TYPE rval;
19298
19299 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
19300
19301 assemble_real
19302 (rval, GET_MODE_INNER (mode),
19303 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
19304 }
19305
19306 return true;
19307 }
19308
19309 return default_assemble_integer (x, size, aligned_p);
19310 }
19311
19312 static void
19313 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
19314 {
19315 section *s;
19316
19317 if (!TARGET_AAPCS_BASED)
19318 {
19319 (is_ctor ?
19320 default_named_section_asm_out_constructor
19321 : default_named_section_asm_out_destructor) (symbol, priority);
19322 return;
19323 }
19324
19325 /* Put these in the .init_array section, using a special relocation. */
19326 if (priority != DEFAULT_INIT_PRIORITY)
19327 {
19328 char buf[18];
19329 sprintf (buf, "%s.%.5u",
19330 is_ctor ? ".init_array" : ".fini_array",
19331 priority);
19332 s = get_section (buf, SECTION_WRITE, NULL_TREE);
19333 }
19334 else if (is_ctor)
19335 s = ctors_section;
19336 else
19337 s = dtors_section;
19338
19339 switch_to_section (s);
19340 assemble_align (POINTER_SIZE);
19341 fputs ("\t.word\t", asm_out_file);
19342 output_addr_const (asm_out_file, symbol);
19343 fputs ("(target1)\n", asm_out_file);
19344 }
19345
19346 /* Add a function to the list of static constructors. */
19347
19348 static void
19349 arm_elf_asm_constructor (rtx symbol, int priority)
19350 {
19351 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
19352 }
19353
19354 /* Add a function to the list of static destructors. */
19355
19356 static void
19357 arm_elf_asm_destructor (rtx symbol, int priority)
19358 {
19359 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
19360 }
19361 \f
19362 /* A finite state machine takes care of noticing whether or not instructions
19363 can be conditionally executed, and thus decreases execution time and code
19364 size by deleting branch instructions. The fsm is controlled by
19365 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
19366
19367 /* The states of the fsm controlling condition codes are:
19368 0: normal, do nothing special
19369 1: make ASM_OUTPUT_OPCODE not output this instruction
19370 2: make ASM_OUTPUT_OPCODE not output this instruction
19371 3: make instructions conditional
19372 4: make instructions conditional
19373
19374 State transitions (state->state by whom under condition):
19375 0 -> 1 final_prescan_insn if the `target' is a label
19376 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
19377 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
19378 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
19379 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
19380 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
19381 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
19382 (the target insn is arm_target_insn).
19383
19384 If the jump clobbers the conditions then we use states 2 and 4.
19385
19386 A similar thing can be done with conditional return insns.
19387
19388 XXX In case the `target' is an unconditional branch, this conditionalising
19389 of the instructions always reduces code size, but not always execution
19390 time. But then, I want to reduce the code size to somewhere near what
19391 /bin/cc produces. */
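/* As a concrete (illustrative) example, a sequence such as

     cmp   r0, #0
     beq   .L1
     add   r1, r1, #1
   .L1:

   can be rewritten by this machinery as

     cmp   r0, #0
     addne r1, r1, #1

   removing the branch altogether.  */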
19392
19393 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
19394 instructions. When a COND_EXEC instruction is seen the subsequent
19395 instructions are scanned so that multiple conditional instructions can be
19396 combined into a single IT block. arm_condexec_count and arm_condexec_mask
19397 specify the length and true/false mask for the IT block. These will be
19398 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
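/* For example (illustrative), three consecutive COND_EXEC insns
   predicated EQ, EQ and NE are covered by a single "itte eq" prefix:
   each insn whose condition matches arm_current_cc contributes a 't'
   to the mask and each insn using the inverse condition contributes
   an 'e'.  */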
19399
19400 /* Returns the index of the ARM condition code string in
19401 `arm_condition_codes', or ARM_NV if the comparison is invalid.
19402 COMPARISON should be an rtx like `(eq (...) (...))'. */
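/* For example, (eq (reg:CC CC_REGNUM) (const_int 0)) yields ARM_EQ,
   whereas (ge ...) applied to a CC_SWPmode register yields ARM_LE,
   because the operands of the original comparison were swapped when
   the mode was chosen.  */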
19403
19404 enum arm_cond_code
19405 maybe_get_arm_condition_code (rtx comparison)
19406 {
19407 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
19408 enum arm_cond_code code;
19409 enum rtx_code comp_code = GET_CODE (comparison);
19410
19411 if (GET_MODE_CLASS (mode) != MODE_CC)
19412 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
19413 XEXP (comparison, 1));
19414
19415 switch (mode)
19416 {
19417 case CC_DNEmode: code = ARM_NE; goto dominance;
19418 case CC_DEQmode: code = ARM_EQ; goto dominance;
19419 case CC_DGEmode: code = ARM_GE; goto dominance;
19420 case CC_DGTmode: code = ARM_GT; goto dominance;
19421 case CC_DLEmode: code = ARM_LE; goto dominance;
19422 case CC_DLTmode: code = ARM_LT; goto dominance;
19423 case CC_DGEUmode: code = ARM_CS; goto dominance;
19424 case CC_DGTUmode: code = ARM_HI; goto dominance;
19425 case CC_DLEUmode: code = ARM_LS; goto dominance;
19426 case CC_DLTUmode: code = ARM_CC;
19427
19428 dominance:
19429 if (comp_code == EQ)
19430 return ARM_INVERSE_CONDITION_CODE (code);
19431 if (comp_code == NE)
19432 return code;
19433 return ARM_NV;
19434
19435 case CC_NOOVmode:
19436 switch (comp_code)
19437 {
19438 case NE: return ARM_NE;
19439 case EQ: return ARM_EQ;
19440 case GE: return ARM_PL;
19441 case LT: return ARM_MI;
19442 default: return ARM_NV;
19443 }
19444
19445 case CC_Zmode:
19446 switch (comp_code)
19447 {
19448 case NE: return ARM_NE;
19449 case EQ: return ARM_EQ;
19450 default: return ARM_NV;
19451 }
19452
19453 case CC_Nmode:
19454 switch (comp_code)
19455 {
19456 case NE: return ARM_MI;
19457 case EQ: return ARM_PL;
19458 default: return ARM_NV;
19459 }
19460
19461 case CCFPEmode:
19462 case CCFPmode:
19463 /* We can handle all cases except UNEQ and LTGT. */
19464 switch (comp_code)
19465 {
19466 case GE: return ARM_GE;
19467 case GT: return ARM_GT;
19468 case LE: return ARM_LS;
19469 case LT: return ARM_MI;
19470 case NE: return ARM_NE;
19471 case EQ: return ARM_EQ;
19472 case ORDERED: return ARM_VC;
19473 case UNORDERED: return ARM_VS;
19474 case UNLT: return ARM_LT;
19475 case UNLE: return ARM_LE;
19476 case UNGT: return ARM_HI;
19477 case UNGE: return ARM_PL;
19478 /* UNEQ and LTGT do not have a representation. */
19479 case UNEQ: /* Fall through. */
19480 case LTGT: /* Fall through. */
19481 default: return ARM_NV;
19482 }
19483
19484 case CC_SWPmode:
19485 switch (comp_code)
19486 {
19487 case NE: return ARM_NE;
19488 case EQ: return ARM_EQ;
19489 case GE: return ARM_LE;
19490 case GT: return ARM_LT;
19491 case LE: return ARM_GE;
19492 case LT: return ARM_GT;
19493 case GEU: return ARM_LS;
19494 case GTU: return ARM_CC;
19495 case LEU: return ARM_CS;
19496 case LTU: return ARM_HI;
19497 default: return ARM_NV;
19498 }
19499
19500 case CC_Cmode:
19501 switch (comp_code)
19502 {
19503 case LTU: return ARM_CS;
19504 case GEU: return ARM_CC;
19505 default: return ARM_NV;
19506 }
19507
19508 case CC_CZmode:
19509 switch (comp_code)
19510 {
19511 case NE: return ARM_NE;
19512 case EQ: return ARM_EQ;
19513 case GEU: return ARM_CS;
19514 case GTU: return ARM_HI;
19515 case LEU: return ARM_LS;
19516 case LTU: return ARM_CC;
19517 default: return ARM_NV;
19518 }
19519
19520 case CC_NCVmode:
19521 switch (comp_code)
19522 {
19523 case GE: return ARM_GE;
19524 case LT: return ARM_LT;
19525 case GEU: return ARM_CS;
19526 case LTU: return ARM_CC;
19527 default: return ARM_NV;
19528 }
19529
19530 case CCmode:
19531 switch (comp_code)
19532 {
19533 case NE: return ARM_NE;
19534 case EQ: return ARM_EQ;
19535 case GE: return ARM_GE;
19536 case GT: return ARM_GT;
19537 case LE: return ARM_LE;
19538 case LT: return ARM_LT;
19539 case GEU: return ARM_CS;
19540 case GTU: return ARM_HI;
19541 case LEU: return ARM_LS;
19542 case LTU: return ARM_CC;
19543 default: return ARM_NV;
19544 }
19545
19546 default: gcc_unreachable ();
19547 }
19548 }
19549
19550 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
19551 static enum arm_cond_code
19552 get_arm_condition_code (rtx comparison)
19553 {
19554 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
19555 gcc_assert (code != ARM_NV);
19556 return code;
19557 }
19558
19559 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
19560 instructions. */
19561 void
19562 thumb2_final_prescan_insn (rtx insn)
19563 {
19564 rtx first_insn = insn;
19565 rtx body = PATTERN (insn);
19566 rtx predicate;
19567 enum arm_cond_code code;
19568 int n;
19569 int mask;
19570
19571 /* Remove the previous insn from the count of insns to be output. */
19572 if (arm_condexec_count)
19573 arm_condexec_count--;
19574
19575 /* Nothing to do if we are already inside a conditional block. */
19576 if (arm_condexec_count)
19577 return;
19578
19579 if (GET_CODE (body) != COND_EXEC)
19580 return;
19581
19582 /* Conditional jumps are implemented directly. */
19583 if (JUMP_P (insn))
19584 return;
19585
19586 predicate = COND_EXEC_TEST (body);
19587 arm_current_cc = get_arm_condition_code (predicate);
19588
19589 n = get_attr_ce_count (insn);
19590 arm_condexec_count = 1;
19591 arm_condexec_mask = (1 << n) - 1;
19592 arm_condexec_masklen = n;
19593 /* See if subsequent instructions can be combined into the same block. */
19594 for (;;)
19595 {
19596 insn = next_nonnote_insn (insn);
19597
19598 /* Jumping into the middle of an IT block is illegal, so a label or
19599 barrier terminates the block. */
19600 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
19601 break;
19602
19603 body = PATTERN (insn);
19604 /* USE and CLOBBER aren't really insns, so just skip them. */
19605 if (GET_CODE (body) == USE
19606 || GET_CODE (body) == CLOBBER)
19607 continue;
19608
19609 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
19610 if (GET_CODE (body) != COND_EXEC)
19611 break;
19612 /* Allow up to 4 conditionally executed instructions in a block. */
19613 n = get_attr_ce_count (insn);
19614 if (arm_condexec_masklen + n > MAX_INSN_PER_IT_BLOCK)
19615 break;
19616
19617 predicate = COND_EXEC_TEST (body);
19618 code = get_arm_condition_code (predicate);
19619 mask = (1 << n) - 1;
19620 if (arm_current_cc == code)
19621 arm_condexec_mask |= (mask << arm_condexec_masklen);
19622 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
19623 break;
19624
19625 arm_condexec_count++;
19626 arm_condexec_masklen += n;
19627
19628 /* A jump must be the last instruction in a conditional block. */
19629 if (JUMP_P (insn))
19630 break;
19631 }
19632 /* Restore recog_data (getting the attributes of other insns can
19633 destroy this array, but final.c assumes that it remains intact
19634 across this call). */
19635 extract_constrain_insn_cached (first_insn);
19636 }
19637
19638 void
19639 arm_final_prescan_insn (rtx insn)
19640 {
19641 /* BODY will hold the body of INSN. */
19642 rtx body = PATTERN (insn);
19643
19644 /* This will be 1 if trying to repeat the trick, and things need to be
19645 reversed if it appears to fail. */
19646 int reverse = 0;
19647
19648 /* If we start with a return insn, we only succeed if we find another one. */
19649 int seeking_return = 0;
19650 enum rtx_code return_code = UNKNOWN;
19651
19652 /* START_INSN will hold the insn from where we start looking. This is the
19653 first insn after the following code_label if REVERSE is true. */
19654 rtx start_insn = insn;
19655
19656 /* If in state 4, check if the target branch is reached, in order to
19657 change back to state 0. */
19658 if (arm_ccfsm_state == 4)
19659 {
19660 if (insn == arm_target_insn)
19661 {
19662 arm_target_insn = NULL;
19663 arm_ccfsm_state = 0;
19664 }
19665 return;
19666 }
19667
19668 /* If in state 3, it is possible to repeat the trick, if this insn is an
19669 unconditional branch to a label, and immediately following this branch
19670 is the previous target label which is only used once, and the label this
19671 branch jumps to is not too far off. */
19672 if (arm_ccfsm_state == 3)
19673 {
19674 if (simplejump_p (insn))
19675 {
19676 start_insn = next_nonnote_insn (start_insn);
19677 if (BARRIER_P (start_insn))
19678 {
19679 /* XXX Isn't this always a barrier? */
19680 start_insn = next_nonnote_insn (start_insn);
19681 }
19682 if (LABEL_P (start_insn)
19683 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19684 && LABEL_NUSES (start_insn) == 1)
19685 reverse = TRUE;
19686 else
19687 return;
19688 }
19689 else if (ANY_RETURN_P (body))
19690 {
19691 start_insn = next_nonnote_insn (start_insn);
19692 if (BARRIER_P (start_insn))
19693 start_insn = next_nonnote_insn (start_insn);
19694 if (LABEL_P (start_insn)
19695 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19696 && LABEL_NUSES (start_insn) == 1)
19697 {
19698 reverse = TRUE;
19699 seeking_return = 1;
19700 return_code = GET_CODE (body);
19701 }
19702 else
19703 return;
19704 }
19705 else
19706 return;
19707 }
19708
19709 gcc_assert (!arm_ccfsm_state || reverse);
19710 if (!JUMP_P (insn))
19711 return;
19712
19713 /* This jump might be paralleled with a clobber of the condition codes;
19714 the jump should always come first. */
19715 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
19716 body = XVECEXP (body, 0, 0);
19717
19718 if (reverse
19719 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
19720 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
19721 {
19722 int insns_skipped;
19723 int fail = FALSE, succeed = FALSE;
19724 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
19725 int then_not_else = TRUE;
19726 rtx this_insn = start_insn, label = 0;
19727
19728 /* Register the insn jumped to. */
19729 if (reverse)
19730 {
19731 if (!seeking_return)
19732 label = XEXP (SET_SRC (body), 0);
19733 }
19734 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
19735 label = XEXP (XEXP (SET_SRC (body), 1), 0);
19736 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
19737 {
19738 label = XEXP (XEXP (SET_SRC (body), 2), 0);
19739 then_not_else = FALSE;
19740 }
19741 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
19742 {
19743 seeking_return = 1;
19744 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
19745 }
19746 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
19747 {
19748 seeking_return = 1;
19749 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
19750 then_not_else = FALSE;
19751 }
19752 else
19753 gcc_unreachable ();
19754
19755 /* See how many insns this branch skips, and what kind of insns. If all
19756 insns are okay, and the label or unconditional branch to the same
19757 label is not too far away, succeed. */
19758 for (insns_skipped = 0;
19759 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
19760 {
19761 rtx scanbody;
19762
19763 this_insn = next_nonnote_insn (this_insn);
19764 if (!this_insn)
19765 break;
19766
19767 switch (GET_CODE (this_insn))
19768 {
19769 case CODE_LABEL:
19770 /* Succeed if it is the target label, otherwise fail since
19771 control falls in from somewhere else. */
19772 if (this_insn == label)
19773 {
19774 arm_ccfsm_state = 1;
19775 succeed = TRUE;
19776 }
19777 else
19778 fail = TRUE;
19779 break;
19780
19781 case BARRIER:
19782 /* Succeed if the following insn is the target label.
19783 Otherwise fail.
19784 If return insns are used then the last insn in a function
19785 will be a barrier. */
19786 this_insn = next_nonnote_insn (this_insn);
19787 if (this_insn && this_insn == label)
19788 {
19789 arm_ccfsm_state = 1;
19790 succeed = TRUE;
19791 }
19792 else
19793 fail = TRUE;
19794 break;
19795
19796 case CALL_INSN:
19797 /* The AAPCS says that conditional calls should not be
19798 used since they make interworking inefficient (the
19799 linker can't transform BL<cond> into BLX). That's
19800 only a problem if the machine has BLX. */
19801 if (arm_arch5)
19802 {
19803 fail = TRUE;
19804 break;
19805 }
19806
19807 /* Succeed if the following insn is the target label, or
19808 if the following two insns are a barrier and the
19809 target label. */
19810 this_insn = next_nonnote_insn (this_insn);
19811 if (this_insn && BARRIER_P (this_insn))
19812 this_insn = next_nonnote_insn (this_insn);
19813
19814 if (this_insn && this_insn == label
19815 && insns_skipped < max_insns_skipped)
19816 {
19817 arm_ccfsm_state = 1;
19818 succeed = TRUE;
19819 }
19820 else
19821 fail = TRUE;
19822 break;
19823
19824 case JUMP_INSN:
19825 /* If this is an unconditional branch to the same label, succeed.
19826 If it is to another label, do nothing. If it is conditional,
19827 fail. */
19828 /* XXX Probably, the tests for SET and the PC are
19829 unnecessary. */
19830
19831 scanbody = PATTERN (this_insn);
19832 if (GET_CODE (scanbody) == SET
19833 && GET_CODE (SET_DEST (scanbody)) == PC)
19834 {
19835 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19836 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19837 {
19838 arm_ccfsm_state = 2;
19839 succeed = TRUE;
19840 }
19841 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19842 fail = TRUE;
19843 }
19844 /* Fail if a conditional return is undesirable (e.g. on a
19845 StrongARM), but still allow this if optimizing for size. */
19846 else if (GET_CODE (scanbody) == return_code
19847 && !use_return_insn (TRUE, NULL)
19848 && !optimize_size)
19849 fail = TRUE;
19850 else if (GET_CODE (scanbody) == return_code)
19851 {
19852 arm_ccfsm_state = 2;
19853 succeed = TRUE;
19854 }
19855 else if (GET_CODE (scanbody) == PARALLEL)
19856 {
19857 switch (get_attr_conds (this_insn))
19858 {
19859 case CONDS_NOCOND:
19860 break;
19861 default:
19862 fail = TRUE;
19863 break;
19864 }
19865 }
19866 else
19867 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
19868
19869 break;
19870
19871 case INSN:
19872 /* Instructions using or affecting the condition codes make it
19873 fail. */
19874 scanbody = PATTERN (this_insn);
19875 if (!(GET_CODE (scanbody) == SET
19876 || GET_CODE (scanbody) == PARALLEL)
19877 || get_attr_conds (this_insn) != CONDS_NOCOND)
19878 fail = TRUE;
19879 break;
19880
19881 default:
19882 break;
19883 }
19884 }
19885 if (succeed)
19886 {
19887 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
19888 arm_target_label = CODE_LABEL_NUMBER (label);
19889 else
19890 {
19891 gcc_assert (seeking_return || arm_ccfsm_state == 2);
19892
19893 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
19894 {
19895 this_insn = next_nonnote_insn (this_insn);
19896 gcc_assert (!this_insn
19897 || (!BARRIER_P (this_insn)
19898 && !LABEL_P (this_insn)));
19899 }
19900 if (!this_insn)
19901 {
19902 /* Oh, dear! We ran off the end; give up. */
19903 extract_constrain_insn_cached (insn);
19904 arm_ccfsm_state = 0;
19905 arm_target_insn = NULL;
19906 return;
19907 }
19908 arm_target_insn = this_insn;
19909 }
19910
19911 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
19912 what it was. */
19913 if (!reverse)
19914 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
19915
19916 if (reverse || then_not_else)
19917 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
19918 }
19919
19920 /* Restore recog_data (getting the attributes of other insns can
19921 destroy this array, but final.c assumes that it remains intact
19922 across this call). */
19923 extract_constrain_insn_cached (insn);
19924 }
19925 }
19926
19927 /* Output IT instructions. */
19928 void
19929 thumb2_asm_output_opcode (FILE * stream)
19930 {
19931 char buff[5];
19932 int n;
19933
19934 if (arm_condexec_mask)
19935 {
19936 for (n = 0; n < arm_condexec_masklen; n++)
19937 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
19938 buff[n] = 0;
19939 asm_fprintf (stream, "i%s\t%s\n\t", buff,
19940 arm_condition_codes[arm_current_cc]);
19941 arm_condexec_mask = 0;
19942 }
19943 }
19944
19945 /* Returns true if REGNO is a valid register
19946 for holding a quantity of type MODE. */
19947 int
19948 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
19949 {
19950 if (GET_MODE_CLASS (mode) == MODE_CC)
19951 return (regno == CC_REGNUM
19952 || (TARGET_HARD_FLOAT && TARGET_VFP
19953 && regno == VFPCC_REGNUM));
19954
19955 if (TARGET_THUMB1)
19956 /* For the Thumb we only allow values bigger than SImode in
19957 registers 0 - 6, so that there is always a second low
19958 register available to hold the upper part of the value.
19959 We probably ought to ensure that the register is the
19960 start of an even numbered register pair. */
19961 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
19962
19963 if (TARGET_HARD_FLOAT && TARGET_VFP
19964 && IS_VFP_REGNUM (regno))
19965 {
19966 if (mode == SFmode || mode == SImode)
19967 return VFP_REGNO_OK_FOR_SINGLE (regno);
19968
19969 if (mode == DFmode)
19970 return VFP_REGNO_OK_FOR_DOUBLE (regno);
19971
19972 /* VFP registers can hold HFmode values, but there is no point in
19973 putting them there unless we have hardware conversion insns. */
19974 if (mode == HFmode)
19975 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
19976
19977 if (TARGET_NEON)
19978 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
19979 || (VALID_NEON_QREG_MODE (mode)
19980 && NEON_REGNO_OK_FOR_QUAD (regno))
19981 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
19982 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
19983 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
19984 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
19985 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
19986
19987 return FALSE;
19988 }
19989
19990 if (TARGET_REALLY_IWMMXT)
19991 {
19992 if (IS_IWMMXT_GR_REGNUM (regno))
19993 return mode == SImode;
19994
19995 if (IS_IWMMXT_REGNUM (regno))
19996 return VALID_IWMMXT_REG_MODE (mode);
19997 }
19998
19999 /* We allow almost any value to be stored in the general registers.
20000 Restrict doubleword quantities to even register pairs so that we can
20001 use ldrd. Do not allow very large Neon structure opaque modes in
20002 general registers; they would use too many. */
20003 if (regno <= LAST_ARM_REGNUM)
20004 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
20005 && ARM_NUM_REGS (mode) <= 4;
20006
20007 if (regno == FRAME_POINTER_REGNUM
20008 || regno == ARG_POINTER_REGNUM)
20009 /* We only allow integers in the fake hard registers. */
20010 return GET_MODE_CLASS (mode) == MODE_INT;
20011
20012 return FALSE;
20013 }
20014
20015 /* Implement MODES_TIEABLE_P. */
20016
20017 bool
20018 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20019 {
20020 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
20021 return true;
20022
20023 /* We specifically want to allow elements of "structure" modes to
20024 be tieable to the structure. This more general condition allows
20025 other rarer situations too. */
20026 if (TARGET_NEON
20027 && (VALID_NEON_DREG_MODE (mode1)
20028 || VALID_NEON_QREG_MODE (mode1)
20029 || VALID_NEON_STRUCT_MODE (mode1))
20030 && (VALID_NEON_DREG_MODE (mode2)
20031 || VALID_NEON_QREG_MODE (mode2)
20032 || VALID_NEON_STRUCT_MODE (mode2)))
20033 return true;
20034
20035 return false;
20036 }
20037
20038 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
20039 not used in arm mode. */
20040
20041 enum reg_class
20042 arm_regno_class (int regno)
20043 {
20044 if (TARGET_THUMB1)
20045 {
20046 if (regno == STACK_POINTER_REGNUM)
20047 return STACK_REG;
20048 if (regno == CC_REGNUM)
20049 return CC_REG;
20050 if (regno < 8)
20051 return LO_REGS;
20052 return HI_REGS;
20053 }
20054
20055 if (TARGET_THUMB2 && regno < 8)
20056 return LO_REGS;
20057
20058 if ( regno <= LAST_ARM_REGNUM
20059 || regno == FRAME_POINTER_REGNUM
20060 || regno == ARG_POINTER_REGNUM)
20061 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
20062
20063 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
20064 return TARGET_THUMB2 ? CC_REG : NO_REGS;
20065
20066 if (IS_VFP_REGNUM (regno))
20067 {
20068 if (regno <= D7_VFP_REGNUM)
20069 return VFP_D0_D7_REGS;
20070 else if (regno <= LAST_LO_VFP_REGNUM)
20071 return VFP_LO_REGS;
20072 else
20073 return VFP_HI_REGS;
20074 }
20075
20076 if (IS_IWMMXT_REGNUM (regno))
20077 return IWMMXT_REGS;
20078
20079 if (IS_IWMMXT_GR_REGNUM (regno))
20080 return IWMMXT_GR_REGS;
20081
20082 return NO_REGS;
20083 }
20084
20085 /* Handle a special case when computing the offset
20086 of an argument from the frame pointer. */
20087 int
20088 arm_debugger_arg_offset (int value, rtx addr)
20089 {
20090 rtx insn;
20091
20092 /* We are only interested if dbxout_parms() failed to compute the offset. */
20093 if (value != 0)
20094 return 0;
20095
20096 /* We can only cope with the case where the address is held in a register. */
20097 if (!REG_P (addr))
20098 return 0;
20099
20100 /* If we are using the frame pointer to point at the argument, then
20101 an offset of 0 is correct. */
20102 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
20103 return 0;
20104
20105 /* If we are using the stack pointer to point at the
20106 argument, then an offset of 0 is correct. */
20107 /* ??? Check this is consistent with thumb2 frame layout. */
20108 if ((TARGET_THUMB || !frame_pointer_needed)
20109 && REGNO (addr) == SP_REGNUM)
20110 return 0;
20111
20112 /* Oh dear. The argument is pointed to by a register rather
20113 than being held in a register, or being stored at a known
20114 offset from the frame pointer. Since GDB only understands
20115 those two kinds of argument we must translate the address
20116 held in the register into an offset from the frame pointer.
20117 We do this by searching through the insns for the function
20118 looking to see where this register gets its value. If the
20119 register is initialized from the frame pointer plus an offset
20120 then we are in luck and we can continue, otherwise we give up.
20121
20122 This code is exercised by producing debugging information
20123 for a function with arguments like this:
20124
20125 double func (double a, double b, int c, double d) {return d;}
20126
20127 Without this code the stab for parameter 'd' will be set to
20128 an offset of 0 from the frame pointer, rather than 8. */
20129
20130 /* The if() statement says:
20131
20132 If the insn is a normal instruction
20133 and if the insn is setting the value in a register
20134 and if the register being set is the register holding the address of the argument
20135 and if the address is computed by an addition
20136 that involves adding to a register
20137 which is the frame pointer
20138 a constant integer
20139
20140 then... */
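 /* Annotation (sketch, not part of the original source): the loop below looks
    for an insn whose pattern has the shape

	(set (reg ADDR)
	     (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int OFFSET)))

    where ADDR is the register holding the argument's address; when such an
    insn is found, OFFSET becomes the value returned as the argument's offset
    from the frame pointer.  */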
20141
20142 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20143 {
20144 if ( NONJUMP_INSN_P (insn)
20145 && GET_CODE (PATTERN (insn)) == SET
20146 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
20147 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
20148 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
20149 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
20150 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
20151 )
20152 {
20153 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
20154
20155 break;
20156 }
20157 }
20158
20159 if (value == 0)
20160 {
20161 debug_rtx (addr);
20162 warning (0, "unable to compute real location of stacked parameter");
20163 value = 8; /* XXX magic hack */
20164 }
20165
20166 return value;
20167 }
20168 \f
20169 typedef enum {
20170 T_V8QI,
20171 T_V4HI,
20172 T_V4HF,
20173 T_V2SI,
20174 T_V2SF,
20175 T_DI,
20176 T_V16QI,
20177 T_V8HI,
20178 T_V4SI,
20179 T_V4SF,
20180 T_V2DI,
20181 T_TI,
20182 T_EI,
20183 T_OI,
20184 T_MAX /* Size of enum. Keep last. */
20185 } neon_builtin_type_mode;
20186
20187 #define TYPE_MODE_BIT(X) (1 << (X))
20188
20189 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
20190 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
20191 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
20192 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
20193 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
20194 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
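/* Annotation (not part of the original source): TB_DREG and TB_QREG are
   bitmasks over neon_builtin_type_mode; for example
   (TB_DREG & TYPE_MODE_BIT (T_V8QI)) is nonzero while
   (TB_QREG & TYPE_MODE_BIT (T_V8QI)) is zero, so they are presumably used
   elsewhere to test whether a variant's key mode lives in a doubleword (D)
   or quadword (Q) register.  */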
20195
20196 #define v8qi_UP T_V8QI
20197 #define v4hi_UP T_V4HI
20198 #define v4hf_UP T_V4HF
20199 #define v2si_UP T_V2SI
20200 #define v2sf_UP T_V2SF
20201 #define di_UP T_DI
20202 #define v16qi_UP T_V16QI
20203 #define v8hi_UP T_V8HI
20204 #define v4si_UP T_V4SI
20205 #define v4sf_UP T_V4SF
20206 #define v2di_UP T_V2DI
20207 #define ti_UP T_TI
20208 #define ei_UP T_EI
20209 #define oi_UP T_OI
20210
20211 #define UP(X) X##_UP
20212
20213 typedef enum {
20214 NEON_BINOP,
20215 NEON_TERNOP,
20216 NEON_UNOP,
20217 NEON_GETLANE,
20218 NEON_SETLANE,
20219 NEON_CREATE,
20220 NEON_RINT,
20221 NEON_DUP,
20222 NEON_DUPLANE,
20223 NEON_COMBINE,
20224 NEON_SPLIT,
20225 NEON_LANEMUL,
20226 NEON_LANEMULL,
20227 NEON_LANEMULH,
20228 NEON_LANEMAC,
20229 NEON_SCALARMUL,
20230 NEON_SCALARMULL,
20231 NEON_SCALARMULH,
20232 NEON_SCALARMAC,
20233 NEON_CONVERT,
20234 NEON_FLOAT_WIDEN,
20235 NEON_FLOAT_NARROW,
20236 NEON_FIXCONV,
20237 NEON_SELECT,
20238 NEON_RESULTPAIR,
20239 NEON_REINTERP,
20240 NEON_VTBL,
20241 NEON_VTBX,
20242 NEON_LOAD1,
20243 NEON_LOAD1LANE,
20244 NEON_STORE1,
20245 NEON_STORE1LANE,
20246 NEON_LOADSTRUCT,
20247 NEON_LOADSTRUCTLANE,
20248 NEON_STORESTRUCT,
20249 NEON_STORESTRUCTLANE,
20250 NEON_LOGICBINOP,
20251 NEON_SHIFTINSERT,
20252 NEON_SHIFTIMM,
20253 NEON_SHIFTACC
20254 } neon_itype;
20255
20256 typedef struct {
20257 const char *name;
20258 const neon_itype itype;
20259 const neon_builtin_type_mode mode;
20260 const enum insn_code code;
20261 unsigned int fcode;
20262 } neon_builtin_datum;
20263
20264 #define CF(N,X) CODE_FOR_neon_##N##X
20265
20266 #define VAR1(T, N, A) \
20267 {#N, NEON_##T, UP (A), CF (N, A), 0}
20268 #define VAR2(T, N, A, B) \
20269 VAR1 (T, N, A), \
20270 {#N, NEON_##T, UP (B), CF (N, B), 0}
20271 #define VAR3(T, N, A, B, C) \
20272 VAR2 (T, N, A, B), \
20273 {#N, NEON_##T, UP (C), CF (N, C), 0}
20274 #define VAR4(T, N, A, B, C, D) \
20275 VAR3 (T, N, A, B, C), \
20276 {#N, NEON_##T, UP (D), CF (N, D), 0}
20277 #define VAR5(T, N, A, B, C, D, E) \
20278 VAR4 (T, N, A, B, C, D), \
20279 {#N, NEON_##T, UP (E), CF (N, E), 0}
20280 #define VAR6(T, N, A, B, C, D, E, F) \
20281 VAR5 (T, N, A, B, C, D, E), \
20282 {#N, NEON_##T, UP (F), CF (N, F), 0}
20283 #define VAR7(T, N, A, B, C, D, E, F, G) \
20284 VAR6 (T, N, A, B, C, D, E, F), \
20285 {#N, NEON_##T, UP (G), CF (N, G), 0}
20286 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
20287 VAR7 (T, N, A, B, C, D, E, F, G), \
20288 {#N, NEON_##T, UP (H), CF (N, H), 0}
20289 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
20290 VAR8 (T, N, A, B, C, D, E, F, G, H), \
20291 {#N, NEON_##T, UP (I), CF (N, I), 0}
20292 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
20293 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
20294 {#N, NEON_##T, UP (J), CF (N, J), 0}
20295
20296 /* The NEON builtin data can be found in arm_neon_builtins.def.
20297 The mode entries in the following table correspond to the "key" type of the
20298 instruction variant, i.e. equivalent to that which would be specified after
20299 the assembler mnemonic, which usually refers to the last vector operand.
20300 (Signed, unsigned and polynomial types are not distinguished, however; they
20301 are all mapped onto the same mode for a given element size.) The modes
20302 listed per instruction should be the same as those defined for that
20303 instruction's pattern in neon.md. */
20304
20305 static neon_builtin_datum neon_builtin_data[] =
20306 {
20307 #include "arm_neon_builtins.def"
20308 };
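/* Annotation (sketch, not part of the original source; the entry shown is
   hypothetical): a .def line such as

     VAR2 (BINOP, vadd, v4hi, v8hi)

   expands, via the VAR2/VAR1, UP and CF macros above, into the two
   initializers

     {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0},
     {"vadd", NEON_BINOP, T_V8HI, CODE_FOR_neon_vaddv8hi, 0},

   i.e. one neon_builtin_datum per key mode of the instruction.  */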
20309
20310 #undef CF
20311 #undef VAR1
20312 #undef VAR2
20313 #undef VAR3
20314 #undef VAR4
20315 #undef VAR5
20316 #undef VAR6
20317 #undef VAR7
20318 #undef VAR8
20319 #undef VAR9
20320 #undef VAR10
20321
20322 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
20323 #define VAR1(T, N, A) \
20324 CF (N, A)
20325 #define VAR2(T, N, A, B) \
20326 VAR1 (T, N, A), \
20327 CF (N, B)
20328 #define VAR3(T, N, A, B, C) \
20329 VAR2 (T, N, A, B), \
20330 CF (N, C)
20331 #define VAR4(T, N, A, B, C, D) \
20332 VAR3 (T, N, A, B, C), \
20333 CF (N, D)
20334 #define VAR5(T, N, A, B, C, D, E) \
20335 VAR4 (T, N, A, B, C, D), \
20336 CF (N, E)
20337 #define VAR6(T, N, A, B, C, D, E, F) \
20338 VAR5 (T, N, A, B, C, D, E), \
20339 CF (N, F)
20340 #define VAR7(T, N, A, B, C, D, E, F, G) \
20341 VAR6 (T, N, A, B, C, D, E, F), \
20342 CF (N, G)
20343 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
20344 VAR7 (T, N, A, B, C, D, E, F, G), \
20345 CF (N, H)
20346 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
20347 VAR8 (T, N, A, B, C, D, E, F, G, H), \
20348 CF (N, I)
20349 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
20350 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
20351 CF (N, J)
20352 enum arm_builtins
20353 {
20354 ARM_BUILTIN_GETWCGR0,
20355 ARM_BUILTIN_GETWCGR1,
20356 ARM_BUILTIN_GETWCGR2,
20357 ARM_BUILTIN_GETWCGR3,
20358
20359 ARM_BUILTIN_SETWCGR0,
20360 ARM_BUILTIN_SETWCGR1,
20361 ARM_BUILTIN_SETWCGR2,
20362 ARM_BUILTIN_SETWCGR3,
20363
20364 ARM_BUILTIN_WZERO,
20365
20366 ARM_BUILTIN_WAVG2BR,
20367 ARM_BUILTIN_WAVG2HR,
20368 ARM_BUILTIN_WAVG2B,
20369 ARM_BUILTIN_WAVG2H,
20370
20371 ARM_BUILTIN_WACCB,
20372 ARM_BUILTIN_WACCH,
20373 ARM_BUILTIN_WACCW,
20374
20375 ARM_BUILTIN_WMACS,
20376 ARM_BUILTIN_WMACSZ,
20377 ARM_BUILTIN_WMACU,
20378 ARM_BUILTIN_WMACUZ,
20379
20380 ARM_BUILTIN_WSADB,
20381 ARM_BUILTIN_WSADBZ,
20382 ARM_BUILTIN_WSADH,
20383 ARM_BUILTIN_WSADHZ,
20384
20385 ARM_BUILTIN_WALIGNI,
20386 ARM_BUILTIN_WALIGNR0,
20387 ARM_BUILTIN_WALIGNR1,
20388 ARM_BUILTIN_WALIGNR2,
20389 ARM_BUILTIN_WALIGNR3,
20390
20391 ARM_BUILTIN_TMIA,
20392 ARM_BUILTIN_TMIAPH,
20393 ARM_BUILTIN_TMIABB,
20394 ARM_BUILTIN_TMIABT,
20395 ARM_BUILTIN_TMIATB,
20396 ARM_BUILTIN_TMIATT,
20397
20398 ARM_BUILTIN_TMOVMSKB,
20399 ARM_BUILTIN_TMOVMSKH,
20400 ARM_BUILTIN_TMOVMSKW,
20401
20402 ARM_BUILTIN_TBCSTB,
20403 ARM_BUILTIN_TBCSTH,
20404 ARM_BUILTIN_TBCSTW,
20405
20406 ARM_BUILTIN_WMADDS,
20407 ARM_BUILTIN_WMADDU,
20408
20409 ARM_BUILTIN_WPACKHSS,
20410 ARM_BUILTIN_WPACKWSS,
20411 ARM_BUILTIN_WPACKDSS,
20412 ARM_BUILTIN_WPACKHUS,
20413 ARM_BUILTIN_WPACKWUS,
20414 ARM_BUILTIN_WPACKDUS,
20415
20416 ARM_BUILTIN_WADDB,
20417 ARM_BUILTIN_WADDH,
20418 ARM_BUILTIN_WADDW,
20419 ARM_BUILTIN_WADDSSB,
20420 ARM_BUILTIN_WADDSSH,
20421 ARM_BUILTIN_WADDSSW,
20422 ARM_BUILTIN_WADDUSB,
20423 ARM_BUILTIN_WADDUSH,
20424 ARM_BUILTIN_WADDUSW,
20425 ARM_BUILTIN_WSUBB,
20426 ARM_BUILTIN_WSUBH,
20427 ARM_BUILTIN_WSUBW,
20428 ARM_BUILTIN_WSUBSSB,
20429 ARM_BUILTIN_WSUBSSH,
20430 ARM_BUILTIN_WSUBSSW,
20431 ARM_BUILTIN_WSUBUSB,
20432 ARM_BUILTIN_WSUBUSH,
20433 ARM_BUILTIN_WSUBUSW,
20434
20435 ARM_BUILTIN_WAND,
20436 ARM_BUILTIN_WANDN,
20437 ARM_BUILTIN_WOR,
20438 ARM_BUILTIN_WXOR,
20439
20440 ARM_BUILTIN_WCMPEQB,
20441 ARM_BUILTIN_WCMPEQH,
20442 ARM_BUILTIN_WCMPEQW,
20443 ARM_BUILTIN_WCMPGTUB,
20444 ARM_BUILTIN_WCMPGTUH,
20445 ARM_BUILTIN_WCMPGTUW,
20446 ARM_BUILTIN_WCMPGTSB,
20447 ARM_BUILTIN_WCMPGTSH,
20448 ARM_BUILTIN_WCMPGTSW,
20449
20450 ARM_BUILTIN_TEXTRMSB,
20451 ARM_BUILTIN_TEXTRMSH,
20452 ARM_BUILTIN_TEXTRMSW,
20453 ARM_BUILTIN_TEXTRMUB,
20454 ARM_BUILTIN_TEXTRMUH,
20455 ARM_BUILTIN_TEXTRMUW,
20456 ARM_BUILTIN_TINSRB,
20457 ARM_BUILTIN_TINSRH,
20458 ARM_BUILTIN_TINSRW,
20459
20460 ARM_BUILTIN_WMAXSW,
20461 ARM_BUILTIN_WMAXSH,
20462 ARM_BUILTIN_WMAXSB,
20463 ARM_BUILTIN_WMAXUW,
20464 ARM_BUILTIN_WMAXUH,
20465 ARM_BUILTIN_WMAXUB,
20466 ARM_BUILTIN_WMINSW,
20467 ARM_BUILTIN_WMINSH,
20468 ARM_BUILTIN_WMINSB,
20469 ARM_BUILTIN_WMINUW,
20470 ARM_BUILTIN_WMINUH,
20471 ARM_BUILTIN_WMINUB,
20472
20473 ARM_BUILTIN_WMULUM,
20474 ARM_BUILTIN_WMULSM,
20475 ARM_BUILTIN_WMULUL,
20476
20477 ARM_BUILTIN_PSADBH,
20478 ARM_BUILTIN_WSHUFH,
20479
20480 ARM_BUILTIN_WSLLH,
20481 ARM_BUILTIN_WSLLW,
20482 ARM_BUILTIN_WSLLD,
20483 ARM_BUILTIN_WSRAH,
20484 ARM_BUILTIN_WSRAW,
20485 ARM_BUILTIN_WSRAD,
20486 ARM_BUILTIN_WSRLH,
20487 ARM_BUILTIN_WSRLW,
20488 ARM_BUILTIN_WSRLD,
20489 ARM_BUILTIN_WRORH,
20490 ARM_BUILTIN_WRORW,
20491 ARM_BUILTIN_WRORD,
20492 ARM_BUILTIN_WSLLHI,
20493 ARM_BUILTIN_WSLLWI,
20494 ARM_BUILTIN_WSLLDI,
20495 ARM_BUILTIN_WSRAHI,
20496 ARM_BUILTIN_WSRAWI,
20497 ARM_BUILTIN_WSRADI,
20498 ARM_BUILTIN_WSRLHI,
20499 ARM_BUILTIN_WSRLWI,
20500 ARM_BUILTIN_WSRLDI,
20501 ARM_BUILTIN_WRORHI,
20502 ARM_BUILTIN_WRORWI,
20503 ARM_BUILTIN_WRORDI,
20504
20505 ARM_BUILTIN_WUNPCKIHB,
20506 ARM_BUILTIN_WUNPCKIHH,
20507 ARM_BUILTIN_WUNPCKIHW,
20508 ARM_BUILTIN_WUNPCKILB,
20509 ARM_BUILTIN_WUNPCKILH,
20510 ARM_BUILTIN_WUNPCKILW,
20511
20512 ARM_BUILTIN_WUNPCKEHSB,
20513 ARM_BUILTIN_WUNPCKEHSH,
20514 ARM_BUILTIN_WUNPCKEHSW,
20515 ARM_BUILTIN_WUNPCKEHUB,
20516 ARM_BUILTIN_WUNPCKEHUH,
20517 ARM_BUILTIN_WUNPCKEHUW,
20518 ARM_BUILTIN_WUNPCKELSB,
20519 ARM_BUILTIN_WUNPCKELSH,
20520 ARM_BUILTIN_WUNPCKELSW,
20521 ARM_BUILTIN_WUNPCKELUB,
20522 ARM_BUILTIN_WUNPCKELUH,
20523 ARM_BUILTIN_WUNPCKELUW,
20524
20525 ARM_BUILTIN_WABSB,
20526 ARM_BUILTIN_WABSH,
20527 ARM_BUILTIN_WABSW,
20528
20529 ARM_BUILTIN_WADDSUBHX,
20530 ARM_BUILTIN_WSUBADDHX,
20531
20532 ARM_BUILTIN_WABSDIFFB,
20533 ARM_BUILTIN_WABSDIFFH,
20534 ARM_BUILTIN_WABSDIFFW,
20535
20536 ARM_BUILTIN_WADDCH,
20537 ARM_BUILTIN_WADDCW,
20538
20539 ARM_BUILTIN_WAVG4,
20540 ARM_BUILTIN_WAVG4R,
20541
20542 ARM_BUILTIN_WMADDSX,
20543 ARM_BUILTIN_WMADDUX,
20544
20545 ARM_BUILTIN_WMADDSN,
20546 ARM_BUILTIN_WMADDUN,
20547
20548 ARM_BUILTIN_WMULWSM,
20549 ARM_BUILTIN_WMULWUM,
20550
20551 ARM_BUILTIN_WMULWSMR,
20552 ARM_BUILTIN_WMULWUMR,
20553
20554 ARM_BUILTIN_WMULWL,
20555
20556 ARM_BUILTIN_WMULSMR,
20557 ARM_BUILTIN_WMULUMR,
20558
20559 ARM_BUILTIN_WQMULM,
20560 ARM_BUILTIN_WQMULMR,
20561
20562 ARM_BUILTIN_WQMULWM,
20563 ARM_BUILTIN_WQMULWMR,
20564
20565 ARM_BUILTIN_WADDBHUSM,
20566 ARM_BUILTIN_WADDBHUSL,
20567
20568 ARM_BUILTIN_WQMIABB,
20569 ARM_BUILTIN_WQMIABT,
20570 ARM_BUILTIN_WQMIATB,
20571 ARM_BUILTIN_WQMIATT,
20572
20573 ARM_BUILTIN_WQMIABBN,
20574 ARM_BUILTIN_WQMIABTN,
20575 ARM_BUILTIN_WQMIATBN,
20576 ARM_BUILTIN_WQMIATTN,
20577
20578 ARM_BUILTIN_WMIABB,
20579 ARM_BUILTIN_WMIABT,
20580 ARM_BUILTIN_WMIATB,
20581 ARM_BUILTIN_WMIATT,
20582
20583 ARM_BUILTIN_WMIABBN,
20584 ARM_BUILTIN_WMIABTN,
20585 ARM_BUILTIN_WMIATBN,
20586 ARM_BUILTIN_WMIATTN,
20587
20588 ARM_BUILTIN_WMIAWBB,
20589 ARM_BUILTIN_WMIAWBT,
20590 ARM_BUILTIN_WMIAWTB,
20591 ARM_BUILTIN_WMIAWTT,
20592
20593 ARM_BUILTIN_WMIAWBBN,
20594 ARM_BUILTIN_WMIAWBTN,
20595 ARM_BUILTIN_WMIAWTBN,
20596 ARM_BUILTIN_WMIAWTTN,
20597
20598 ARM_BUILTIN_WMERGE,
20599
20600 #include "arm_neon_builtins.def"
20601
20602 ,ARM_BUILTIN_MAX
20603 };
20604
20605 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
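/* Annotation (sketch, not part of the original source): with CF redefined to
   ARM_BUILTIN_NEON_##N##X, the #include of arm_neon_builtins.def inside the
   enum above produces one enumerator per neon_builtin_data entry (the
   hypothetical VAR2 (BINOP, vadd, v4hi, v8hi) sketched earlier would yield
   ARM_BUILTIN_NEON_vaddv4hi and ARM_BUILTIN_NEON_vaddv8hi).  Consequently
   ARM_BUILTIN_NEON_BASE is the enum value of the first NEON entry, and the
   fcode assigned to neon_builtin_data[i] in arm_init_neon_builtins below is
   ARM_BUILTIN_NEON_BASE + i.  */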
20606
20607 #undef CF
20608 #undef VAR1
20609 #undef VAR2
20610 #undef VAR3
20611 #undef VAR4
20612 #undef VAR5
20613 #undef VAR6
20614 #undef VAR7
20615 #undef VAR8
20616 #undef VAR9
20617 #undef VAR10
20618
20619 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
20620
20621 static void
20622 arm_init_neon_builtins (void)
20623 {
20624 unsigned int i, fcode;
20625 tree decl;
20626
20627 tree neon_intQI_type_node;
20628 tree neon_intHI_type_node;
20629 tree neon_floatHF_type_node;
20630 tree neon_polyQI_type_node;
20631 tree neon_polyHI_type_node;
20632 tree neon_intSI_type_node;
20633 tree neon_intDI_type_node;
20634 tree neon_float_type_node;
20635
20636 tree intQI_pointer_node;
20637 tree intHI_pointer_node;
20638 tree intSI_pointer_node;
20639 tree intDI_pointer_node;
20640 tree float_pointer_node;
20641
20642 tree const_intQI_node;
20643 tree const_intHI_node;
20644 tree const_intSI_node;
20645 tree const_intDI_node;
20646 tree const_float_node;
20647
20648 tree const_intQI_pointer_node;
20649 tree const_intHI_pointer_node;
20650 tree const_intSI_pointer_node;
20651 tree const_intDI_pointer_node;
20652 tree const_float_pointer_node;
20653
20654 tree V8QI_type_node;
20655 tree V4HI_type_node;
20656 tree V4HF_type_node;
20657 tree V2SI_type_node;
20658 tree V2SF_type_node;
20659 tree V16QI_type_node;
20660 tree V8HI_type_node;
20661 tree V4SI_type_node;
20662 tree V4SF_type_node;
20663 tree V2DI_type_node;
20664
20665 tree intUQI_type_node;
20666 tree intUHI_type_node;
20667 tree intUSI_type_node;
20668 tree intUDI_type_node;
20669
20670 tree intEI_type_node;
20671 tree intOI_type_node;
20672 tree intCI_type_node;
20673 tree intXI_type_node;
20674
20675 tree V8QI_pointer_node;
20676 tree V4HI_pointer_node;
20677 tree V2SI_pointer_node;
20678 tree V2SF_pointer_node;
20679 tree V16QI_pointer_node;
20680 tree V8HI_pointer_node;
20681 tree V4SI_pointer_node;
20682 tree V4SF_pointer_node;
20683 tree V2DI_pointer_node;
20684
20685 tree void_ftype_pv8qi_v8qi_v8qi;
20686 tree void_ftype_pv4hi_v4hi_v4hi;
20687 tree void_ftype_pv2si_v2si_v2si;
20688 tree void_ftype_pv2sf_v2sf_v2sf;
20689 tree void_ftype_pdi_di_di;
20690 tree void_ftype_pv16qi_v16qi_v16qi;
20691 tree void_ftype_pv8hi_v8hi_v8hi;
20692 tree void_ftype_pv4si_v4si_v4si;
20693 tree void_ftype_pv4sf_v4sf_v4sf;
20694 tree void_ftype_pv2di_v2di_v2di;
20695
20696 tree reinterp_ftype_dreg[5][5];
20697 tree reinterp_ftype_qreg[5][5];
20698 tree dreg_types[5], qreg_types[5];
20699
20700 /* Create distinguished type nodes for NEON vector element types,
20701 and pointers to values of such types, so we can detect them later. */
20702 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20703 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20704 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20705 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20706 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
20707 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
20708 neon_float_type_node = make_node (REAL_TYPE);
20709 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
20710 layout_type (neon_float_type_node);
20711 neon_floatHF_type_node = make_node (REAL_TYPE);
20712 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
20713 layout_type (neon_floatHF_type_node);
20714
20715 /* Define typedefs which exactly correspond to the modes we are basing vector
20716 types on. If you change these names you'll need to change
20717 the table used by arm_mangle_type too. */
20718 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
20719 "__builtin_neon_qi");
20720 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
20721 "__builtin_neon_hi");
20722 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
20723 "__builtin_neon_hf");
20724 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
20725 "__builtin_neon_si");
20726 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
20727 "__builtin_neon_sf");
20728 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
20729 "__builtin_neon_di");
20730 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
20731 "__builtin_neon_poly8");
20732 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
20733 "__builtin_neon_poly16");
20734
20735 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
20736 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
20737 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
20738 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
20739 float_pointer_node = build_pointer_type (neon_float_type_node);
20740
20741 /* Next create constant-qualified versions of the above types. */
20742 const_intQI_node = build_qualified_type (neon_intQI_type_node,
20743 TYPE_QUAL_CONST);
20744 const_intHI_node = build_qualified_type (neon_intHI_type_node,
20745 TYPE_QUAL_CONST);
20746 const_intSI_node = build_qualified_type (neon_intSI_type_node,
20747 TYPE_QUAL_CONST);
20748 const_intDI_node = build_qualified_type (neon_intDI_type_node,
20749 TYPE_QUAL_CONST);
20750 const_float_node = build_qualified_type (neon_float_type_node,
20751 TYPE_QUAL_CONST);
20752
20753 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
20754 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
20755 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
20756 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
20757 const_float_pointer_node = build_pointer_type (const_float_node);
20758
20759 /* Now create vector types based on our NEON element types. */
20760 /* 64-bit vectors. */
20761 V8QI_type_node =
20762 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
20763 V4HI_type_node =
20764 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
20765 V4HF_type_node =
20766 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
20767 V2SI_type_node =
20768 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
20769 V2SF_type_node =
20770 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
20771 /* 128-bit vectors. */
20772 V16QI_type_node =
20773 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
20774 V8HI_type_node =
20775 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
20776 V4SI_type_node =
20777 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
20778 V4SF_type_node =
20779 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
20780 V2DI_type_node =
20781 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
20782
20783 /* Unsigned integer types for various mode sizes. */
20784 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
20785 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
20786 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
20787 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
20788
20789 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
20790 "__builtin_neon_uqi");
20791 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
20792 "__builtin_neon_uhi");
20793 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
20794 "__builtin_neon_usi");
20795 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20796 "__builtin_neon_udi");
20797
20798 /* Opaque integer types for structures of vectors. */
20799 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
20800 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
20801 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
20802 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
20803
20804 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
20805 "__builtin_neon_ti");
20806 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
20807 "__builtin_neon_ei");
20808 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
20809 "__builtin_neon_oi");
20810 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
20811 "__builtin_neon_ci");
20812 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
20813 "__builtin_neon_xi");
20814
20815 /* Pointers to vector types. */
20816 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
20817 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
20818 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
20819 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
20820 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
20821 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
20822 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
20823 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
20824 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
20825
20826 /* Operations which return results as pairs. */
20827 void_ftype_pv8qi_v8qi_v8qi =
20828 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
20829 V8QI_type_node, NULL);
20830 void_ftype_pv4hi_v4hi_v4hi =
20831 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
20832 V4HI_type_node, NULL);
20833 void_ftype_pv2si_v2si_v2si =
20834 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
20835 V2SI_type_node, NULL);
20836 void_ftype_pv2sf_v2sf_v2sf =
20837 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
20838 V2SF_type_node, NULL);
20839 void_ftype_pdi_di_di =
20840 build_function_type_list (void_type_node, intDI_pointer_node,
20841 neon_intDI_type_node, neon_intDI_type_node, NULL);
20842 void_ftype_pv16qi_v16qi_v16qi =
20843 build_function_type_list (void_type_node, V16QI_pointer_node,
20844 V16QI_type_node, V16QI_type_node, NULL);
20845 void_ftype_pv8hi_v8hi_v8hi =
20846 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
20847 V8HI_type_node, NULL);
20848 void_ftype_pv4si_v4si_v4si =
20849 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
20850 V4SI_type_node, NULL);
20851 void_ftype_pv4sf_v4sf_v4sf =
20852 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
20853 V4SF_type_node, NULL);
20854 void_ftype_pv2di_v2di_v2di =
20855 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
20856 V2DI_type_node, NULL);
20857
20858 dreg_types[0] = V8QI_type_node;
20859 dreg_types[1] = V4HI_type_node;
20860 dreg_types[2] = V2SI_type_node;
20861 dreg_types[3] = V2SF_type_node;
20862 dreg_types[4] = neon_intDI_type_node;
20863
20864 qreg_types[0] = V16QI_type_node;
20865 qreg_types[1] = V8HI_type_node;
20866 qreg_types[2] = V4SI_type_node;
20867 qreg_types[3] = V4SF_type_node;
20868 qreg_types[4] = V2DI_type_node;
20869
20870 for (i = 0; i < 5; i++)
20871 {
20872 int j;
20873 for (j = 0; j < 5; j++)
20874 {
20875 reinterp_ftype_dreg[i][j]
20876 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
20877 reinterp_ftype_qreg[i][j]
20878 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
20879 }
20880 }
20881
20882 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
20883 i < ARRAY_SIZE (neon_builtin_data);
20884 i++, fcode++)
20885 {
20886 neon_builtin_datum *d = &neon_builtin_data[i];
20887
20888 const char* const modenames[] = {
20889 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
20890 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
20891 "ti", "ei", "oi"
20892 };
20893 char namebuf[60];
20894 tree ftype = NULL;
20895 int is_load = 0, is_store = 0;
20896
20897 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
20898
20899 d->fcode = fcode;
20900
20901 switch (d->itype)
20902 {
20903 case NEON_LOAD1:
20904 case NEON_LOAD1LANE:
20905 case NEON_LOADSTRUCT:
20906 case NEON_LOADSTRUCTLANE:
20907 is_load = 1;
20908 /* Fall through. */
20909 case NEON_STORE1:
20910 case NEON_STORE1LANE:
20911 case NEON_STORESTRUCT:
20912 case NEON_STORESTRUCTLANE:
20913 if (!is_load)
20914 is_store = 1;
20915 /* Fall through. */
20916 case NEON_UNOP:
20917 case NEON_RINT:
20918 case NEON_BINOP:
20919 case NEON_LOGICBINOP:
20920 case NEON_SHIFTINSERT:
20921 case NEON_TERNOP:
20922 case NEON_GETLANE:
20923 case NEON_SETLANE:
20924 case NEON_CREATE:
20925 case NEON_DUP:
20926 case NEON_DUPLANE:
20927 case NEON_SHIFTIMM:
20928 case NEON_SHIFTACC:
20929 case NEON_COMBINE:
20930 case NEON_SPLIT:
20931 case NEON_CONVERT:
20932 case NEON_FIXCONV:
20933 case NEON_LANEMUL:
20934 case NEON_LANEMULL:
20935 case NEON_LANEMULH:
20936 case NEON_LANEMAC:
20937 case NEON_SCALARMUL:
20938 case NEON_SCALARMULL:
20939 case NEON_SCALARMULH:
20940 case NEON_SCALARMAC:
20941 case NEON_SELECT:
20942 case NEON_VTBL:
20943 case NEON_VTBX:
20944 {
20945 int k;
20946 tree return_type = void_type_node, args = void_list_node;
20947
20948 /* Build a function type directly from the insn_data for
20949 this builtin. The build_function_type() function takes
20950 care of removing duplicates for us. */
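	      /* Annotation (not part of the original source): for a
		 NEON_BINOP whose generator operands are (0: result,
		 1: src1, 2: src2), k runs 2, 1, 0; operands 2 and 1 are
		 prepended to ARGS via tree_cons and operand 0 supplies
		 RETURN_TYPE, so the argument list ends up in source
		 order.  */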
20951 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
20952 {
20953 tree eltype;
20954
20955 if (is_load && k == 1)
20956 {
20957 /* Neon load patterns always have the memory
20958 operand in the operand 1 position. */
20959 gcc_assert (insn_data[d->code].operand[k].predicate
20960 == neon_struct_operand);
20961
20962 switch (d->mode)
20963 {
20964 case T_V8QI:
20965 case T_V16QI:
20966 eltype = const_intQI_pointer_node;
20967 break;
20968
20969 case T_V4HI:
20970 case T_V8HI:
20971 eltype = const_intHI_pointer_node;
20972 break;
20973
20974 case T_V2SI:
20975 case T_V4SI:
20976 eltype = const_intSI_pointer_node;
20977 break;
20978
20979 case T_V2SF:
20980 case T_V4SF:
20981 eltype = const_float_pointer_node;
20982 break;
20983
20984 case T_DI:
20985 case T_V2DI:
20986 eltype = const_intDI_pointer_node;
20987 break;
20988
20989 default: gcc_unreachable ();
20990 }
20991 }
20992 else if (is_store && k == 0)
20993 {
20994 /* Similarly, Neon store patterns use operand 0 as
20995 the memory location to store to. */
20996 gcc_assert (insn_data[d->code].operand[k].predicate
20997 == neon_struct_operand);
20998
20999 switch (d->mode)
21000 {
21001 case T_V8QI:
21002 case T_V16QI:
21003 eltype = intQI_pointer_node;
21004 break;
21005
21006 case T_V4HI:
21007 case T_V8HI:
21008 eltype = intHI_pointer_node;
21009 break;
21010
21011 case T_V2SI:
21012 case T_V4SI:
21013 eltype = intSI_pointer_node;
21014 break;
21015
21016 case T_V2SF:
21017 case T_V4SF:
21018 eltype = float_pointer_node;
21019 break;
21020
21021 case T_DI:
21022 case T_V2DI:
21023 eltype = intDI_pointer_node;
21024 break;
21025
21026 default: gcc_unreachable ();
21027 }
21028 }
21029 else
21030 {
21031 switch (insn_data[d->code].operand[k].mode)
21032 {
21033 case VOIDmode: eltype = void_type_node; break;
21034 /* Scalars. */
21035 case QImode: eltype = neon_intQI_type_node; break;
21036 case HImode: eltype = neon_intHI_type_node; break;
21037 case SImode: eltype = neon_intSI_type_node; break;
21038 case SFmode: eltype = neon_float_type_node; break;
21039 case DImode: eltype = neon_intDI_type_node; break;
21040 case TImode: eltype = intTI_type_node; break;
21041 case EImode: eltype = intEI_type_node; break;
21042 case OImode: eltype = intOI_type_node; break;
21043 case CImode: eltype = intCI_type_node; break;
21044 case XImode: eltype = intXI_type_node; break;
21045 /* 64-bit vectors. */
21046 case V8QImode: eltype = V8QI_type_node; break;
21047 case V4HImode: eltype = V4HI_type_node; break;
21048 case V2SImode: eltype = V2SI_type_node; break;
21049 case V2SFmode: eltype = V2SF_type_node; break;
21050 /* 128-bit vectors. */
21051 case V16QImode: eltype = V16QI_type_node; break;
21052 case V8HImode: eltype = V8HI_type_node; break;
21053 case V4SImode: eltype = V4SI_type_node; break;
21054 case V4SFmode: eltype = V4SF_type_node; break;
21055 case V2DImode: eltype = V2DI_type_node; break;
21056 default: gcc_unreachable ();
21057 }
21058 }
21059
21060 if (k == 0 && !is_store)
21061 return_type = eltype;
21062 else
21063 args = tree_cons (NULL_TREE, eltype, args);
21064 }
21065
21066 ftype = build_function_type (return_type, args);
21067 }
21068 break;
21069
21070 case NEON_RESULTPAIR:
21071 {
21072 switch (insn_data[d->code].operand[1].mode)
21073 {
21074 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
21075 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
21076 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
21077 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
21078 case DImode: ftype = void_ftype_pdi_di_di; break;
21079 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
21080 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
21081 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
21082 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
21083 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
21084 default: gcc_unreachable ();
21085 }
21086 }
21087 break;
21088
21089 case NEON_REINTERP:
21090 {
21091 /* We iterate over 5 doubleword types, then 5 quadword
21092 types. V4HF is not a type used in reinterpret, so we translate
21093 d->mode to the correct index in reinterp_ftype_dreg. */
21094 int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
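	    /* Annotation (worked example, not part of the original source):
	       for d->mode == T_V2SI (enum value 3, above T_V4HF) this gives
	       rhs = (3 - 1) % 5 = 2, selecting dreg_types[2] (V2SI); for
	       d->mode == T_V2DI (enum value 10) it gives
	       rhs = (10 - 1) % 5 = 4, selecting qreg_types[4] (V2DI).  */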
21095 switch (insn_data[d->code].operand[0].mode)
21096 {
21097 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
21098 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
21099 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
21100 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
21101 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
21102 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
21103 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
21104 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
21105 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
21106 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
21107 default: gcc_unreachable ();
21108 }
21109 }
21110 break;
21111 case NEON_FLOAT_WIDEN:
21112 {
21113 tree eltype = NULL_TREE;
21114 tree return_type = NULL_TREE;
21115
21116 switch (insn_data[d->code].operand[1].mode)
21117 {
21118 case V4HFmode:
21119 eltype = V4HF_type_node;
21120 return_type = V4SF_type_node;
21121 break;
21122 default: gcc_unreachable ();
21123 }
21124 ftype = build_function_type_list (return_type, eltype, NULL);
21125 break;
21126 }
21127 case NEON_FLOAT_NARROW:
21128 {
21129 tree eltype = NULL_TREE;
21130 tree return_type = NULL_TREE;
21131
21132 switch (insn_data[d->code].operand[1].mode)
21133 {
21134 case V4SFmode:
21135 eltype = V4SF_type_node;
21136 return_type = V4HF_type_node;
21137 break;
21138 default: gcc_unreachable ();
21139 }
21140 ftype = build_function_type_list (return_type, eltype, NULL);
21141 break;
21142 }
21143 default:
21144 gcc_unreachable ();
21145 }
21146
21147 gcc_assert (ftype != NULL);
21148
21149 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
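	  /* Annotation (not part of the original source): for the
	     hypothetical VAR2 (BINOP, vadd, v4hi, v8hi) entry sketched
	     earlier, this yields the names "__builtin_neon_vaddv4hi" and
	     "__builtin_neon_vaddv8hi", i.e. d->name followed by the textual
	     form of the entry's key mode.  */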
21150
21151 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
21152 NULL_TREE);
21153 arm_builtin_decls[fcode] = decl;
21154 }
21155 }
21156
21157 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
21158 do \
21159 { \
21160 if ((MASK) & insn_flags) \
21161 { \
21162 tree bdecl; \
21163 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
21164 BUILT_IN_MD, NULL, NULL_TREE); \
21165 arm_builtin_decls[CODE] = bdecl; \
21166 } \
21167 } \
21168 while (0)
21169
21170 struct builtin_description
21171 {
21172 const unsigned int mask;
21173 const enum insn_code icode;
21174 const char * const name;
21175 const enum arm_builtins code;
21176 const enum rtx_code comparison;
21177 const unsigned int flag;
21178 };
21179
21180 static const struct builtin_description bdesc_2arg[] =
21181 {
21182 #define IWMMXT_BUILTIN(code, string, builtin) \
21183 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
21184 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21185
21186 #define IWMMXT2_BUILTIN(code, string, builtin) \
21187 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
21188 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21189
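/* Annotation (not part of the original source): the first entry below,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   i.e. each table row ties an insn pattern to a builtin name and an
   arm_builtins code, guarded by the FL_IWMMXT / FL_IWMMXT2 feature flag.  */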
21190 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
21191 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
21192 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
21193 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
21194 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
21195 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
21196 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
21197 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
21198 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
21199 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
21200 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
21201 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
21202 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
21203 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
21204 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
21205 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
21206 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
21207 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
21208 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
21209 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
21210 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
21211 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
21212 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
21213 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
21214 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
21215 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
21216 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
21217 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
21218 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
21219 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
21220 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
21221 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
21222 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
21223 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
21224 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
21225 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
21226 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
21227 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
21228 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
21229 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
21230 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
21231 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
21232 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
21233 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
21234 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
21235 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
21236 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
21237 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
21238 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
21239 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
21240 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
21241 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
21242 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
21243 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
21244 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
21245 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
21246 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
21247 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
21248 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
21249 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
21250 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
21251 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
21252 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
21253 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
21254 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
21255 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
21256 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
21257 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
21258 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
21259 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
21260 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
21261 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
21262 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
21263 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
21264 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
21265 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
21266 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
21267 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
21268
21269 #define IWMMXT_BUILTIN2(code, builtin) \
21270 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21271
21272 #define IWMMXT2_BUILTIN2(code, builtin) \
21273 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21274
21275 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
21276 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
21277 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
21278 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
21279 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
21280 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
21281 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
21282 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
21283 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
21284 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
21285 };
21286
21287 static const struct builtin_description bdesc_1arg[] =
21288 {
21289 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
21290 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
21291 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
21292 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
21293 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
21294 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
21295 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
21296 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
21297 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
21298 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
21299 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
21300 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
21301 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
21302 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
21303 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
21304 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
21305 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
21306 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
21307 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
21308 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
21309 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
21310 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
21311 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
21312 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
21313 };
21314
21315 /* Set up all the iWMMXt builtins. This is not called if
21316 TARGET_REALLY_IWMMXT is zero (see arm_init_builtins below). */
21317
21318 static void
21319 arm_init_iwmmxt_builtins (void)
21320 {
21321 const struct builtin_description * d;
21322 size_t i;
21323
21324 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21325 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21326 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
21327
21328 tree v8qi_ftype_v8qi_v8qi_int
21329 = build_function_type_list (V8QI_type_node,
21330 V8QI_type_node, V8QI_type_node,
21331 integer_type_node, NULL_TREE);
21332 tree v4hi_ftype_v4hi_int
21333 = build_function_type_list (V4HI_type_node,
21334 V4HI_type_node, integer_type_node, NULL_TREE);
21335 tree v2si_ftype_v2si_int
21336 = build_function_type_list (V2SI_type_node,
21337 V2SI_type_node, integer_type_node, NULL_TREE);
21338 tree v2si_ftype_di_di
21339 = build_function_type_list (V2SI_type_node,
21340 long_long_integer_type_node,
21341 long_long_integer_type_node,
21342 NULL_TREE);
21343 tree di_ftype_di_int
21344 = build_function_type_list (long_long_integer_type_node,
21345 long_long_integer_type_node,
21346 integer_type_node, NULL_TREE);
21347 tree di_ftype_di_int_int
21348 = build_function_type_list (long_long_integer_type_node,
21349 long_long_integer_type_node,
21350 integer_type_node,
21351 integer_type_node, NULL_TREE);
21352 tree int_ftype_v8qi
21353 = build_function_type_list (integer_type_node,
21354 V8QI_type_node, NULL_TREE);
21355 tree int_ftype_v4hi
21356 = build_function_type_list (integer_type_node,
21357 V4HI_type_node, NULL_TREE);
21358 tree int_ftype_v2si
21359 = build_function_type_list (integer_type_node,
21360 V2SI_type_node, NULL_TREE);
21361 tree int_ftype_v8qi_int
21362 = build_function_type_list (integer_type_node,
21363 V8QI_type_node, integer_type_node, NULL_TREE);
21364 tree int_ftype_v4hi_int
21365 = build_function_type_list (integer_type_node,
21366 V4HI_type_node, integer_type_node, NULL_TREE);
21367 tree int_ftype_v2si_int
21368 = build_function_type_list (integer_type_node,
21369 V2SI_type_node, integer_type_node, NULL_TREE);
21370 tree v8qi_ftype_v8qi_int_int
21371 = build_function_type_list (V8QI_type_node,
21372 V8QI_type_node, integer_type_node,
21373 integer_type_node, NULL_TREE);
21374 tree v4hi_ftype_v4hi_int_int
21375 = build_function_type_list (V4HI_type_node,
21376 V4HI_type_node, integer_type_node,
21377 integer_type_node, NULL_TREE);
21378 tree v2si_ftype_v2si_int_int
21379 = build_function_type_list (V2SI_type_node,
21380 V2SI_type_node, integer_type_node,
21381 integer_type_node, NULL_TREE);
21382 /* Miscellaneous. */
21383 tree v8qi_ftype_v4hi_v4hi
21384 = build_function_type_list (V8QI_type_node,
21385 V4HI_type_node, V4HI_type_node, NULL_TREE);
21386 tree v4hi_ftype_v2si_v2si
21387 = build_function_type_list (V4HI_type_node,
21388 V2SI_type_node, V2SI_type_node, NULL_TREE);
21389 tree v8qi_ftype_v4hi_v8qi
21390 = build_function_type_list (V8QI_type_node,
21391 V4HI_type_node, V8QI_type_node, NULL_TREE);
21392 tree v2si_ftype_v4hi_v4hi
21393 = build_function_type_list (V2SI_type_node,
21394 V4HI_type_node, V4HI_type_node, NULL_TREE);
21395 tree v2si_ftype_v8qi_v8qi
21396 = build_function_type_list (V2SI_type_node,
21397 V8QI_type_node, V8QI_type_node, NULL_TREE);
21398 tree v4hi_ftype_v4hi_di
21399 = build_function_type_list (V4HI_type_node,
21400 V4HI_type_node, long_long_integer_type_node,
21401 NULL_TREE);
21402 tree v2si_ftype_v2si_di
21403 = build_function_type_list (V2SI_type_node,
21404 V2SI_type_node, long_long_integer_type_node,
21405 NULL_TREE);
21406 tree di_ftype_void
21407 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
21408 tree int_ftype_void
21409 = build_function_type_list (integer_type_node, NULL_TREE);
21410 tree di_ftype_v8qi
21411 = build_function_type_list (long_long_integer_type_node,
21412 V8QI_type_node, NULL_TREE);
21413 tree di_ftype_v4hi
21414 = build_function_type_list (long_long_integer_type_node,
21415 V4HI_type_node, NULL_TREE);
21416 tree di_ftype_v2si
21417 = build_function_type_list (long_long_integer_type_node,
21418 V2SI_type_node, NULL_TREE);
21419 tree v2si_ftype_v4hi
21420 = build_function_type_list (V2SI_type_node,
21421 V4HI_type_node, NULL_TREE);
21422 tree v4hi_ftype_v8qi
21423 = build_function_type_list (V4HI_type_node,
21424 V8QI_type_node, NULL_TREE);
21425 tree v8qi_ftype_v8qi
21426 = build_function_type_list (V8QI_type_node,
21427 V8QI_type_node, NULL_TREE);
21428 tree v4hi_ftype_v4hi
21429 = build_function_type_list (V4HI_type_node,
21430 V4HI_type_node, NULL_TREE);
21431 tree v2si_ftype_v2si
21432 = build_function_type_list (V2SI_type_node,
21433 V2SI_type_node, NULL_TREE);
21434
21435 tree di_ftype_di_v4hi_v4hi
21436 = build_function_type_list (long_long_unsigned_type_node,
21437 long_long_unsigned_type_node,
21438 V4HI_type_node, V4HI_type_node,
21439 NULL_TREE);
21440
21441 tree di_ftype_v4hi_v4hi
21442 = build_function_type_list (long_long_unsigned_type_node,
21443 V4HI_type_node,V4HI_type_node,
21444 NULL_TREE);
21445
21446 tree v2si_ftype_v2si_v4hi_v4hi
21447 = build_function_type_list (V2SI_type_node,
21448 V2SI_type_node, V4HI_type_node,
21449 V4HI_type_node, NULL_TREE);
21450
21451 tree v2si_ftype_v2si_v8qi_v8qi
21452 = build_function_type_list (V2SI_type_node,
21453 V2SI_type_node, V8QI_type_node,
21454 V8QI_type_node, NULL_TREE);
21455
21456 tree di_ftype_di_v2si_v2si
21457 = build_function_type_list (long_long_unsigned_type_node,
21458 long_long_unsigned_type_node,
21459 V2SI_type_node, V2SI_type_node,
21460 NULL_TREE);
21461
21462 tree di_ftype_di_di_int
21463 = build_function_type_list (long_long_unsigned_type_node,
21464 long_long_unsigned_type_node,
21465 long_long_unsigned_type_node,
21466 integer_type_node, NULL_TREE);
21467
21468 tree void_ftype_int
21469 = build_function_type_list (void_type_node,
21470 integer_type_node, NULL_TREE);
21471
21472 tree v8qi_ftype_char
21473 = build_function_type_list (V8QI_type_node,
21474 signed_char_type_node, NULL_TREE);
21475
21476 tree v4hi_ftype_short
21477 = build_function_type_list (V4HI_type_node,
21478 short_integer_type_node, NULL_TREE);
21479
21480 tree v2si_ftype_int
21481 = build_function_type_list (V2SI_type_node,
21482 integer_type_node, NULL_TREE);
21483
21484 /* Normal vector binops. */
21485 tree v8qi_ftype_v8qi_v8qi
21486 = build_function_type_list (V8QI_type_node,
21487 V8QI_type_node, V8QI_type_node, NULL_TREE);
21488 tree v4hi_ftype_v4hi_v4hi
21489 = build_function_type_list (V4HI_type_node,
21490 V4HI_type_node,V4HI_type_node, NULL_TREE);
21491 tree v2si_ftype_v2si_v2si
21492 = build_function_type_list (V2SI_type_node,
21493 V2SI_type_node, V2SI_type_node, NULL_TREE);
21494 tree di_ftype_di_di
21495 = build_function_type_list (long_long_unsigned_type_node,
21496 long_long_unsigned_type_node,
21497 long_long_unsigned_type_node,
21498 NULL_TREE);
21499
21500 /* Add all builtins that are more or less simple operations on two
21501 operands. */
21502 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21503 {
21504 /* Use one of the operands; the target can have a different mode for
21505 mask-generating compares. */
21506 enum machine_mode mode;
21507 tree type;
21508
21509 if (d->name == 0)
21510 continue;
21511
21512 mode = insn_data[d->icode].operand[1].mode;
21513
21514 switch (mode)
21515 {
21516 case V8QImode:
21517 type = v8qi_ftype_v8qi_v8qi;
21518 break;
21519 case V4HImode:
21520 type = v4hi_ftype_v4hi_v4hi;
21521 break;
21522 case V2SImode:
21523 type = v2si_ftype_v2si_v2si;
21524 break;
21525 case DImode:
21526 type = di_ftype_di_di;
21527 break;
21528
21529 default:
21530 gcc_unreachable ();
21531 }
21532
21533 def_mbuiltin (d->mask, d->name, type, d->code);
21534 }
21535
21536 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
21537 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
21538 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
21539 ARM_BUILTIN_ ## CODE)
21540
21541 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
21542 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
21543 ARM_BUILTIN_ ## CODE)
21544
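  /* Annotation (not part of the original source): the first use below,
     iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO), expands to

       def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		     ARM_BUILTIN_WZERO);

     which registers the builtin only when FL_IWMMXT is present in
     insn_flags (see the def_mbuiltin macro above).  */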
21545 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
21546 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
21547 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
21548 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
21549 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
21550 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
21551 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
21552 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
21553 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
21554
21555 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
21556 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
21557 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
21558 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
21559 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
21560 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
21561
21562 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
21563 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
21564 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
21565 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
21566 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
21567 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
21568
21569 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
21570 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
21571 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
21572 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
21573 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
21574 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
21575
21576 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
21577 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
21578 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
21579 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
21580 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
21581 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
21582
21583 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
21584
21585 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
21586 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
21587 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
21588 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
21589 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
21590 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
21591 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
21592 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
21593 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
21594 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
21595
21596 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
21597 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
21598 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
21599 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
21600 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
21601 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
21602 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
21603 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
21604 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
21605
21606 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
21607 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
21608 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
21609
21610 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
21611 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
21612 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
21613
21614 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
21615 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
21616
21617 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
21618 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
21619 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
21620 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
21621 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
21622 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
21623
21624 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
21625 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
21626 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
21627 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
21628 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
21629 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
21630 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
21631 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
21632 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
21633 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
21634 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
21635 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
21636
21637 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
21638 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
21639 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
21640 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
21641
21642 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
21643 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
21644 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
21645 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
21646 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
21647 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
21648 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
21649
21650 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
21651 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
21652 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
21653
21654 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
21655 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
21656 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
21657 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
21658
21659 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
21660 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
21661 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
21662 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
21663
21664 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
21665 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
21666 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
21667 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
21668
21669 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
21670 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
21671 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
21672 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
21673
21674 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
21675 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
21676 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
21677 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
21678
21679 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
21680 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
21681 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
21682 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
21683
21684 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
21685
21686 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
21687 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
21688 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
21689
21690 #undef iwmmx_mbuiltin
21691 #undef iwmmx2_mbuiltin
21692 }
21693
21694 static void
21695 arm_init_fp16_builtins (void)
21696 {
21697 tree fp16_type = make_node (REAL_TYPE);
21698 TYPE_PRECISION (fp16_type) = 16;
21699 layout_type (fp16_type);
21700 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
21701 }
21702
21703 static void
21704 arm_init_builtins (void)
21705 {
21706 if (TARGET_REALLY_IWMMXT)
21707 arm_init_iwmmxt_builtins ();
21708
21709 if (TARGET_NEON)
21710 arm_init_neon_builtins ();
21711
21712 if (arm_fp16_format)
21713 arm_init_fp16_builtins ();
21714 }
21715
21716 /* Return the ARM builtin for CODE. */
21717
21718 static tree
21719 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
21720 {
21721 if (code >= ARM_BUILTIN_MAX)
21722 return error_mark_node;
21723
21724 return arm_builtin_decls[code];
21725 }
21726
21727 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
21728
21729 static const char *
21730 arm_invalid_parameter_type (const_tree t)
21731 {
21732 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
21733 return N_("function parameters cannot have __fp16 type");
21734 return NULL;
21735 }
21736
21737 /* Implement TARGET_INVALID_RETURN_TYPE. */
21738
21739 static const char *
21740 arm_invalid_return_type (const_tree t)
21741 {
21742 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
21743 return N_("functions cannot return __fp16 type");
21744 return NULL;
21745 }
21746
21747 /* Implement TARGET_PROMOTED_TYPE. */
21748
21749 static tree
21750 arm_promoted_type (const_tree t)
21751 {
21752 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
21753 return float_type_node;
21754 return NULL_TREE;
21755 }
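
/* Illustrative sketch (added for exposition): with the promotion above,
   arithmetic on __fp16 operands is carried out in float, e.g.

       __fp16 a, b;
       float  r = a + b;   // both operands are promoted to float before the add

   assuming the storage-only __fp16 semantics selected via arm_fp16_format.  */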
21756
21757 /* Implement TARGET_CONVERT_TO_TYPE.
21758 Specifically, this hook implements the peculiarity of the ARM
21759 half-precision floating-point C semantics that requires conversions
21760 between __fp16 and double to go through an intermediate conversion to float. */
21761
21762 static tree
21763 arm_convert_to_type (tree type, tree expr)
21764 {
21765 tree fromtype = TREE_TYPE (expr);
21766 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
21767 return NULL_TREE;
21768 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
21769 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
21770 return convert (type, convert (float_type_node, expr));
21771 return NULL_TREE;
21772 }
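
/* Illustrative sketch (added for exposition): conversions between __fp16
   and double are split into two steps, e.g.

       __fp16 h;
       double d = h;        // lowered as (double)(float) h
       __fp16 g = d;        // lowered as (__fp16)(float) d

   matching the intermediate conversion to float inserted above.  */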
21773
21774 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
21775 This simply adds HFmode as a supported mode; even though we don't
21776 implement arithmetic on this type directly, it's supported by
21777 optabs conversions, much the way the double-word arithmetic is
21778 special-cased in the default hook. */
21779
21780 static bool
21781 arm_scalar_mode_supported_p (enum machine_mode mode)
21782 {
21783 if (mode == HFmode)
21784 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
21785 else if (ALL_FIXED_POINT_MODE_P (mode))
21786 return true;
21787 else
21788 return default_scalar_mode_supported_p (mode);
21789 }
21790
21791 /* Errors in the source file can cause expand_expr to return const0_rtx
21792 where we expect a vector. To avoid crashing, use one of the vector
21793 clear instructions. */
21794
21795 static rtx
21796 safe_vector_operand (rtx x, enum machine_mode mode)
21797 {
21798 if (x != const0_rtx)
21799 return x;
21800 x = gen_reg_rtx (mode);
21801
21802 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
21803 : gen_rtx_SUBREG (DImode, x, 0)));
21804 return x;
21805 }
21806
21807 /* Subroutine of arm_expand_builtin to take care of binop insns. */
21808
21809 static rtx
21810 arm_expand_binop_builtin (enum insn_code icode,
21811 tree exp, rtx target)
21812 {
21813 rtx pat;
21814 tree arg0 = CALL_EXPR_ARG (exp, 0);
21815 tree arg1 = CALL_EXPR_ARG (exp, 1);
21816 rtx op0 = expand_normal (arg0);
21817 rtx op1 = expand_normal (arg1);
21818 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21819 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21820 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
21821
21822 if (VECTOR_MODE_P (mode0))
21823 op0 = safe_vector_operand (op0, mode0);
21824 if (VECTOR_MODE_P (mode1))
21825 op1 = safe_vector_operand (op1, mode1);
21826
21827 if (! target
21828 || GET_MODE (target) != tmode
21829 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21830 target = gen_reg_rtx (tmode);
21831
21832 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
21833 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
21834
21835 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21836 op0 = copy_to_mode_reg (mode0, op0);
21837 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21838 op1 = copy_to_mode_reg (mode1, op1);
21839
21840 pat = GEN_FCN (icode) (target, op0, op1);
21841 if (! pat)
21842 return 0;
21843 emit_insn (pat);
21844 return target;
21845 }
21846
21847 /* Subroutine of arm_expand_builtin to take care of unop insns. */
21848
21849 static rtx
21850 arm_expand_unop_builtin (enum insn_code icode,
21851 tree exp, rtx target, int do_load)
21852 {
21853 rtx pat;
21854 tree arg0 = CALL_EXPR_ARG (exp, 0);
21855 rtx op0 = expand_normal (arg0);
21856 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21857 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21858
21859 if (! target
21860 || GET_MODE (target) != tmode
21861 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21862 target = gen_reg_rtx (tmode);
21863 if (do_load)
21864 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
21865 else
21866 {
21867 if (VECTOR_MODE_P (mode0))
21868 op0 = safe_vector_operand (op0, mode0);
21869
21870 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21871 op0 = copy_to_mode_reg (mode0, op0);
21872 }
21873
21874 pat = GEN_FCN (icode) (target, op0);
21875 if (! pat)
21876 return 0;
21877 emit_insn (pat);
21878 return target;
21879 }
21880
21881 typedef enum {
21882 NEON_ARG_COPY_TO_REG,
21883 NEON_ARG_CONSTANT,
21884 NEON_ARG_MEMORY,
21885 NEON_ARG_STOP
21886 } builtin_arg;
21887
21888 #define NEON_MAX_BUILTIN_ARGS 5
21889
21890 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
21891 and return an expression for the accessed memory.
21892
21893 The intrinsic function operates on a block of registers that has
21894 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
21895 function references the memory at EXP of type TYPE and in mode
21896 MEM_MODE; this mode may be BLKmode if no more suitable mode is
21897 available. */
21898
21899 static tree
21900 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
21901 enum machine_mode reg_mode,
21902 neon_builtin_type_mode type_mode)
21903 {
21904 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
21905 tree elem_type, upper_bound, array_type;
21906
21907 /* Work out the size of the register block in bytes. */
21908 reg_size = GET_MODE_SIZE (reg_mode);
21909
21910 /* Work out the size of each vector in bytes. */
21911 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
21912 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
21913
21914 /* Work out how many vectors there are. */
21915 gcc_assert (reg_size % vector_size == 0);
21916 nvectors = reg_size / vector_size;
21917
21918 /* Work out the type of each element. */
21919 gcc_assert (POINTER_TYPE_P (type));
21920 elem_type = TREE_TYPE (type);
21921
21922 /* Work out how many elements are being loaded or stored.
21923 MEM_MODE == REG_MODE implies a one-to-one mapping between register
21924 and memory elements; anything else implies a lane load or store. */
21925 if (mem_mode == reg_mode)
21926 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
21927 else
21928 nelems = nvectors;
21929
21930 /* Create a type that describes the full access. */
21931 upper_bound = build_int_cst (size_type_node, nelems - 1);
21932 array_type = build_array_type (elem_type, build_index_type (upper_bound));
21933
21934 /* Dereference EXP using that type. */
21935 return fold_build2 (MEM_REF, array_type, exp,
21936 build_int_cst (build_pointer_type (array_type), 0));
21937 }
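
/* Worked example (added for exposition, assuming a plain full-width load
   such as vld1q_f32): REG_MODE == MEM_MODE == V4SFmode, so reg_size == 16,
   vector_size == 16 and nvectors == 1; with a float element type this gives
   nelems == 4, and the access is described as a dereference of EXP with
   type float[4].  */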
21938
21939 /* Expand the operands of a Neon builtin and emit the matching
21940 instruction; used by arm_expand_neon_builtin below. */
21940 static rtx
21941 arm_expand_neon_args (rtx target, int icode, int have_retval,
21942 neon_builtin_type_mode type_mode,
21943 tree exp, int fcode, ...)
21944 {
21945 va_list ap;
21946 rtx pat;
21947 tree arg[NEON_MAX_BUILTIN_ARGS];
21948 rtx op[NEON_MAX_BUILTIN_ARGS];
21949 tree arg_type;
21950 tree formals;
21951 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21952 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
21953 enum machine_mode other_mode;
21954 int argc = 0;
21955 int opno;
21956
21957 if (have_retval
21958 && (!target
21959 || GET_MODE (target) != tmode
21960 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
21961 target = gen_reg_rtx (tmode);
21962
21963 va_start (ap, fcode);
21964
21965 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
21966
21967 for (;;)
21968 {
21969 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
21970
21971 if (thisarg == NEON_ARG_STOP)
21972 break;
21973 else
21974 {
21975 opno = argc + have_retval;
21976 mode[argc] = insn_data[icode].operand[opno].mode;
21977 arg[argc] = CALL_EXPR_ARG (exp, argc);
21978 arg_type = TREE_VALUE (formals);
21979 if (thisarg == NEON_ARG_MEMORY)
21980 {
21981 other_mode = insn_data[icode].operand[1 - opno].mode;
21982 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
21983 mode[argc], other_mode,
21984 type_mode);
21985 }
21986
21987 op[argc] = expand_normal (arg[argc]);
21988
21989 switch (thisarg)
21990 {
21991 case NEON_ARG_COPY_TO_REG:
21992 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
21993 if (!(*insn_data[icode].operand[opno].predicate)
21994 (op[argc], mode[argc]))
21995 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
21996 break;
21997
21998 case NEON_ARG_CONSTANT:
21999 /* FIXME: This error message is somewhat unhelpful. */
22000 if (!(*insn_data[icode].operand[opno].predicate)
22001 (op[argc], mode[argc]))
22002 error ("argument must be a constant");
22003 break;
22004
22005 case NEON_ARG_MEMORY:
22006 gcc_assert (MEM_P (op[argc]));
22007 PUT_MODE (op[argc], mode[argc]);
22008 /* ??? arm_neon.h uses the same built-in functions for signed
22009 and unsigned accesses, casting where necessary. This isn't
22010 alias safe. */
22011 set_mem_alias_set (op[argc], 0);
22012 if (!(*insn_data[icode].operand[opno].predicate)
22013 (op[argc], mode[argc]))
22014 op[argc] = (replace_equiv_address
22015 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
22016 break;
22017
22018 case NEON_ARG_STOP:
22019 gcc_unreachable ();
22020 }
22021
22022 argc++;
22023 formals = TREE_CHAIN (formals);
22024 }
22025 }
22026
22027 va_end (ap);
22028
22029 if (have_retval)
22030 switch (argc)
22031 {
22032 case 1:
22033 pat = GEN_FCN (icode) (target, op[0]);
22034 break;
22035
22036 case 2:
22037 pat = GEN_FCN (icode) (target, op[0], op[1]);
22038 break;
22039
22040 case 3:
22041 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
22042 break;
22043
22044 case 4:
22045 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
22046 break;
22047
22048 case 5:
22049 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
22050 break;
22051
22052 default:
22053 gcc_unreachable ();
22054 }
22055 else
22056 switch (argc)
22057 {
22058 case 1:
22059 pat = GEN_FCN (icode) (op[0]);
22060 break;
22061
22062 case 2:
22063 pat = GEN_FCN (icode) (op[0], op[1]);
22064 break;
22065
22066 case 3:
22067 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
22068 break;
22069
22070 case 4:
22071 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
22072 break;
22073
22074 case 5:
22075 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
22076 break;
22077
22078 default:
22079 gcc_unreachable ();
22080 }
22081
22082 if (!pat)
22083 return 0;
22084
22085 emit_insn (pat);
22086
22087 return target;
22088 }
22089
22090 /* Expand a Neon builtin. These are "special" because they don't have symbolic
22091 constants defined per-instruction or per instruction-variant. Instead, the
22092 required info is looked up in the table neon_builtin_data. */
22093 static rtx
22094 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
22095 {
22096 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
22097 neon_itype itype = d->itype;
22098 enum insn_code icode = d->code;
22099 neon_builtin_type_mode type_mode = d->mode;
22100
22101 switch (itype)
22102 {
22103 case NEON_UNOP:
22104 case NEON_CONVERT:
22105 case NEON_DUPLANE:
22106 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22107 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22108
22109 case NEON_BINOP:
22110 case NEON_SETLANE:
22111 case NEON_SCALARMUL:
22112 case NEON_SCALARMULL:
22113 case NEON_SCALARMULH:
22114 case NEON_SHIFTINSERT:
22115 case NEON_LOGICBINOP:
22116 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22117 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22118 NEON_ARG_STOP);
22119
22120 case NEON_TERNOP:
22121 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22122 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22123 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22124
22125 case NEON_GETLANE:
22126 case NEON_FIXCONV:
22127 case NEON_SHIFTIMM:
22128 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22129 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
22130 NEON_ARG_STOP);
22131
22132 case NEON_CREATE:
22133 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22134 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22135
22136 case NEON_DUP:
22137 case NEON_RINT:
22138 case NEON_SPLIT:
22139 case NEON_FLOAT_WIDEN:
22140 case NEON_FLOAT_NARROW:
22141 case NEON_REINTERP:
22142 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22143 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22144
22145 case NEON_COMBINE:
22146 case NEON_VTBL:
22147 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22148 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22149
22150 case NEON_RESULTPAIR:
22151 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22152 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22153 NEON_ARG_STOP);
22154
22155 case NEON_LANEMUL:
22156 case NEON_LANEMULL:
22157 case NEON_LANEMULH:
22158 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22159 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22160 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22161
22162 case NEON_LANEMAC:
22163 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22164 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22165 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22166
22167 case NEON_SHIFTACC:
22168 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22169 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22170 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22171
22172 case NEON_SCALARMAC:
22173 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22174 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22175 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22176
22177 case NEON_SELECT:
22178 case NEON_VTBX:
22179 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22180 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22181 NEON_ARG_STOP);
22182
22183 case NEON_LOAD1:
22184 case NEON_LOADSTRUCT:
22185 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22186 NEON_ARG_MEMORY, NEON_ARG_STOP);
22187
22188 case NEON_LOAD1LANE:
22189 case NEON_LOADSTRUCTLANE:
22190 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22191 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22192 NEON_ARG_STOP);
22193
22194 case NEON_STORE1:
22195 case NEON_STORESTRUCT:
22196 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22197 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22198
22199 case NEON_STORE1LANE:
22200 case NEON_STORESTRUCTLANE:
22201 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22202 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22203 NEON_ARG_STOP);
22204 }
22205
22206 gcc_unreachable ();
22207 }
22208
22209 /* Emit code to reinterpret one Neon type as another, without altering bits. */
22210 void
22211 neon_reinterpret (rtx dest, rtx src)
22212 {
22213 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
22214 }
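
/* Illustrative note (added for exposition): this backs the vreinterpret*
   style intrinsics; e.g. reinterpreting a uint32x2_t as an int16x4_t simply
   emits a register-to-register move of the 64-bit contents, leaving the
   bit pattern untouched.  */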
22215
22216 /* Emit code to place a Neon pair result in memory locations (with equal
22217 registers). */
22218 void
22219 neon_emit_pair_result_insn (enum machine_mode mode,
22220 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
22221 rtx op1, rtx op2)
22222 {
22223 rtx mem = gen_rtx_MEM (mode, destaddr);
22224 rtx tmp1 = gen_reg_rtx (mode);
22225 rtx tmp2 = gen_reg_rtx (mode);
22226
22227 emit_insn (intfn (tmp1, op1, op2, tmp2));
22228
22229 emit_move_insn (mem, tmp1);
22230 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
22231 emit_move_insn (mem, tmp2);
22232 }
22233
22234 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22235 not to early-clobber SRC registers in the process.
22236
22237 We assume that the operands described by SRC and DEST represent a
22238 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22239 number of components into which the copy has been decomposed. */
22240 void
22241 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
22242 {
22243 unsigned int i;
22244
22245 if (!reg_overlap_mentioned_p (operands[0], operands[1])
22246 || REGNO (operands[0]) < REGNO (operands[1]))
22247 {
22248 for (i = 0; i < count; i++)
22249 {
22250 operands[2 * i] = dest[i];
22251 operands[2 * i + 1] = src[i];
22252 }
22253 }
22254 else
22255 {
22256 for (i = 0; i < count; i++)
22257 {
22258 operands[2 * i] = dest[count - i - 1];
22259 operands[2 * i + 1] = src[count - i - 1];
22260 }
22261 }
22262 }
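
/* Worked example (added for exposition): for a two-part copy where the
   destination overlaps the source and REGNO (operands[0]) > REGNO
   (operands[1]), the components are emitted in reverse order, i.e.

       operands[0] = dest[1];  operands[1] = src[1];
       operands[2] = dest[0];  operands[3] = src[0];

   so the second source half is read before anything overwrites it.  */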
22263
22264 /* Split operands into moves from op[1] + op[2] into op[0]. */
22265
22266 void
22267 neon_split_vcombine (rtx operands[3])
22268 {
22269 unsigned int dest = REGNO (operands[0]);
22270 unsigned int src1 = REGNO (operands[1]);
22271 unsigned int src2 = REGNO (operands[2]);
22272 enum machine_mode halfmode = GET_MODE (operands[1]);
22273 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
22274 rtx destlo, desthi;
22275
22276 if (src1 == dest && src2 == dest + halfregs)
22277 {
22278 /* No-op move. Can't split to nothing; emit something. */
22279 emit_note (NOTE_INSN_DELETED);
22280 return;
22281 }
22282
22283 /* Preserve register attributes for variable tracking. */
22284 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
22285 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
22286 GET_MODE_SIZE (halfmode));
22287
22288 /* Special case of reversed high/low parts. Use VSWP. */
22289 if (src2 == dest && src1 == dest + halfregs)
22290 {
22291 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
22292 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
22293 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
22294 return;
22295 }
22296
22297 if (!reg_overlap_mentioned_p (operands[2], destlo))
22298 {
22299 /* Try to avoid unnecessary moves if part of the result
22300 is in the right place already. */
22301 if (src1 != dest)
22302 emit_move_insn (destlo, operands[1]);
22303 if (src2 != dest + halfregs)
22304 emit_move_insn (desthi, operands[2]);
22305 }
22306 else
22307 {
22308 if (src2 != dest + halfregs)
22309 emit_move_insn (desthi, operands[2]);
22310 if (src1 != dest)
22311 emit_move_insn (destlo, operands[1]);
22312 }
22313 }
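
/* Illustrative note (added for exposition): for a vcombine where the low
   source already sits in the low half of the destination, only the high
   half is moved; if the halves are exactly swapped, the two sets are
   emitted as a single PARALLEL so that a VSWP can be used.  */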
22314
22315 /* Expand an expression EXP that calls a built-in function,
22316 with result going to TARGET if that's convenient
22317 (and in mode MODE if that's convenient).
22318 SUBTARGET may be used as the target for computing one of EXP's operands.
22319 IGNORE is nonzero if the value is to be ignored. */
22320
22321 static rtx
22322 arm_expand_builtin (tree exp,
22323 rtx target,
22324 rtx subtarget ATTRIBUTE_UNUSED,
22325 enum machine_mode mode ATTRIBUTE_UNUSED,
22326 int ignore ATTRIBUTE_UNUSED)
22327 {
22328 const struct builtin_description * d;
22329 enum insn_code icode;
22330 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22331 tree arg0;
22332 tree arg1;
22333 tree arg2;
22334 rtx op0;
22335 rtx op1;
22336 rtx op2;
22337 rtx pat;
22338 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22339 size_t i;
22340 enum machine_mode tmode;
22341 enum machine_mode mode0;
22342 enum machine_mode mode1;
22343 enum machine_mode mode2;
22344 int opint;
22345 int selector;
22346 int mask;
22347 int imm;
22348
22349 if (fcode >= ARM_BUILTIN_NEON_BASE)
22350 return arm_expand_neon_builtin (fcode, exp, target);
22351
22352 switch (fcode)
22353 {
22354 case ARM_BUILTIN_TEXTRMSB:
22355 case ARM_BUILTIN_TEXTRMUB:
22356 case ARM_BUILTIN_TEXTRMSH:
22357 case ARM_BUILTIN_TEXTRMUH:
22358 case ARM_BUILTIN_TEXTRMSW:
22359 case ARM_BUILTIN_TEXTRMUW:
22360 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
22361 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
22362 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
22363 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
22364 : CODE_FOR_iwmmxt_textrmw);
22365
22366 arg0 = CALL_EXPR_ARG (exp, 0);
22367 arg1 = CALL_EXPR_ARG (exp, 1);
22368 op0 = expand_normal (arg0);
22369 op1 = expand_normal (arg1);
22370 tmode = insn_data[icode].operand[0].mode;
22371 mode0 = insn_data[icode].operand[1].mode;
22372 mode1 = insn_data[icode].operand[2].mode;
22373
22374 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22375 op0 = copy_to_mode_reg (mode0, op0);
22376 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22377 {
22378 /* @@@ better error message */
22379 error ("selector must be an immediate");
22380 return gen_reg_rtx (tmode);
22381 }
22382
22383 opint = INTVAL (op1);
22384 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
22385 {
22386 if (opint > 7 || opint < 0)
22387 error ("the range of selector should be in 0 to 7");
22388 }
22389 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
22390 {
22391 if (opint > 3 || opint < 0)
22392 error ("the range of selector should be in 0 to 3");
22393 }
22394 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
22395 {
22396 if (opint > 1 || opint < 0)
22397 error ("the range of selector should be in 0 to 1");
22398 }
22399
22400 if (target == 0
22401 || GET_MODE (target) != tmode
22402 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22403 target = gen_reg_rtx (tmode);
22404 pat = GEN_FCN (icode) (target, op0, op1);
22405 if (! pat)
22406 return 0;
22407 emit_insn (pat);
22408 return target;
22409
22410 case ARM_BUILTIN_WALIGNI:
22411 /* If op2 is an immediate, call waligni; otherwise call walignr. */
22412 arg0 = CALL_EXPR_ARG (exp, 0);
22413 arg1 = CALL_EXPR_ARG (exp, 1);
22414 arg2 = CALL_EXPR_ARG (exp, 2);
22415 op0 = expand_normal (arg0);
22416 op1 = expand_normal (arg1);
22417 op2 = expand_normal (arg2);
22418 if (CONST_INT_P (op2))
22419 {
22420 icode = CODE_FOR_iwmmxt_waligni;
22421 tmode = insn_data[icode].operand[0].mode;
22422 mode0 = insn_data[icode].operand[1].mode;
22423 mode1 = insn_data[icode].operand[2].mode;
22424 mode2 = insn_data[icode].operand[3].mode;
22425 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22426 op0 = copy_to_mode_reg (mode0, op0);
22427 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22428 op1 = copy_to_mode_reg (mode1, op1);
22429 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
22430 selector = INTVAL (op2);
22431 if (selector > 7 || selector < 0)
22432 error ("the range of selector should be in 0 to 7");
22433 }
22434 else
22435 {
22436 icode = CODE_FOR_iwmmxt_walignr;
22437 tmode = insn_data[icode].operand[0].mode;
22438 mode0 = insn_data[icode].operand[1].mode;
22439 mode1 = insn_data[icode].operand[2].mode;
22440 mode2 = insn_data[icode].operand[3].mode;
22441 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22442 op0 = copy_to_mode_reg (mode0, op0);
22443 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22444 op1 = copy_to_mode_reg (mode1, op1);
22445 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
22446 op2 = copy_to_mode_reg (mode2, op2);
22447 }
22448 if (target == 0
22449 || GET_MODE (target) != tmode
22450 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22451 target = gen_reg_rtx (tmode);
22452 pat = GEN_FCN (icode) (target, op0, op1, op2);
22453 if (!pat)
22454 return 0;
22455 emit_insn (pat);
22456 return target;
22457
22458 case ARM_BUILTIN_TINSRB:
22459 case ARM_BUILTIN_TINSRH:
22460 case ARM_BUILTIN_TINSRW:
22461 case ARM_BUILTIN_WMERGE:
22462 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
22463 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
22464 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
22465 : CODE_FOR_iwmmxt_tinsrw);
22466 arg0 = CALL_EXPR_ARG (exp, 0);
22467 arg1 = CALL_EXPR_ARG (exp, 1);
22468 arg2 = CALL_EXPR_ARG (exp, 2);
22469 op0 = expand_normal (arg0);
22470 op1 = expand_normal (arg1);
22471 op2 = expand_normal (arg2);
22472 tmode = insn_data[icode].operand[0].mode;
22473 mode0 = insn_data[icode].operand[1].mode;
22474 mode1 = insn_data[icode].operand[2].mode;
22475 mode2 = insn_data[icode].operand[3].mode;
22476
22477 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22478 op0 = copy_to_mode_reg (mode0, op0);
22479 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22480 op1 = copy_to_mode_reg (mode1, op1);
22481 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22482 {
22483 error ("selector must be an immediate");
22484 return const0_rtx;
22485 }
22486 if (icode == CODE_FOR_iwmmxt_wmerge)
22487 {
22488 selector = INTVAL (op2);
22489 if (selector > 7 || selector < 0)
22490 error ("the range of selector should be in 0 to 7");
22491 }
22492 if ((icode == CODE_FOR_iwmmxt_tinsrb)
22493 || (icode == CODE_FOR_iwmmxt_tinsrh)
22494 || (icode == CODE_FOR_iwmmxt_tinsrw))
22495 {
22496 mask = 0x01;
22497 selector = INTVAL (op2);
22498 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
22499 error ("the range of selector should be in 0 to 7");
22500 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
22501 error ("the range of selector should be in 0 to 3");
22502 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
22503 error ("the range of selector should be in 0 to 1");
22504 mask <<= selector;
22505 op2 = GEN_INT (mask);
22506 }
22507 if (target == 0
22508 || GET_MODE (target) != tmode
22509 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22510 target = gen_reg_rtx (tmode);
22511 pat = GEN_FCN (icode) (target, op0, op1, op2);
22512 if (! pat)
22513 return 0;
22514 emit_insn (pat);
22515 return target;
22516
22517 case ARM_BUILTIN_SETWCGR0:
22518 case ARM_BUILTIN_SETWCGR1:
22519 case ARM_BUILTIN_SETWCGR2:
22520 case ARM_BUILTIN_SETWCGR3:
22521 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
22522 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
22523 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
22524 : CODE_FOR_iwmmxt_setwcgr3);
22525 arg0 = CALL_EXPR_ARG (exp, 0);
22526 op0 = expand_normal (arg0);
22527 mode0 = insn_data[icode].operand[0].mode;
22528 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
22529 op0 = copy_to_mode_reg (mode0, op0);
22530 pat = GEN_FCN (icode) (op0);
22531 if (!pat)
22532 return 0;
22533 emit_insn (pat);
22534 return 0;
22535
22536 case ARM_BUILTIN_GETWCGR0:
22537 case ARM_BUILTIN_GETWCGR1:
22538 case ARM_BUILTIN_GETWCGR2:
22539 case ARM_BUILTIN_GETWCGR3:
22540 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
22541 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
22542 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
22543 : CODE_FOR_iwmmxt_getwcgr3);
22544 tmode = insn_data[icode].operand[0].mode;
22545 if (target == 0
22546 || GET_MODE (target) != tmode
22547 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22548 target = gen_reg_rtx (tmode);
22549 pat = GEN_FCN (icode) (target);
22550 if (!pat)
22551 return 0;
22552 emit_insn (pat);
22553 return target;
22554
22555 case ARM_BUILTIN_WSHUFH:
22556 icode = CODE_FOR_iwmmxt_wshufh;
22557 arg0 = CALL_EXPR_ARG (exp, 0);
22558 arg1 = CALL_EXPR_ARG (exp, 1);
22559 op0 = expand_normal (arg0);
22560 op1 = expand_normal (arg1);
22561 tmode = insn_data[icode].operand[0].mode;
22562 mode1 = insn_data[icode].operand[1].mode;
22563 mode2 = insn_data[icode].operand[2].mode;
22564
22565 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
22566 op0 = copy_to_mode_reg (mode1, op0);
22567 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
22568 {
22569 error ("mask must be an immediate");
22570 return const0_rtx;
22571 }
22572 selector = INTVAL (op1);
22573 if (selector < 0 || selector > 255)
22574 error ("the range of mask should be in 0 to 255");
22575 if (target == 0
22576 || GET_MODE (target) != tmode
22577 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22578 target = gen_reg_rtx (tmode);
22579 pat = GEN_FCN (icode) (target, op0, op1);
22580 if (! pat)
22581 return 0;
22582 emit_insn (pat);
22583 return target;
22584
22585 case ARM_BUILTIN_WMADDS:
22586 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
22587 case ARM_BUILTIN_WMADDSX:
22588 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
22589 case ARM_BUILTIN_WMADDSN:
22590 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
22591 case ARM_BUILTIN_WMADDU:
22592 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
22593 case ARM_BUILTIN_WMADDUX:
22594 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
22595 case ARM_BUILTIN_WMADDUN:
22596 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
22597 case ARM_BUILTIN_WSADBZ:
22598 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
22599 case ARM_BUILTIN_WSADHZ:
22600 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
22601
22602 /* Several three-argument builtins. */
22603 case ARM_BUILTIN_WMACS:
22604 case ARM_BUILTIN_WMACU:
22605 case ARM_BUILTIN_TMIA:
22606 case ARM_BUILTIN_TMIAPH:
22607 case ARM_BUILTIN_TMIATT:
22608 case ARM_BUILTIN_TMIATB:
22609 case ARM_BUILTIN_TMIABT:
22610 case ARM_BUILTIN_TMIABB:
22611 case ARM_BUILTIN_WQMIABB:
22612 case ARM_BUILTIN_WQMIABT:
22613 case ARM_BUILTIN_WQMIATB:
22614 case ARM_BUILTIN_WQMIATT:
22615 case ARM_BUILTIN_WQMIABBN:
22616 case ARM_BUILTIN_WQMIABTN:
22617 case ARM_BUILTIN_WQMIATBN:
22618 case ARM_BUILTIN_WQMIATTN:
22619 case ARM_BUILTIN_WMIABB:
22620 case ARM_BUILTIN_WMIABT:
22621 case ARM_BUILTIN_WMIATB:
22622 case ARM_BUILTIN_WMIATT:
22623 case ARM_BUILTIN_WMIABBN:
22624 case ARM_BUILTIN_WMIABTN:
22625 case ARM_BUILTIN_WMIATBN:
22626 case ARM_BUILTIN_WMIATTN:
22627 case ARM_BUILTIN_WMIAWBB:
22628 case ARM_BUILTIN_WMIAWBT:
22629 case ARM_BUILTIN_WMIAWTB:
22630 case ARM_BUILTIN_WMIAWTT:
22631 case ARM_BUILTIN_WMIAWBBN:
22632 case ARM_BUILTIN_WMIAWBTN:
22633 case ARM_BUILTIN_WMIAWTBN:
22634 case ARM_BUILTIN_WMIAWTTN:
22635 case ARM_BUILTIN_WSADB:
22636 case ARM_BUILTIN_WSADH:
22637 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
22638 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
22639 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
22640 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
22641 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
22642 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
22643 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
22644 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
22645 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
22646 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
22647 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
22648 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
22649 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
22650 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
22651 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
22652 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
22653 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
22654 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
22655 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
22656 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
22657 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
22658 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
22659 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
22660 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
22661 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
22662 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
22663 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
22664 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
22665 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
22666 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
22667 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
22668 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
22669 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
22670 : CODE_FOR_iwmmxt_wsadh);
22671 arg0 = CALL_EXPR_ARG (exp, 0);
22672 arg1 = CALL_EXPR_ARG (exp, 1);
22673 arg2 = CALL_EXPR_ARG (exp, 2);
22674 op0 = expand_normal (arg0);
22675 op1 = expand_normal (arg1);
22676 op2 = expand_normal (arg2);
22677 tmode = insn_data[icode].operand[0].mode;
22678 mode0 = insn_data[icode].operand[1].mode;
22679 mode1 = insn_data[icode].operand[2].mode;
22680 mode2 = insn_data[icode].operand[3].mode;
22681
22682 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22683 op0 = copy_to_mode_reg (mode0, op0);
22684 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22685 op1 = copy_to_mode_reg (mode1, op1);
22686 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22687 op2 = copy_to_mode_reg (mode2, op2);
22688 if (target == 0
22689 || GET_MODE (target) != tmode
22690 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22691 target = gen_reg_rtx (tmode);
22692 pat = GEN_FCN (icode) (target, op0, op1, op2);
22693 if (! pat)
22694 return 0;
22695 emit_insn (pat);
22696 return target;
22697
22698 case ARM_BUILTIN_WZERO:
22699 target = gen_reg_rtx (DImode);
22700 emit_insn (gen_iwmmxt_clrdi (target));
22701 return target;
22702
22703 case ARM_BUILTIN_WSRLHI:
22704 case ARM_BUILTIN_WSRLWI:
22705 case ARM_BUILTIN_WSRLDI:
22706 case ARM_BUILTIN_WSLLHI:
22707 case ARM_BUILTIN_WSLLWI:
22708 case ARM_BUILTIN_WSLLDI:
22709 case ARM_BUILTIN_WSRAHI:
22710 case ARM_BUILTIN_WSRAWI:
22711 case ARM_BUILTIN_WSRADI:
22712 case ARM_BUILTIN_WRORHI:
22713 case ARM_BUILTIN_WRORWI:
22714 case ARM_BUILTIN_WRORDI:
22715 case ARM_BUILTIN_WSRLH:
22716 case ARM_BUILTIN_WSRLW:
22717 case ARM_BUILTIN_WSRLD:
22718 case ARM_BUILTIN_WSLLH:
22719 case ARM_BUILTIN_WSLLW:
22720 case ARM_BUILTIN_WSLLD:
22721 case ARM_BUILTIN_WSRAH:
22722 case ARM_BUILTIN_WSRAW:
22723 case ARM_BUILTIN_WSRAD:
22724 case ARM_BUILTIN_WRORH:
22725 case ARM_BUILTIN_WRORW:
22726 case ARM_BUILTIN_WRORD:
22727 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
22728 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
22729 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
22730 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
22731 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
22732 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
22733 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
22734 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
22735 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
22736 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
22737 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
22738 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
22739 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
22740 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
22741 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
22742 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
22743 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
22744 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
22745 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
22746 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
22747 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
22748 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
22749 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
22750 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
22751 : CODE_FOR_nothing);
22752 arg1 = CALL_EXPR_ARG (exp, 1);
22753 op1 = expand_normal (arg1);
22754 if (GET_MODE (op1) == VOIDmode)
22755 {
22756 imm = INTVAL (op1);
22757 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
22758 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
22759 && (imm < 0 || imm > 32))
22760 {
22761 if (fcode == ARM_BUILTIN_WRORHI)
22762 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
22763 else if (fcode == ARM_BUILTIN_WRORWI)
22764 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
22765 else if (fcode == ARM_BUILTIN_WRORH)
22766 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
22767 else
22768 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
22769 }
22770 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
22771 && (imm < 0 || imm > 64))
22772 {
22773 if (fcode == ARM_BUILTIN_WRORDI)
22774 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
22775 else
22776 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
22777 }
22778 else if (imm < 0)
22779 {
22780 if (fcode == ARM_BUILTIN_WSRLHI)
22781 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
22782 else if (fcode == ARM_BUILTIN_WSRLWI)
22783 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
22784 else if (fcode == ARM_BUILTIN_WSRLDI)
22785 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
22786 else if (fcode == ARM_BUILTIN_WSLLHI)
22787 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
22788 else if (fcode == ARM_BUILTIN_WSLLWI)
22789 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
22790 else if (fcode == ARM_BUILTIN_WSLLDI)
22791 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
22792 else if (fcode == ARM_BUILTIN_WSRAHI)
22793 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
22794 else if (fcode == ARM_BUILTIN_WSRAWI)
22795 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
22796 else if (fcode == ARM_BUILTIN_WSRADI)
22797 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
22798 else if (fcode == ARM_BUILTIN_WSRLH)
22799 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
22800 else if (fcode == ARM_BUILTIN_WSRLW)
22801 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
22802 else if (fcode == ARM_BUILTIN_WSRLD)
22803 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
22804 else if (fcode == ARM_BUILTIN_WSLLH)
22805 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
22806 else if (fcode == ARM_BUILTIN_WSLLW)
22807 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
22808 else if (fcode == ARM_BUILTIN_WSLLD)
22809 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
22810 else if (fcode == ARM_BUILTIN_WSRAH)
22811 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
22812 else if (fcode == ARM_BUILTIN_WSRAW)
22813 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
22814 else
22815 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
22816 }
22817 }
22818 return arm_expand_binop_builtin (icode, exp, target);
22819
22820 default:
22821 break;
22822 }
22823
22824 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
22825 if (d->code == (const enum arm_builtins) fcode)
22826 return arm_expand_binop_builtin (d->icode, exp, target);
22827
22828 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
22829 if (d->code == (const enum arm_builtins) fcode)
22830 return arm_expand_unop_builtin (d->icode, exp, target, 0);
22831
22832 /* @@@ Should really do something sensible here. */
22833 return NULL_RTX;
22834 }
22835 \f
22836 /* Return the number (counting from 0) of
22837 the least significant set bit in MASK. */
22838
22839 inline static int
22840 number_of_first_bit_set (unsigned mask)
22841 {
22842 return ctz_hwi (mask);
22843 }
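
/* Example (added for exposition): number_of_first_bit_set (0x28) == 3,
   since bit 3 is the least significant bit set in 0b101000.  */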
22844
22845 /* Like emit_multi_reg_push, but allowing for a different set of
22846 registers to be described as saved. MASK is the set of registers
22847 to be saved; REAL_REGS is the set of registers to be described as
22848 saved. If REAL_REGS is 0, only describe the stack adjustment. */
22849
22850 static rtx
22851 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22852 {
22853 unsigned long regno;
22854 rtx par[10], tmp, reg, insn;
22855 int i, j;
22856
22857 /* Build the parallel of the registers actually being stored. */
22858 for (i = 0; mask; ++i, mask &= mask - 1)
22859 {
22860 regno = ctz_hwi (mask);
22861 reg = gen_rtx_REG (SImode, regno);
22862
22863 if (i == 0)
22864 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22865 else
22866 tmp = gen_rtx_USE (VOIDmode, reg);
22867
22868 par[i] = tmp;
22869 }
22870
22871 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22872 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22873 tmp = gen_frame_mem (BLKmode, tmp);
22874 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
22875 par[0] = tmp;
22876
22877 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22878 insn = emit_insn (tmp);
22879
22880 /* Always build the stack adjustment note for unwind info. */
22881 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22882 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
22883 par[0] = tmp;
22884
22885 /* Build the parallel of the registers recorded as saved for unwind. */
22886 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22887 {
22888 regno = ctz_hwi (real_regs);
22889 reg = gen_rtx_REG (SImode, regno);
22890
22891 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22892 tmp = gen_frame_mem (SImode, tmp);
22893 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
22894 RTX_FRAME_RELATED_P (tmp) = 1;
22895 par[j + 1] = tmp;
22896 }
22897
22898 if (j == 0)
22899 tmp = par[0];
22900 else
22901 {
22902 RTX_FRAME_RELATED_P (par[0]) = 1;
22903 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22904 }
22905
22906 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
22907
22908 return insn;
22909 }
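
/* Illustrative sketch (added for exposition): pushing {r4, r5, lr} emits a
   single insn whose body is roughly

       (parallel [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) -12)))
                       (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
                  (use (reg r5))
                  (use (reg lr))])

   while the REG_FRAME_RELATED_EXPR note describes the stack adjustment and
   the individual stores for the unwinder.  */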
22910
22911 /* Emit code to pop registers from the stack.  F is the assembly file;
22912 MASK is the set of registers to pop. */
22913 static void
22914 thumb_pop (FILE *f, unsigned long mask)
22915 {
22916 int regno;
22917 int lo_mask = mask & 0xFF;
22918 int pushed_words = 0;
22919
22920 gcc_assert (mask);
22921
22922 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
22923 {
22924 /* Special case.  Do not generate a POP PC statement here; do it in
22925 thumb_exit ().  */
22926 thumb_exit (f, -1);
22927 return;
22928 }
22929
22930 fprintf (f, "\tpop\t{");
22931
22932 /* Look at the low registers first. */
22933 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22934 {
22935 if (lo_mask & 1)
22936 {
22937 asm_fprintf (f, "%r", regno);
22938
22939 if ((lo_mask & ~1) != 0)
22940 fprintf (f, ", ");
22941
22942 pushed_words++;
22943 }
22944 }
22945
22946 if (mask & (1 << PC_REGNUM))
22947 {
22948 /* Catch popping the PC. */
22949 if (TARGET_INTERWORK || TARGET_BACKTRACE
22950 || crtl->calls_eh_return)
22951 {
22952 /* The PC is never popped directly; instead
22953 it is popped into r3 and then BX is used. */
22954 fprintf (f, "}\n");
22955
22956 thumb_exit (f, -1);
22957
22958 return;
22959 }
22960 else
22961 {
22962 if (mask & 0xFF)
22963 fprintf (f, ", ");
22964
22965 asm_fprintf (f, "%r", PC_REGNUM);
22966 }
22967 }
22968
22969 fprintf (f, "}\n");
22970 }
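
/* Example of the output (added for exposition): a MASK of {r0, r1, r4, pc}
   on a target without interworking or backtrace produces roughly

       pop	{r0, r1, r4, pc}

   whereas with interworking or backtrace enabled the PC is not popped
   directly and thumb_exit emits the return sequence instead.  */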
22971
22972 /* Generate code to return from a thumb function.
22973 If 'reg_containing_return_addr' is -1, then the return address is
22974 actually on the stack, at the stack pointer. */
22975 static void
22976 thumb_exit (FILE *f, int reg_containing_return_addr)
22977 {
22978 unsigned regs_available_for_popping;
22979 unsigned regs_to_pop;
22980 int pops_needed;
22981 unsigned available;
22982 unsigned required;
22983 int mode;
22984 int size;
22985 int restore_a4 = FALSE;
22986
22987 /* Compute the registers we need to pop. */
22988 regs_to_pop = 0;
22989 pops_needed = 0;
22990
22991 if (reg_containing_return_addr == -1)
22992 {
22993 regs_to_pop |= 1 << LR_REGNUM;
22994 ++pops_needed;
22995 }
22996
22997 if (TARGET_BACKTRACE)
22998 {
22999 /* Restore the (ARM) frame pointer and stack pointer. */
23000 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23001 pops_needed += 2;
23002 }
23003
23004 /* If there is nothing to pop then just emit the BX instruction and
23005 return. */
23006 if (pops_needed == 0)
23007 {
23008 if (crtl->calls_eh_return)
23009 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23010
23011 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23012 return;
23013 }
23014 /* Otherwise if we are not supporting interworking and we have not created
23015 a backtrace structure and the function was not entered in ARM mode then
23016 just pop the return address straight into the PC. */
23017 else if (!TARGET_INTERWORK
23018 && !TARGET_BACKTRACE
23019 && !is_called_in_ARM_mode (current_function_decl)
23020 && !crtl->calls_eh_return)
23021 {
23022 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23023 return;
23024 }
23025
23026 /* Find out how many of the (return) argument registers we can corrupt. */
23027 regs_available_for_popping = 0;
23028
23029 /* If returning via __builtin_eh_return, the bottom three registers
23030 all contain information needed for the return. */
23031 if (crtl->calls_eh_return)
23032 size = 12;
23033 else
23034 {
23035 /* We can deduce the registers used from the function's
23036 return value.  This is more reliable than examining
23037 df_regs_ever_live_p () because that will be set if the register is
23038 ever used in the function, not just if the register is used
23039 to hold a return value. */
23040
23041 if (crtl->return_rtx != 0)
23042 mode = GET_MODE (crtl->return_rtx);
23043 else
23044 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23045
23046 size = GET_MODE_SIZE (mode);
23047
23048 if (size == 0)
23049 {
23050 /* In a void function we can use any argument register.
23051 In a function that returns a structure on the stack
23052 we can use the second and third argument registers. */
23053 if (mode == VOIDmode)
23054 regs_available_for_popping =
23055 (1 << ARG_REGISTER (1))
23056 | (1 << ARG_REGISTER (2))
23057 | (1 << ARG_REGISTER (3));
23058 else
23059 regs_available_for_popping =
23060 (1 << ARG_REGISTER (2))
23061 | (1 << ARG_REGISTER (3));
23062 }
23063 else if (size <= 4)
23064 regs_available_for_popping =
23065 (1 << ARG_REGISTER (2))
23066 | (1 << ARG_REGISTER (3));
23067 else if (size <= 8)
23068 regs_available_for_popping =
23069 (1 << ARG_REGISTER (3));
23070 }
23071
23072 /* Match registers to be popped with registers into which we pop them. */
23073 for (available = regs_available_for_popping,
23074 required = regs_to_pop;
23075 required != 0 && available != 0;
23076 available &= ~(available & - available),
23077 required &= ~(required & - required))
23078 -- pops_needed;
23079
23080 /* If we have any popping registers left over, remove them. */
23081 if (available > 0)
23082 regs_available_for_popping &= ~available;
23083
23084 /* Otherwise if we need another popping register we can use
23085 the fourth argument register. */
23086 else if (pops_needed)
23087 {
23088 /* If we have not found any free argument registers and
23089 reg a4 contains the return address, we must move it. */
23090 if (regs_available_for_popping == 0
23091 && reg_containing_return_addr == LAST_ARG_REGNUM)
23092 {
23093 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23094 reg_containing_return_addr = LR_REGNUM;
23095 }
23096 else if (size > 12)
23097 {
23098 /* Register a4 is being used to hold part of the return value,
23099 but we have dire need of a free, low register. */
23100 restore_a4 = TRUE;
23101
23102 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23103 }
23104
23105 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23106 {
23107 /* The fourth argument register is available. */
23108 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23109
23110 --pops_needed;
23111 }
23112 }
23113
23114 /* Pop as many registers as we can. */
23115 thumb_pop (f, regs_available_for_popping);
23116
23117 /* Process the registers we popped. */
23118 if (reg_containing_return_addr == -1)
23119 {
23120 /* The return address was popped into the lowest numbered register. */
23121 regs_to_pop &= ~(1 << LR_REGNUM);
23122
23123 reg_containing_return_addr =
23124 number_of_first_bit_set (regs_available_for_popping);
23125
23126 /* Remove this register from the mask of available registers, so that
23127 the return address will not be corrupted by further pops. */
23128 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23129 }
23130
23131 /* If we popped other registers then handle them here. */
23132 if (regs_available_for_popping)
23133 {
23134 int frame_pointer;
23135
23136 /* Work out which register currently contains the frame pointer. */
23137 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23138
23139 /* Move it into the correct place. */
23140 asm_fprintf (f, "\tmov\t%r, %r\n",
23141 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23142
23143 /* (Temporarily) remove it from the mask of popped registers. */
23144 regs_available_for_popping &= ~(1 << frame_pointer);
23145 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23146
23147 if (regs_available_for_popping)
23148 {
23149 int stack_pointer;
23150
23151 /* We popped the stack pointer as well,
23152 find the register that contains it. */
23153 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23154
23155 /* Move it into the stack register. */
23156 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23157
23158 /* At this point we have popped all necessary registers, so
23159 do not worry about restoring regs_available_for_popping
23160 to its correct value:
23161
23162 assert (pops_needed == 0)
23163 assert (regs_available_for_popping == (1 << frame_pointer))
23164 assert (regs_to_pop == (1 << STACK_POINTER)) */
23165 }
23166 else
23167 {
23168 /* Since we have just moved the popped value into the frame
23169 pointer, the popping register is available for reuse, and
23170 we know that we still have the stack pointer left to pop. */
23171 regs_available_for_popping |= (1 << frame_pointer);
23172 }
23173 }
23174
23175 /* If we still have registers left on the stack, but we no longer have
23176 any registers into which we can pop them, then we must move the return
23177 address into the link register and make available the register that
23178 contained it. */
23179 if (regs_available_for_popping == 0 && pops_needed > 0)
23180 {
23181 regs_available_for_popping |= 1 << reg_containing_return_addr;
23182
23183 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23184 reg_containing_return_addr);
23185
23186 reg_containing_return_addr = LR_REGNUM;
23187 }
23188
23189 /* If we have registers left on the stack then pop some more.
23190 We know that at most we will want to pop FP and SP. */
23191 if (pops_needed > 0)
23192 {
23193 int popped_into;
23194 int move_to;
23195
23196 thumb_pop (f, regs_available_for_popping);
23197
23198 /* We have popped either FP or SP.
23199 Move whichever one it is into the correct register. */
23200 popped_into = number_of_first_bit_set (regs_available_for_popping);
23201 move_to = number_of_first_bit_set (regs_to_pop);
23202
23203 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23204
23205 regs_to_pop &= ~(1 << move_to);
23206
23207 --pops_needed;
23208 }
23209
23210 /* If we still have not popped everything then we must have only
23211 had one register available to us and we are now popping the SP. */
23212 if (pops_needed > 0)
23213 {
23214 int popped_into;
23215
23216 thumb_pop (f, regs_available_for_popping);
23217
23218 popped_into = number_of_first_bit_set (regs_available_for_popping);
23219
23220 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23221 /*
23222 assert (regs_to_pop == (1 << STACK_POINTER))
23223 assert (pops_needed == 1)
23224 */
23225 }
23226
23227 /* If necessary restore the a4 register. */
23228 if (restore_a4)
23229 {
23230 if (reg_containing_return_addr != LR_REGNUM)
23231 {
23232 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23233 reg_containing_return_addr = LR_REGNUM;
23234 }
23235
23236 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23237 }
23238
23239 if (crtl->calls_eh_return)
23240 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23241
23242 /* Return to caller. */
23243 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23244 }
23245 \f
23246 /* Scan INSN just before assembler is output for it.
23247 For Thumb-1, we track the status of the condition codes; this
23248 information is used in the cbranchsi4_insn pattern. */
23249 void
23250 thumb1_final_prescan_insn (rtx insn)
23251 {
23252 if (flag_print_asm_name)
23253 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23254 INSN_ADDRESSES (INSN_UID (insn)));
23255 /* Don't overwrite the previous setter when we get to a cbranch. */
23256 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23257 {
23258 enum attr_conds conds;
23259
23260 if (cfun->machine->thumb1_cc_insn)
23261 {
23262 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23263 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23264 CC_STATUS_INIT;
23265 }
23266 conds = get_attr_conds (insn);
23267 if (conds == CONDS_SET)
23268 {
23269 rtx set = single_set (insn);
23270 cfun->machine->thumb1_cc_insn = insn;
23271 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23272 cfun->machine->thumb1_cc_op1 = const0_rtx;
23273 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23274 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23275 {
23276 rtx src1 = XEXP (SET_SRC (set), 1);
23277 if (src1 == const0_rtx)
23278 cfun->machine->thumb1_cc_mode = CCmode;
23279 }
23280 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23281 {
23282 /* Record the src register operand instead of dest because
23283 cprop_hardreg pass propagates src. */
23284 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23285 }
23286 }
23287 else if (conds != CONDS_NOCOND)
23288 cfun->machine->thumb1_cc_insn = NULL_RTX;
23289 }
23290
23291 /* Check whether an unexpected far jump is used. */
23292 if (cfun->machine->lr_save_eliminated
23293 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23294 internal_error ("Unexpected thumb1 far jump");
23295 }
23296
23297 int
23298 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23299 {
23300 unsigned HOST_WIDE_INT mask = 0xff;
23301 int i;
23302
23303 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23304 if (val == 0) /* XXX */
23305 return 0;
23306
23307 for (i = 0; i < 25; i++)
23308 if ((val & (mask << i)) == val)
23309 return 1;
23310
23311 return 0;
23312 }
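
/* Examples (added for exposition): 0xFF000000 is accepted because it is
   0xff << 24, while 0x00012300 is rejected because its set bits span more
   than eight bit positions.  */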
23313
23314 /* Return nonzero if the current function contains,
23315 or might contain, a far jump. */
23316 static int
23317 thumb_far_jump_used_p (void)
23318 {
23319 rtx insn;
23320 bool far_jump = false;
23321 unsigned int func_size = 0;
23322
23323 /* This test is only important for leaf functions. */
23324 /* assert (!leaf_function_p ()); */
23325
23326 /* If we have already decided that far jumps may be used,
23327 do not bother checking again, and always return true even if
23328 it turns out that they are not being used. Once we have made
23329 the decision that far jumps are present (and that hence the link
23330 register will be pushed onto the stack) we cannot go back on it. */
23331 if (cfun->machine->far_jump_used)
23332 return 1;
23333
23334 /* If this function is not being called from the prologue/epilogue
23335 generation code then it must be being called from the
23336 INITIAL_ELIMINATION_OFFSET macro. */
23337 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23338 {
23339 /* In this case we know that we are being asked about the elimination
23340 of the arg pointer register. If that register is not being used,
23341 then there are no arguments on the stack, and we do not have to
23342 worry that a far jump might force the prologue to push the link
23343 register, changing the stack offsets. In this case we can just
23344 return false, since the presence of far jumps in the function will
23345 not affect stack offsets.
23346
23347 If the arg pointer is live (or if it was live, but has now been
23348 eliminated and so set to dead) then we do have to test to see if
23349 the function might contain a far jump. This test can lead to some
23350 false negatives, since before reload is completed, the length of
23351 branch instructions is not known, so gcc defaults to returning their
23352 longest length, which in turn sets the far jump attribute to true.
23353
23354 A false negative will not result in bad code being generated, but it
23355 will result in a needless push and pop of the link register. We
23356 hope that this does not occur too often.
23357
23358 If we need doubleword stack alignment this could affect the other
23359 elimination offsets so we can't risk getting it wrong. */
23360 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23361 cfun->machine->arg_pointer_live = 1;
23362 else if (!cfun->machine->arg_pointer_live)
23363 return 0;
23364 }
23365
23366 /* Check to see if the function contains a branch
23367 insn with the far jump attribute set. */
23368 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23369 {
23370 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23371 {
23372 far_jump = true;
23373 }
23374 func_size += get_attr_length (insn);
23375 }
23376
23377 /* The far_jump attribute is always true for thumb1 before the
23378 shorten_branch pass, so checking the far_jump attribute before
23379 shorten_branch is not very useful.
23380
23381 The following heuristic tries to estimate more accurately whether a far
23382 jump will ultimately be used. The heuristic is very conservative, as
23383 there is no way to roll back a decision not to use far jumps.
23384
23385 The Thumb1 long branch offset range is -2048 to 2046. In the worst case
23386 each 2-byte insn is associated with a 4-byte constant pool entry. Using
23387 a function size of 2048/3 as the threshold is conservative enough. */
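/* For example, a function whose insns total 800 bytes gives 800 * 3 = 2400,
   which is >= 2048, so (if a candidate far jump exists) we conservatively
   commit to using far jumps. */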
23388 if (far_jump)
23389 {
23390 if ((func_size * 3) >= 2048)
23391 {
23392 /* Record the fact that we have decided that
23393 the function does use far jumps. */
23394 cfun->machine->far_jump_used = 1;
23395 return 1;
23396 }
23397 }
23398
23399 return 0;
23400 }
23401
23402 /* Return nonzero if FUNC must be entered in ARM mode. */
23403 int
23404 is_called_in_ARM_mode (tree func)
23405 {
23406 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23407
23408 /* Ignore the problem of functions whose address is taken. */
23409 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23410 return TRUE;
23411
23412 #ifdef ARM_PE
23413 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23414 #else
23415 return FALSE;
23416 #endif
23417 }
23418
23419 /* Given the stack offsets and register mask in OFFSETS, decide how
23420 many additional registers to push instead of subtracting a constant
23421 from SP. For epilogues the principle is the same except we use pop.
23422 FOR_PROLOGUE indicates which we're generating. */
23423 static int
23424 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23425 {
23426 HOST_WIDE_INT amount;
23427 unsigned long live_regs_mask = offsets->saved_regs_mask;
23428 /* Extract a mask of the ones we can give to the Thumb's push/pop
23429 instruction. */
23430 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23431 /* Then count how many other high registers will need to be pushed. */
23432 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23433 int n_free, reg_base, size;
23434
23435 if (!for_prologue && frame_pointer_needed)
23436 amount = offsets->locals_base - offsets->saved_regs;
23437 else
23438 amount = offsets->outgoing_args - offsets->saved_regs;
23439
23440 /* If the stack frame size is 512 exactly, we can save one load
23441 instruction, which should make this a win even when optimizing
23442 for speed. */
23443 if (!optimize_size && amount != 512)
23444 return 0;
23445
23446 /* Can't do this if there are high registers to push. */
23447 if (high_regs_pushed != 0)
23448 return 0;
23449
23450 /* Shouldn't do it in the prologue if no registers would normally
23451 be pushed at all. In the epilogue, also allow it if we'll have
23452 a pop insn for the PC. */
23453 if (l_mask == 0
23454 && (for_prologue
23455 || TARGET_BACKTRACE
23456 || (live_regs_mask & 1 << LR_REGNUM) == 0
23457 || TARGET_INTERWORK
23458 || crtl->args.pretend_args_size != 0))
23459 return 0;
23460
23461 /* Don't do this if thumb_expand_prologue wants to emit instructions
23462 between the push and the stack frame allocation. */
23463 if (for_prologue
23464 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23465 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23466 return 0;
23467
23468 reg_base = 0;
23469 n_free = 0;
23470 if (!for_prologue)
23471 {
23472 size = arm_size_return_regs ();
23473 reg_base = ARM_NUM_INTS (size);
23474 live_regs_mask >>= reg_base;
23475 }
23476
23477 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23478 && (for_prologue || call_used_regs[reg_base + n_free]))
23479 {
23480 live_regs_mask >>= 1;
23481 n_free++;
23482 }
23483
23484 if (n_free == 0)
23485 return 0;
23486 gcc_assert (amount / 4 * 4 == amount);
23487
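/* Decide how many extra registers to push/pop in place of part of the SP
   adjustment. E.g. with amount == 512 and one free register we return
   (512 - 508) / 4 == 1, so one extra push brings the remaining adjustment
   down to 508, which still fits a single add/sub of SP. */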
23488 if (amount >= 512 && (amount - n_free * 4) < 512)
23489 return (amount - 508) / 4;
23490 if (amount <= n_free * 4)
23491 return amount / 4;
23492 return 0;
23493 }
23494
23495 /* The bits which aren't usefully expanded as rtl. */
23496 const char *
23497 thumb1_unexpanded_epilogue (void)
23498 {
23499 arm_stack_offsets *offsets;
23500 int regno;
23501 unsigned long live_regs_mask = 0;
23502 int high_regs_pushed = 0;
23503 int extra_pop;
23504 int had_to_push_lr;
23505 int size;
23506
23507 if (cfun->machine->return_used_this_function != 0)
23508 return "";
23509
23510 if (IS_NAKED (arm_current_func_type ()))
23511 return "";
23512
23513 offsets = arm_get_frame_offsets ();
23514 live_regs_mask = offsets->saved_regs_mask;
23515 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23516
23517 /* We can deduce the registers used from the function's return value.
23518 This is more reliable than examining df_regs_ever_live_p () because that
23519 will be set if the register is ever used in the function, not just if
23520 the register is used to hold a return value. */
23521 size = arm_size_return_regs ();
23522
23523 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23524 if (extra_pop > 0)
23525 {
23526 unsigned long extra_mask = (1 << extra_pop) - 1;
23527 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23528 }
23529
23530 /* The prolog may have pushed some high registers to use as
23531 work registers. e.g. the testsuite file:
23532 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23533 compiles to produce:
23534 push {r4, r5, r6, r7, lr}
23535 mov r7, r9
23536 mov r6, r8
23537 push {r6, r7}
23538 as part of the prolog. We have to undo that pushing here. */
23539
23540 if (high_regs_pushed)
23541 {
23542 unsigned long mask = live_regs_mask & 0xff;
23543 int next_hi_reg;
23544
23545 /* The available low registers depend on the size of the value we are
23546 returning. */
23547 if (size <= 12)
23548 mask |= 1 << 3;
23549 if (size <= 8)
23550 mask |= 1 << 2;
23551
23552 if (mask == 0)
23553 /* Oh dear! We have no low registers into which we can pop
23554 high registers! */
23555 internal_error
23556 ("no low registers available for popping high registers");
23557
23558 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23559 if (live_regs_mask & (1 << next_hi_reg))
23560 break;
23561
23562 while (high_regs_pushed)
23563 {
23564 /* Find lo register(s) into which the high register(s) can
23565 be popped. */
23566 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23567 {
23568 if (mask & (1 << regno))
23569 high_regs_pushed--;
23570 if (high_regs_pushed == 0)
23571 break;
23572 }
23573
23574 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
23575
23576 /* Pop the values into the low register(s). */
23577 thumb_pop (asm_out_file, mask);
23578
23579 /* Move the value(s) into the high registers. */
23580 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23581 {
23582 if (mask & (1 << regno))
23583 {
23584 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
23585 regno);
23586
23587 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
23588 if (live_regs_mask & (1 << next_hi_reg))
23589 break;
23590 }
23591 }
23592 }
23593 live_regs_mask &= ~0x0f00;
23594 }
23595
23596 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
23597 live_regs_mask &= 0xff;
23598
23599 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
23600 {
23601 /* Pop the return address into the PC. */
23602 if (had_to_push_lr)
23603 live_regs_mask |= 1 << PC_REGNUM;
23604
23605 /* Either no argument registers were pushed or a backtrace
23606 structure was created which includes an adjusted stack
23607 pointer, so just pop everything. */
23608 if (live_regs_mask)
23609 thumb_pop (asm_out_file, live_regs_mask);
23610
23611 /* We have either just popped the return address into the
23612 PC or it was kept in LR for the entire function.
23613 Note that thumb_pop has already called thumb_exit if the
23614 PC was in the list. */
23615 if (!had_to_push_lr)
23616 thumb_exit (asm_out_file, LR_REGNUM);
23617 }
23618 else
23619 {
23620 /* Pop everything but the return address. */
23621 if (live_regs_mask)
23622 thumb_pop (asm_out_file, live_regs_mask);
23623
23624 if (had_to_push_lr)
23625 {
23626 if (size > 12)
23627 {
23628 /* We have no free low regs, so save one. */
23629 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
23630 LAST_ARG_REGNUM);
23631 }
23632
23633 /* Get the return address into a temporary register. */
23634 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
23635
23636 if (size > 12)
23637 {
23638 /* Move the return address to lr. */
23639 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
23640 LAST_ARG_REGNUM);
23641 /* Restore the low register. */
23642 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
23643 IP_REGNUM);
23644 regno = LR_REGNUM;
23645 }
23646 else
23647 regno = LAST_ARG_REGNUM;
23648 }
23649 else
23650 regno = LR_REGNUM;
23651
23652 /* Remove the argument registers that were pushed onto the stack. */
23653 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
23654 SP_REGNUM, SP_REGNUM,
23655 crtl->args.pretend_args_size);
23656
23657 thumb_exit (asm_out_file, regno);
23658 }
23659
23660 return "";
23661 }
23662
23663 /* Functions to save and restore machine-specific function data. */
23664 static struct machine_function *
23665 arm_init_machine_status (void)
23666 {
23667 struct machine_function *machine;
23668 machine = ggc_alloc_cleared_machine_function ();
23669
23670 #if ARM_FT_UNKNOWN != 0
23671 machine->func_type = ARM_FT_UNKNOWN;
23672 #endif
23673 return machine;
23674 }
23675
23676 /* Return an RTX indicating where the return address to the
23677 calling function can be found. */
23678 rtx
23679 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
23680 {
23681 if (count != 0)
23682 return NULL_RTX;
23683
23684 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
23685 }
23686
23687 /* Do anything needed before RTL is emitted for each function. */
23688 void
23689 arm_init_expanders (void)
23690 {
23691 /* Arrange to initialize and mark the machine per-function status. */
23692 init_machine_status = arm_init_machine_status;
23693
23694 /* This is to stop the combine pass optimizing away the alignment
23695 adjustment of va_arg. */
23696 /* ??? It is claimed that this should not be necessary. */
23697 if (cfun)
23698 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
23699 }
23700
23701
23702 /* Like arm_compute_initial_elimination_offset. Simpler because there
23703 isn't an ABI specified frame pointer for Thumb. Instead, we set it
23704 to point at the base of the local variables after static stack
23705 space for a function has been allocated. */
23706
23707 HOST_WIDE_INT
23708 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23709 {
23710 arm_stack_offsets *offsets;
23711
23712 offsets = arm_get_frame_offsets ();
23713
23714 switch (from)
23715 {
23716 case ARG_POINTER_REGNUM:
23717 switch (to)
23718 {
23719 case STACK_POINTER_REGNUM:
23720 return offsets->outgoing_args - offsets->saved_args;
23721
23722 case FRAME_POINTER_REGNUM:
23723 return offsets->soft_frame - offsets->saved_args;
23724
23725 case ARM_HARD_FRAME_POINTER_REGNUM:
23726 return offsets->saved_regs - offsets->saved_args;
23727
23728 case THUMB_HARD_FRAME_POINTER_REGNUM:
23729 return offsets->locals_base - offsets->saved_args;
23730
23731 default:
23732 gcc_unreachable ();
23733 }
23734 break;
23735
23736 case FRAME_POINTER_REGNUM:
23737 switch (to)
23738 {
23739 case STACK_POINTER_REGNUM:
23740 return offsets->outgoing_args - offsets->soft_frame;
23741
23742 case ARM_HARD_FRAME_POINTER_REGNUM:
23743 return offsets->saved_regs - offsets->soft_frame;
23744
23745 case THUMB_HARD_FRAME_POINTER_REGNUM:
23746 return offsets->locals_base - offsets->soft_frame;
23747
23748 default:
23749 gcc_unreachable ();
23750 }
23751 break;
23752
23753 default:
23754 gcc_unreachable ();
23755 }
23756 }
23757
23758 /* Generate the function's prologue. */
23759
23760 void
23761 thumb1_expand_prologue (void)
23762 {
23763 rtx insn;
23764
23765 HOST_WIDE_INT amount;
23766 arm_stack_offsets *offsets;
23767 unsigned long func_type;
23768 int regno;
23769 unsigned long live_regs_mask;
23770 unsigned long l_mask;
23771 unsigned high_regs_pushed = 0;
23772
23773 func_type = arm_current_func_type ();
23774
23775 /* Naked functions don't have prologues. */
23776 if (IS_NAKED (func_type))
23777 return;
23778
23779 if (IS_INTERRUPT (func_type))
23780 {
23781 error ("interrupt Service Routines cannot be coded in Thumb mode");
23782 return;
23783 }
23784
23785 if (is_called_in_ARM_mode (current_function_decl))
23786 emit_insn (gen_prologue_thumb1_interwork ());
23787
23788 offsets = arm_get_frame_offsets ();
23789 live_regs_mask = offsets->saved_regs_mask;
23790
23791 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23792 l_mask = live_regs_mask & 0x40ff;
23793 /* Then count how many other high registers will need to be pushed. */
23794 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23795
23796 if (crtl->args.pretend_args_size)
23797 {
23798 rtx x = GEN_INT (-crtl->args.pretend_args_size);
23799
23800 if (cfun->machine->uses_anonymous_args)
23801 {
23802 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23803 unsigned long mask;
23804
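/* Push the argument registers that hold the stacked anonymous arguments
   instead of just dropping SP; e.g. pretend_args_size == 8 gives
   num_pushes == 2 and a mask covering r2 and r3. */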
23805 mask = 1ul << (LAST_ARG_REGNUM + 1);
23806 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
23807
23808 insn = thumb1_emit_multi_reg_push (mask, 0);
23809 }
23810 else
23811 {
23812 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23813 stack_pointer_rtx, x));
23814 }
23815 RTX_FRAME_RELATED_P (insn) = 1;
23816 }
23817
23818 if (TARGET_BACKTRACE)
23819 {
23820 HOST_WIDE_INT offset = 0;
23821 unsigned work_register;
23822 rtx work_reg, x, arm_hfp_rtx;
23823
23824 /* We have been asked to create a stack backtrace structure.
23825 The code looks like this:
23826
23827 0 .align 2
23828 0 func:
23829 0 sub SP, #16 Reserve space for 4 registers.
23830 2 push {R7} Push low registers.
23831 4 add R7, SP, #20 Get the stack pointer before the push.
23832 6 str R7, [SP, #8] Store the stack pointer
23833 (before reserving the space).
23834 8 mov R7, PC Get hold of the start of this code + 12.
23835 10 str R7, [SP, #16] Store it.
23836 12 mov R7, FP Get hold of the current frame pointer.
23837 14 str R7, [SP, #4] Store it.
23838 16 mov R7, LR Get hold of the current return address.
23839 18 str R7, [SP, #12] Store it.
23840 20 add R7, SP, #16 Point at the start of the
23841 backtrace structure.
23842 22 mov FP, R7 Put this value into the frame pointer. */
23843
23844 work_register = thumb_find_work_register (live_regs_mask);
23845 work_reg = gen_rtx_REG (SImode, work_register);
23846 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23847
23848 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23849 stack_pointer_rtx, GEN_INT (-16)));
23850 RTX_FRAME_RELATED_P (insn) = 1;
23851
23852 if (l_mask)
23853 {
23854 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23855 RTX_FRAME_RELATED_P (insn) = 1;
23856
23857 offset = bit_count (l_mask) * UNITS_PER_WORD;
23858 }
23859
23860 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23861 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23862
23863 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23864 x = gen_frame_mem (SImode, x);
23865 emit_move_insn (x, work_reg);
23866
23867 /* Make sure that the instruction fetching the PC is in the right place
23868 to calculate "start of backtrace creation code + 12". */
23869 /* ??? The stores using the common WORK_REG ought to be enough to
23870 prevent the scheduler from doing anything weird. Failing that
23871 we could always move all of the following into an UNSPEC_VOLATILE. */
23872 if (l_mask)
23873 {
23874 x = gen_rtx_REG (SImode, PC_REGNUM);
23875 emit_move_insn (work_reg, x);
23876
23877 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23878 x = gen_frame_mem (SImode, x);
23879 emit_move_insn (x, work_reg);
23880
23881 emit_move_insn (work_reg, arm_hfp_rtx);
23882
23883 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23884 x = gen_frame_mem (SImode, x);
23885 emit_move_insn (x, work_reg);
23886 }
23887 else
23888 {
23889 emit_move_insn (work_reg, arm_hfp_rtx);
23890
23891 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23892 x = gen_frame_mem (SImode, x);
23893 emit_move_insn (x, work_reg);
23894
23895 x = gen_rtx_REG (SImode, PC_REGNUM);
23896 emit_move_insn (work_reg, x);
23897
23898 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23899 x = gen_frame_mem (SImode, x);
23900 emit_move_insn (x, work_reg);
23901 }
23902
23903 x = gen_rtx_REG (SImode, LR_REGNUM);
23904 emit_move_insn (work_reg, x);
23905
23906 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23907 x = gen_frame_mem (SImode, x);
23908 emit_move_insn (x, work_reg);
23909
23910 x = GEN_INT (offset + 12);
23911 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23912
23913 emit_move_insn (arm_hfp_rtx, work_reg);
23914 }
23915 /* Optimization: If we are not pushing any low registers but we are going
23916 to push some high registers then delay our first push. This will just
23917 be a push of LR and we can combine it with the push of the first high
23918 register. */
23919 else if ((l_mask & 0xff) != 0
23920 || (high_regs_pushed == 0 && l_mask))
23921 {
23922 unsigned long mask = l_mask;
23923 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23924 insn = thumb1_emit_multi_reg_push (mask, mask);
23925 RTX_FRAME_RELATED_P (insn) = 1;
23926 }
23927
23928 if (high_regs_pushed)
23929 {
23930 unsigned pushable_regs;
23931 unsigned next_hi_reg;
23932 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23933 : crtl->args.info.nregs;
23934 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23935
23936 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23937 if (live_regs_mask & (1 << next_hi_reg))
23938 break;
23939
23940 /* Here we need to mask out registers used for passing arguments,
23941 even if they could otherwise be pushed. This avoids using them to stash
23942 the high registers, since such stashing could clobber the argument values. */
23943 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
23944
23945 if (pushable_regs == 0)
23946 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23947
23948 while (high_regs_pushed > 0)
23949 {
23950 unsigned long real_regs_mask = 0;
23951
23952 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
23953 {
23954 if (pushable_regs & (1 << regno))
23955 {
23956 emit_move_insn (gen_rtx_REG (SImode, regno),
23957 gen_rtx_REG (SImode, next_hi_reg));
23958
23959 high_regs_pushed --;
23960 real_regs_mask |= (1 << next_hi_reg);
23961
23962 if (high_regs_pushed)
23963 {
23964 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23965 next_hi_reg --)
23966 if (live_regs_mask & (1 << next_hi_reg))
23967 break;
23968 }
23969 else
23970 {
23971 pushable_regs &= ~((1 << regno) - 1);
23972 break;
23973 }
23974 }
23975 }
23976
23977 /* If we had to find a work register and we have not yet
23978 saved the LR then add it to the list of regs to push. */
23979 if (l_mask == (1 << LR_REGNUM))
23980 {
23981 pushable_regs |= l_mask;
23982 real_regs_mask |= l_mask;
23983 l_mask = 0;
23984 }
23985
23986 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
23987 RTX_FRAME_RELATED_P (insn) = 1;
23988 }
23989 }
23990
23991 /* Load the pic register before setting the frame pointer,
23992 so we can use r7 as a temporary work register. */
23993 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23994 arm_load_pic_register (live_regs_mask);
23995
23996 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23997 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23998 stack_pointer_rtx);
23999
24000 if (flag_stack_usage_info)
24001 current_function_static_stack_size
24002 = offsets->outgoing_args - offsets->saved_args;
24003
24004 amount = offsets->outgoing_args - offsets->saved_regs;
24005 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24006 if (amount)
24007 {
24008 if (amount < 512)
24009 {
24010 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24011 GEN_INT (- amount)));
24012 RTX_FRAME_RELATED_P (insn) = 1;
24013 }
24014 else
24015 {
24016 rtx reg, dwarf;
24017
24018 /* The stack decrement is too big for an immediate value in a single
24019 insn. In theory we could issue multiple subtracts, but after
24020 three of them it becomes more space efficient to place the full
24021 value in the constant pool and load into a register. (Also the
24022 ARM debugger really likes to see only one stack decrement per
24023 function). So instead we look for a scratch register into which
24024 we can load the decrement, and then we subtract this from the
24025 stack pointer. Unfortunately on the thumb the only available
24026 scratch registers are the argument registers, and we cannot use
24027 these as they may hold arguments to the function. Instead we
24028 attempt to locate a call preserved register which is used by this
24029 function. If we can find one, then we know that it will have
24030 been pushed at the start of the prologue and so we can corrupt
24031 it now. */
24032 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24033 if (live_regs_mask & (1 << regno))
24034 break;
24035
24036 gcc_assert (regno <= LAST_LO_REGNUM);
24037
24038 reg = gen_rtx_REG (SImode, regno);
24039
24040 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24041
24042 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24043 stack_pointer_rtx, reg));
24044
24045 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24046 plus_constant (Pmode, stack_pointer_rtx,
24047 -amount));
24048 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24049 RTX_FRAME_RELATED_P (insn) = 1;
24050 }
24051 }
24052
24053 if (frame_pointer_needed)
24054 thumb_set_frame_pointer (offsets);
24055
24056 /* If we are profiling, make sure no instructions are scheduled before
24057 the call to mcount. Similarly if the user has requested no
24058 scheduling in the prolog. Similarly if we want non-call exceptions
24059 using the EABI unwinder, to prevent faulting instructions from being
24060 swapped with a stack adjustment. */
24061 if (crtl->profile || !TARGET_SCHED_PROLOG
24062 || (arm_except_unwind_info (&global_options) == UI_TARGET
24063 && cfun->can_throw_non_call_exceptions))
24064 emit_insn (gen_blockage ());
24065
24066 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24067 if (live_regs_mask & 0xff)
24068 cfun->machine->lr_save_eliminated = 0;
24069 }
24070
24071 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24072 single POP instruction can be generated. LR should be replaced by PC. All
24073 the checks required are already done by USE_RETURN_INSN (). Hence, all
24074 we really need to check here is whether we have a single-register or a
24075 multiple-register return. */
24076 void
24077 thumb2_expand_return (bool simple_return)
24078 {
24079 int i, num_regs;
24080 unsigned long saved_regs_mask;
24081 arm_stack_offsets *offsets;
24082
24083 offsets = arm_get_frame_offsets ();
24084 saved_regs_mask = offsets->saved_regs_mask;
24085
24086 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24087 if (saved_regs_mask & (1 << i))
24088 num_regs++;
24089
24090 if (!simple_return && saved_regs_mask)
24091 {
24092 if (num_regs == 1)
24093 {
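/* The only saved register is LR, so pop it straight into the PC: build a
   PARALLEL of (return) and (set (pc) (mem (post_inc sp))), i.e. a
   "pop {pc}". */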
24094 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24095 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24096 rtx addr = gen_rtx_MEM (SImode,
24097 gen_rtx_POST_INC (SImode,
24098 stack_pointer_rtx));
24099 set_mem_alias_set (addr, get_frame_alias_set ());
24100 XVECEXP (par, 0, 0) = ret_rtx;
24101 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24102 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24103 emit_jump_insn (par);
24104 }
24105 else
24106 {
24107 saved_regs_mask &= ~ (1 << LR_REGNUM);
24108 saved_regs_mask |= (1 << PC_REGNUM);
24109 arm_emit_multi_reg_pop (saved_regs_mask);
24110 }
24111 }
24112 else
24113 {
24114 emit_jump_insn (simple_return_rtx);
24115 }
24116 }
24117
24118 void
24119 thumb1_expand_epilogue (void)
24120 {
24121 HOST_WIDE_INT amount;
24122 arm_stack_offsets *offsets;
24123 int regno;
24124
24125 /* Naked functions don't have epilogues. */
24126 if (IS_NAKED (arm_current_func_type ()))
24127 return;
24128
24129 offsets = arm_get_frame_offsets ();
24130 amount = offsets->outgoing_args - offsets->saved_regs;
24131
24132 if (frame_pointer_needed)
24133 {
24134 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24135 amount = offsets->locals_base - offsets->saved_regs;
24136 }
24137 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24138
24139 gcc_assert (amount >= 0);
24140 if (amount)
24141 {
24142 emit_insn (gen_blockage ());
24143
24144 if (amount < 512)
24145 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24146 GEN_INT (amount)));
24147 else
24148 {
24149 /* r3 is always free in the epilogue. */
24150 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24151
24152 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24153 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24154 }
24155 }
24156
24157 /* Emit a USE (stack_pointer_rtx), so that
24158 the stack adjustment will not be deleted. */
24159 emit_insn (gen_force_register_use (stack_pointer_rtx));
24160
24161 if (crtl->profile || !TARGET_SCHED_PROLOG)
24162 emit_insn (gen_blockage ());
24163
24164 /* Emit a clobber for each insn that will be restored in the epilogue,
24165 so that flow2 will get register lifetimes correct. */
24166 for (regno = 0; regno < 13; regno++)
24167 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24168 emit_clobber (gen_rtx_REG (SImode, regno));
24169
24170 if (! df_regs_ever_live_p (LR_REGNUM))
24171 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24172 }
24173
24174 /* Epilogue code for APCS frame. */
24175 static void
24176 arm_expand_epilogue_apcs_frame (bool really_return)
24177 {
24178 unsigned long func_type;
24179 unsigned long saved_regs_mask;
24180 int num_regs = 0;
24181 int i;
24182 int floats_from_frame = 0;
24183 arm_stack_offsets *offsets;
24184
24185 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24186 func_type = arm_current_func_type ();
24187
24188 /* Get frame offsets for ARM. */
24189 offsets = arm_get_frame_offsets ();
24190 saved_regs_mask = offsets->saved_regs_mask;
24191
24192 /* Find the offset of the floating-point save area in the frame. */
24193 floats_from_frame = offsets->saved_args - offsets->frame;
24194
24195 /* Compute how many core registers are saved and how far away the floats are. */
24196 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24197 if (saved_regs_mask & (1 << i))
24198 {
24199 num_regs++;
24200 floats_from_frame += 4;
24201 }
24202
24203 if (TARGET_HARD_FLOAT && TARGET_VFP)
24204 {
24205 int start_reg;
24206
24207 /* The offset is from IP_REGNUM. */
24208 int saved_size = arm_get_vfp_saved_size ();
24209 if (saved_size > 0)
24210 {
24211 floats_from_frame += saved_size;
24212 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
24213 hard_frame_pointer_rtx,
24214 GEN_INT (-floats_from_frame)));
24215 }
24216
24217 /* Generate VFP register multi-pop. */
24218 start_reg = FIRST_VFP_REGNUM;
24219
24220 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24221 /* Look for a case where a reg does not need restoring. */
24222 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24223 && (!df_regs_ever_live_p (i + 1)
24224 || call_used_regs[i + 1]))
24225 {
24226 if (start_reg != i)
24227 arm_emit_vfp_multi_reg_pop (start_reg,
24228 (i - start_reg) / 2,
24229 gen_rtx_REG (SImode,
24230 IP_REGNUM));
24231 start_reg = i + 2;
24232 }
24233
24234 /* Restore the remaining regs that we have discovered (or possibly
24235 even all of them, if the conditional in the for loop never
24236 fired). */
24237 if (start_reg != i)
24238 arm_emit_vfp_multi_reg_pop (start_reg,
24239 (i - start_reg) / 2,
24240 gen_rtx_REG (SImode, IP_REGNUM));
24241 }
24242
24243 if (TARGET_IWMMXT)
24244 {
24245 /* The frame pointer is guaranteed to be non-double-word aligned, as
24246 it is set to double-word-aligned old_stack_pointer - 4. */
24247 rtx insn;
24248 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24249
24250 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24251 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24252 {
24253 rtx addr = gen_frame_mem (V2SImode,
24254 plus_constant (Pmode, hard_frame_pointer_rtx,
24255 - lrm_count * 4));
24256 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24257 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24258 gen_rtx_REG (V2SImode, i),
24259 NULL_RTX);
24260 lrm_count += 2;
24261 }
24262 }
24263
24264 /* saved_regs_mask should contain IP, which holds the old stack pointer
24265 from when the activation record was created. Since SP and IP are adjacent
24266 registers, we can restore the value directly into SP. */
24267 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24268 saved_regs_mask &= ~(1 << IP_REGNUM);
24269 saved_regs_mask |= (1 << SP_REGNUM);
24270
24271 /* There are two registers left in saved_regs_mask - LR and PC. We
24272 only need to restore LR (the return address), but to
24273 save time we can load it directly into PC, unless we need a
24274 special function exit sequence, or we are not really returning. */
24275 if (really_return
24276 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24277 && !crtl->calls_eh_return)
24278 /* Delete LR from the register mask, so that LR on
24279 the stack is loaded into the PC in the register mask. */
24280 saved_regs_mask &= ~(1 << LR_REGNUM);
24281 else
24282 saved_regs_mask &= ~(1 << PC_REGNUM);
24283
24284 num_regs = bit_count (saved_regs_mask);
24285 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24286 {
24287 /* Unwind the stack to just below the saved registers. */
24288 emit_insn (gen_addsi3 (stack_pointer_rtx,
24289 hard_frame_pointer_rtx,
24290 GEN_INT (- 4 * num_regs)));
24291 }
24292
24293 arm_emit_multi_reg_pop (saved_regs_mask);
24294
24295 if (IS_INTERRUPT (func_type))
24296 {
24297 /* Interrupt handlers will have pushed the
24298 IP onto the stack, so restore it now. */
24299 rtx insn;
24300 rtx addr = gen_rtx_MEM (SImode,
24301 gen_rtx_POST_INC (SImode,
24302 stack_pointer_rtx));
24303 set_mem_alias_set (addr, get_frame_alias_set ());
24304 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24305 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24306 gen_rtx_REG (SImode, IP_REGNUM),
24307 NULL_RTX);
24308 }
24309
24310 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24311 return;
24312
24313 if (crtl->calls_eh_return)
24314 emit_insn (gen_addsi3 (stack_pointer_rtx,
24315 stack_pointer_rtx,
24316 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
24317
24318 if (IS_STACKALIGN (func_type))
24319 /* Restore the original stack pointer. Before prologue, the stack was
24320 realigned and the original stack pointer saved in r0. For details,
24321 see comment in arm_expand_prologue. */
24322 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24323
24324 emit_jump_insn (simple_return_rtx);
24325 }
24326
24327 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24328 function is not a sibcall. */
24329 void
24330 arm_expand_epilogue (bool really_return)
24331 {
24332 unsigned long func_type;
24333 unsigned long saved_regs_mask;
24334 int num_regs = 0;
24335 int i;
24336 int amount;
24337 arm_stack_offsets *offsets;
24338
24339 func_type = arm_current_func_type ();
24340
24341 /* Naked functions don't have epilogues. Hence, generate the return pattern
24342 and let output_return_instruction take care of any instruction emission. */
24343 if (IS_NAKED (func_type)
24344 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24345 {
24346 if (really_return)
24347 emit_jump_insn (simple_return_rtx);
24348 return;
24349 }
24350
24351 /* If we are throwing an exception, then we really must be doing a
24352 return, so we can't tail-call. */
24353 gcc_assert (!crtl->calls_eh_return || really_return);
24354
24355 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24356 {
24357 arm_expand_epilogue_apcs_frame (really_return);
24358 return;
24359 }
24360
24361 /* Get frame offsets for ARM. */
24362 offsets = arm_get_frame_offsets ();
24363 saved_regs_mask = offsets->saved_regs_mask;
24364 num_regs = bit_count (saved_regs_mask);
24365
24366 if (frame_pointer_needed)
24367 {
24368 rtx insn;
24369 /* Restore stack pointer if necessary. */
24370 if (TARGET_ARM)
24371 {
24372 /* In ARM mode, the frame pointer points to the first saved register;
24373 restore the stack pointer to the last saved register. */
24374 amount = offsets->frame - offsets->saved_regs;
24375
24376 /* Force out any pending memory operations that reference stacked data
24377 before stack de-allocation occurs. */
24378 emit_insn (gen_blockage ());
24379 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24380 hard_frame_pointer_rtx,
24381 GEN_INT (amount)));
24382 arm_add_cfa_adjust_cfa_note (insn, amount,
24383 stack_pointer_rtx,
24384 hard_frame_pointer_rtx);
24385
24386 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24387 deleted. */
24388 emit_insn (gen_force_register_use (stack_pointer_rtx));
24389 }
24390 else
24391 {
24392 /* In Thumb-2 mode, the frame pointer points to the last saved
24393 register. */
24394 amount = offsets->locals_base - offsets->saved_regs;
24395 if (amount)
24396 {
24397 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24398 hard_frame_pointer_rtx,
24399 GEN_INT (amount)));
24400 arm_add_cfa_adjust_cfa_note (insn, amount,
24401 hard_frame_pointer_rtx,
24402 hard_frame_pointer_rtx);
24403 }
24404
24405 /* Force out any pending memory operations that reference stacked data
24406 before stack de-allocation occurs. */
24407 emit_insn (gen_blockage ());
24408 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24409 hard_frame_pointer_rtx));
24410 arm_add_cfa_adjust_cfa_note (insn, 0,
24411 stack_pointer_rtx,
24412 hard_frame_pointer_rtx);
24413 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24414 deleted. */
24415 emit_insn (gen_force_register_use (stack_pointer_rtx));
24416 }
24417 }
24418 else
24419 {
24420 /* Pop off outgoing args and local frame to adjust stack pointer to
24421 last saved register. */
24422 amount = offsets->outgoing_args - offsets->saved_regs;
24423 if (amount)
24424 {
24425 rtx tmp;
24426 /* Force out any pending memory operations that reference stacked data
24427 before stack de-allocation occurs. */
24428 emit_insn (gen_blockage ());
24429 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24430 stack_pointer_rtx,
24431 GEN_INT (amount)));
24432 arm_add_cfa_adjust_cfa_note (tmp, amount,
24433 stack_pointer_rtx, stack_pointer_rtx);
24434 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24435 not deleted. */
24436 emit_insn (gen_force_register_use (stack_pointer_rtx));
24437 }
24438 }
24439
24440 if (TARGET_HARD_FLOAT && TARGET_VFP)
24441 {
24442 /* Generate VFP register multi-pop. */
24443 int end_reg = LAST_VFP_REGNUM + 1;
24444
24445 /* Scan the registers in reverse order. We need to match
24446 any groupings made in the prologue and generate matching
24447 vldm operations. We need to match groups because,
24448 unlike pop, vldm can only restore consecutive registers. */
24449 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24450 /* Look for a case where a reg does not need restoring. */
24451 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24452 && (!df_regs_ever_live_p (i + 1)
24453 || call_used_regs[i + 1]))
24454 {
24455 /* Restore the regs discovered so far (from reg+2 to
24456 end_reg). */
24457 if (end_reg > i + 2)
24458 arm_emit_vfp_multi_reg_pop (i + 2,
24459 (end_reg - (i + 2)) / 2,
24460 stack_pointer_rtx);
24461 end_reg = i;
24462 }
24463
24464 /* Restore the remaining regs that we have discovered (or possibly
24465 even all of them, if the conditional in the for loop never
24466 fired). */
24467 if (end_reg > i + 2)
24468 arm_emit_vfp_multi_reg_pop (i + 2,
24469 (end_reg - (i + 2)) / 2,
24470 stack_pointer_rtx);
24471 }
24472
24473 if (TARGET_IWMMXT)
24474 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24475 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24476 {
24477 rtx insn;
24478 rtx addr = gen_rtx_MEM (V2SImode,
24479 gen_rtx_POST_INC (SImode,
24480 stack_pointer_rtx));
24481 set_mem_alias_set (addr, get_frame_alias_set ());
24482 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24483 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24484 gen_rtx_REG (V2SImode, i),
24485 NULL_RTX);
24486 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24487 stack_pointer_rtx, stack_pointer_rtx);
24488 }
24489
24490 if (saved_regs_mask)
24491 {
24492 rtx insn;
24493 bool return_in_pc = false;
24494
24495 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24496 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24497 && !IS_STACKALIGN (func_type)
24498 && really_return
24499 && crtl->args.pretend_args_size == 0
24500 && saved_regs_mask & (1 << LR_REGNUM)
24501 && !crtl->calls_eh_return)
24502 {
24503 saved_regs_mask &= ~(1 << LR_REGNUM);
24504 saved_regs_mask |= (1 << PC_REGNUM);
24505 return_in_pc = true;
24506 }
24507
24508 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24509 {
24510 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24511 if (saved_regs_mask & (1 << i))
24512 {
24513 rtx addr = gen_rtx_MEM (SImode,
24514 gen_rtx_POST_INC (SImode,
24515 stack_pointer_rtx));
24516 set_mem_alias_set (addr, get_frame_alias_set ());
24517
24518 if (i == PC_REGNUM)
24519 {
24520 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24521 XVECEXP (insn, 0, 0) = ret_rtx;
24522 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
24523 gen_rtx_REG (SImode, i),
24524 addr);
24525 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24526 insn = emit_jump_insn (insn);
24527 }
24528 else
24529 {
24530 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24531 addr));
24532 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24533 gen_rtx_REG (SImode, i),
24534 NULL_RTX);
24535 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24536 stack_pointer_rtx,
24537 stack_pointer_rtx);
24538 }
24539 }
24540 }
24541 else
24542 {
24543 if (current_tune->prefer_ldrd_strd
24544 && !optimize_function_for_size_p (cfun))
24545 {
24546 if (TARGET_THUMB2)
24547 thumb2_emit_ldrd_pop (saved_regs_mask);
24548 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24549 arm_emit_ldrd_pop (saved_regs_mask);
24550 else
24551 arm_emit_multi_reg_pop (saved_regs_mask);
24552 }
24553 else
24554 arm_emit_multi_reg_pop (saved_regs_mask);
24555 }
24556
24557 if (return_in_pc)
24558 return;
24559 }
24560
24561 if (crtl->args.pretend_args_size)
24562 {
24563 int i, j;
24564 rtx dwarf = NULL_RTX;
24565 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24566 stack_pointer_rtx,
24567 GEN_INT (crtl->args.pretend_args_size)));
24568
24569 RTX_FRAME_RELATED_P (tmp) = 1;
24570
24571 if (cfun->machine->uses_anonymous_args)
24572 {
24573 /* Restore pretend args. Refer to arm_expand_prologue for how
24574 pretend_args are saved on the stack. */
24575 int num_regs = crtl->args.pretend_args_size / 4;
24576 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
24577 for (j = 0, i = 0; j < num_regs; i++)
24578 if (saved_regs_mask & (1 << i))
24579 {
24580 rtx reg = gen_rtx_REG (SImode, i);
24581 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
24582 j++;
24583 }
24584 REG_NOTES (tmp) = dwarf;
24585 }
24586 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
24587 stack_pointer_rtx, stack_pointer_rtx);
24588 }
24589
24590 if (!really_return)
24591 return;
24592
24593 if (crtl->calls_eh_return)
24594 emit_insn (gen_addsi3 (stack_pointer_rtx,
24595 stack_pointer_rtx,
24596 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24597
24598 if (IS_STACKALIGN (func_type))
24599 /* Restore the original stack pointer. Before prologue, the stack was
24600 realigned and the original stack pointer saved in r0. For details,
24601 see comment in arm_expand_prologue. */
24602 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24603
24604 emit_jump_insn (simple_return_rtx);
24605 }
24606
24607 /* Implementation of insn prologue_thumb1_interwork. This is the first
24608 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24609
24610 const char *
24611 thumb1_output_interwork (void)
24612 {
24613 const char * name;
24614 FILE *f = asm_out_file;
24615
24616 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
24617 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
24618 == SYMBOL_REF);
24619 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24620
24621 /* Generate code sequence to switch us into Thumb mode. */
24622 /* The .code 32 directive has already been emitted by
24623 ASM_DECLARE_FUNCTION_NAME. */
24624 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
24625 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
24626
24627 /* Generate a label, so that the debugger will notice the
24628 change in instruction sets. This label is also used by
24629 the assembler to bypass the ARM code when this function
24630 is called from a Thumb encoded function elsewhere in the
24631 same file. Hence the definition of STUB_NAME here must
24632 agree with the definition in gas/config/tc-arm.c. */
24633
24634 #define STUB_NAME ".real_start_of"
24635
24636 fprintf (f, "\t.code\t16\n");
24637 #ifdef ARM_PE
24638 if (arm_dllexport_name_p (name))
24639 name = arm_strip_name_encoding (name);
24640 #endif
24641 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
24642 fprintf (f, "\t.thumb_func\n");
24643 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
24644
24645 return "";
24646 }
24647
24648 /* Handle the case of a double word load into a low register from
24649 a computed memory address. The computed address may involve a
24650 register which is overwritten by the load. */
24651 const char *
24652 thumb_load_double_from_address (rtx *operands)
24653 {
24654 rtx addr;
24655 rtx base;
24656 rtx offset;
24657 rtx arg1;
24658 rtx arg2;
24659
24660 gcc_assert (REG_P (operands[0]));
24661 gcc_assert (MEM_P (operands[1]));
24662
24663 /* Get the memory address. */
24664 addr = XEXP (operands[1], 0);
24665
24666 /* Work out how the memory address is computed. */
24667 switch (GET_CODE (addr))
24668 {
24669 case REG:
24670 operands[2] = adjust_address (operands[1], SImode, 4);
24671
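/* If the destination's low word register is also the address register,
   load the high word first so the address is not clobbered before the
   second load. */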
24672 if (REGNO (operands[0]) == REGNO (addr))
24673 {
24674 output_asm_insn ("ldr\t%H0, %2", operands);
24675 output_asm_insn ("ldr\t%0, %1", operands);
24676 }
24677 else
24678 {
24679 output_asm_insn ("ldr\t%0, %1", operands);
24680 output_asm_insn ("ldr\t%H0, %2", operands);
24681 }
24682 break;
24683
24684 case CONST:
24685 /* Compute <address> + 4 for the high order load. */
24686 operands[2] = adjust_address (operands[1], SImode, 4);
24687
24688 output_asm_insn ("ldr\t%0, %1", operands);
24689 output_asm_insn ("ldr\t%H0, %2", operands);
24690 break;
24691
24692 case PLUS:
24693 arg1 = XEXP (addr, 0);
24694 arg2 = XEXP (addr, 1);
24695
24696 if (CONSTANT_P (arg1))
24697 base = arg2, offset = arg1;
24698 else
24699 base = arg1, offset = arg2;
24700
24701 gcc_assert (REG_P (base));
24702
24703 /* Catch the case of <address> = <reg> + <reg> */
24704 if (REG_P (offset))
24705 {
24706 int reg_offset = REGNO (offset);
24707 int reg_base = REGNO (base);
24708 int reg_dest = REGNO (operands[0]);
24709
24710 /* Add the base and offset registers together into the
24711 higher destination register. */
24712 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
24713 reg_dest + 1, reg_base, reg_offset);
24714
24715 /* Load the lower destination register from the address in
24716 the higher destination register. */
24717 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
24718 reg_dest, reg_dest + 1);
24719
24720 /* Load the higher destination register from its own address
24721 plus 4. */
24722 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
24723 reg_dest + 1, reg_dest + 1);
24724 }
24725 else
24726 {
24727 /* Compute <address> + 4 for the high order load. */
24728 operands[2] = adjust_address (operands[1], SImode, 4);
24729
24730 /* If the computed address is held in the low order register
24731 then load the high order register first, otherwise always
24732 load the low order register first. */
24733 if (REGNO (operands[0]) == REGNO (base))
24734 {
24735 output_asm_insn ("ldr\t%H0, %2", operands);
24736 output_asm_insn ("ldr\t%0, %1", operands);
24737 }
24738 else
24739 {
24740 output_asm_insn ("ldr\t%0, %1", operands);
24741 output_asm_insn ("ldr\t%H0, %2", operands);
24742 }
24743 }
24744 break;
24745
24746 case LABEL_REF:
24747 /* With no registers to worry about we can just load the value
24748 directly. */
24749 operands[2] = adjust_address (operands[1], SImode, 4);
24750
24751 output_asm_insn ("ldr\t%H0, %2", operands);
24752 output_asm_insn ("ldr\t%0, %1", operands);
24753 break;
24754
24755 default:
24756 gcc_unreachable ();
24757 }
24758
24759 return "";
24760 }
24761
24762 const char *
24763 thumb_output_move_mem_multiple (int n, rtx *operands)
24764 {
24765 rtx tmp;
24766
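/* ldmia/stmia transfer the lowest-numbered register to/from the lowest
   address regardless of how the list is written, so sort the scratch
   registers into ascending order before printing the register lists. */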
24767 switch (n)
24768 {
24769 case 2:
24770 if (REGNO (operands[4]) > REGNO (operands[5]))
24771 {
24772 tmp = operands[4];
24773 operands[4] = operands[5];
24774 operands[5] = tmp;
24775 }
24776 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
24777 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
24778 break;
24779
24780 case 3:
24781 if (REGNO (operands[4]) > REGNO (operands[5]))
24782 {
24783 tmp = operands[4];
24784 operands[4] = operands[5];
24785 operands[5] = tmp;
24786 }
24787 if (REGNO (operands[5]) > REGNO (operands[6]))
24788 {
24789 tmp = operands[5];
24790 operands[5] = operands[6];
24791 operands[6] = tmp;
24792 }
24793 if (REGNO (operands[4]) > REGNO (operands[5]))
24794 {
24795 tmp = operands[4];
24796 operands[4] = operands[5];
24797 operands[5] = tmp;
24798 }
24799
24800 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24801 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24802 break;
24803
24804 default:
24805 gcc_unreachable ();
24806 }
24807
24808 return "";
24809 }
24810
24811 /* Output a call-via instruction for thumb state. */
24812 const char *
24813 thumb_call_via_reg (rtx reg)
24814 {
24815 int regno = REGNO (reg);
24816 rtx *labelp;
24817
24818 gcc_assert (regno < LR_REGNUM);
24819
24820 /* If we are in the normal text section we can use a single instance
24821 per compilation unit. If we are doing function sections, then we need
24822 an entry per section, since we can't rely on reachability. */
24823 if (in_section == text_section)
24824 {
24825 thumb_call_reg_needed = 1;
24826
24827 if (thumb_call_via_label[regno] == NULL)
24828 thumb_call_via_label[regno] = gen_label_rtx ();
24829 labelp = thumb_call_via_label + regno;
24830 }
24831 else
24832 {
24833 if (cfun->machine->call_via[regno] == NULL)
24834 cfun->machine->call_via[regno] = gen_label_rtx ();
24835 labelp = cfun->machine->call_via + regno;
24836 }
24837
24838 output_asm_insn ("bl\t%a0", labelp);
24839 return "";
24840 }
24841
24842 /* Routines for generating rtl. */
24843 void
24844 thumb_expand_movmemqi (rtx *operands)
24845 {
24846 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24847 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24848 HOST_WIDE_INT len = INTVAL (operands[2]);
24849 HOST_WIDE_INT offset = 0;
24850
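/* Copy the largest chunks first: e.g. a 23-byte copy becomes one 12-byte
   block move, one 8-byte block move, then a halfword and a byte move. */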
24851 while (len >= 12)
24852 {
24853 emit_insn (gen_movmem12b (out, in, out, in));
24854 len -= 12;
24855 }
24856
24857 if (len >= 8)
24858 {
24859 emit_insn (gen_movmem8b (out, in, out, in));
24860 len -= 8;
24861 }
24862
24863 if (len >= 4)
24864 {
24865 rtx reg = gen_reg_rtx (SImode);
24866 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24867 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24868 len -= 4;
24869 offset += 4;
24870 }
24871
24872 if (len >= 2)
24873 {
24874 rtx reg = gen_reg_rtx (HImode);
24875 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24876 plus_constant (Pmode, in,
24877 offset))));
24878 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24879 offset)),
24880 reg));
24881 len -= 2;
24882 offset += 2;
24883 }
24884
24885 if (len)
24886 {
24887 rtx reg = gen_reg_rtx (QImode);
24888 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24889 plus_constant (Pmode, in,
24890 offset))));
24891 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24892 offset)),
24893 reg));
24894 }
24895 }
24896
24897 void
24898 thumb_reload_out_hi (rtx *operands)
24899 {
24900 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24901 }
24902
24903 /* Handle reading a half-word from memory during reload. */
24904 void
24905 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
24906 {
24907 gcc_unreachable ();
24908 }
24909
24910 /* Return the length of a function name prefix
24911 that starts with the character 'c'. */
24912 static int
24913 arm_get_strip_length (int c)
24914 {
24915 switch (c)
24916 {
24917 ARM_NAME_ENCODING_LENGTHS
24918 default: return 0;
24919 }
24920 }
24921
24922 /* Return a pointer to a function's name with any
24923 and all prefix encodings stripped from it. */
24924 const char *
24925 arm_strip_name_encoding (const char *name)
24926 {
24927 int skip;
24928
24929 while ((skip = arm_get_strip_length (* name)))
24930 name += skip;
24931
24932 return name;
24933 }
24934
24935 /* If there is a '*' anywhere in the name's prefix, then
24936 emit the stripped name verbatim, otherwise prepend an
24937 underscore if leading underscores are being used. */
24938 void
24939 arm_asm_output_labelref (FILE *stream, const char *name)
24940 {
24941 int skip;
24942 int verbatim = 0;
24943
24944 while ((skip = arm_get_strip_length (* name)))
24945 {
24946 verbatim |= (*name == '*');
24947 name += skip;
24948 }
24949
24950 if (verbatim)
24951 fputs (name, stream);
24952 else
24953 asm_fprintf (stream, "%U%s", name);
24954 }
24955
24956 /* This function is used to emit an EABI tag and its associated value.
24957 We emit the numerical value of the tag in case the assembler does not
24958 support textual tags. (E.g. gas prior to 2.20.) If requested we include
24959 the tag name in a comment so that anyone reading the assembler output
24960 will know which tag is being set.
24961
24962 This function is not static because arm-c.c needs it too. */
24963
24964 void
24965 arm_emit_eabi_attribute (const char *name, int num, int val)
24966 {
24967 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24968 if (flag_verbose_asm || flag_debug_asm)
24969 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24970 asm_fprintf (asm_out_file, "\n");
24971 }
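/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1) emits
   "\t.eabi_attribute 19, 1", followed under -fverbose-asm by a comment naming
   the tag (ASM_COMMENT_START, typically "@" on ARM ELF targets). */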
24972
24973 static void
24974 arm_file_start (void)
24975 {
24976 int val;
24977
24978 if (TARGET_UNIFIED_ASM)
24979 asm_fprintf (asm_out_file, "\t.syntax unified\n");
24980
24981 if (TARGET_BPABI)
24982 {
24983 const char *fpu_name;
24984 if (arm_selected_arch)
24985 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24986 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24987 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24988 else
24989 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
24990
24991 if (TARGET_SOFT_FLOAT)
24992 {
24993 fpu_name = "softvfp";
24994 }
24995 else
24996 {
24997 fpu_name = arm_fpu_desc->name;
24998 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
24999 {
25000 if (TARGET_HARD_FLOAT)
25001 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25002 if (TARGET_HARD_FLOAT_ABI)
25003 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25004 }
25005 }
25006 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25007
25008 /* Some of these attributes only apply when the corresponding features
25009 are used. However we don't have any easy way of figuring this out.
25010 Conservatively record the setting that would have been used. */
25011
25012 if (flag_rounding_math)
25013 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25014
25015 if (!flag_unsafe_math_optimizations)
25016 {
25017 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25018 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25019 }
25020 if (flag_signaling_nans)
25021 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25022
25023 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25024 flag_finite_math_only ? 1 : 3);
25025
25026 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25027 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25028 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25029 flag_short_enums ? 1 : 2);
25030
25031 /* Tag_ABI_optimization_goals. */
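/* The values follow the ABI build-attributes addenda: 1 = prefer speed,
   2 = aggressively prefer speed, 4 = aggressively prefer size,
   6 = aggressively prefer debugging. */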
25032 if (optimize_size)
25033 val = 4;
25034 else if (optimize >= 2)
25035 val = 2;
25036 else if (optimize)
25037 val = 1;
25038 else
25039 val = 6;
25040 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25041
25042 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25043 unaligned_access);
25044
25045 if (arm_fp16_format)
25046 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25047 (int) arm_fp16_format);
25048
25049 if (arm_lang_output_object_attributes_hook)
25050 arm_lang_output_object_attributes_hook();
25051 }
25052
25053 default_file_start ();
25054 }
25055
25056 static void
25057 arm_file_end (void)
25058 {
25059 int regno;
25060
25061 if (NEED_INDICATE_EXEC_STACK)
25062 /* Add .note.GNU-stack. */
25063 file_end_indicate_exec_stack ();
25064
25065 if (! thumb_call_reg_needed)
25066 return;
25067
25068 switch_to_section (text_section);
25069 asm_fprintf (asm_out_file, "\t.code 16\n");
25070 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25071
25072 for (regno = 0; regno < LR_REGNUM; regno++)
25073 {
25074 rtx label = thumb_call_via_label[regno];
25075
25076 if (label != 0)
25077 {
25078 targetm.asm_out.internal_label (asm_out_file, "L",
25079 CODE_LABEL_NUMBER (label));
25080 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25081 }
25082 }
25083 }
25084
25085 #ifndef ARM_PE
25086 /* Symbols in the text segment can be accessed without indirecting via the
25087 constant pool; it may take an extra binary operation, but this is still
25088 faster than indirecting via memory. Don't do this when not optimizing,
25089 since we won't be calculating all of the offsets necessary to do this
25090 simplification. */
25091
25092 static void
25093 arm_encode_section_info (tree decl, rtx rtl, int first)
25094 {
25095 if (optimize > 0 && TREE_CONSTANT (decl))
25096 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25097
25098 default_encode_section_info (decl, rtl, first);
25099 }
25100 #endif /* !ARM_PE */
25101
25102 static void
25103 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25104 {
25105 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25106 && !strcmp (prefix, "L"))
25107 {
25108 arm_ccfsm_state = 0;
25109 arm_target_insn = NULL;
25110 }
25111 default_internal_label (stream, prefix, labelno);
25112 }
25113
25114 /* Output code to add DELTA to the first argument, and then jump
25115 to FUNCTION. Used for C++ multiple inheritance. */
25116 static void
25117 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25118 HOST_WIDE_INT delta,
25119 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25120 tree function)
25121 {
25122 static int thunk_label = 0;
25123 char label[256];
25124 char labelpc[256];
25125 int mi_delta = delta;
25126 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25127 int shift = 0;
25128 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25129 ? 1 : 0);
25130 if (mi_delta < 0)
25131 mi_delta = - mi_delta;
25132
25133 final_start_function (emit_barrier (), file, 1);
25134
25135 if (TARGET_THUMB1)
25136 {
25137 int labelno = thunk_label++;
25138 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25139 /* Thunks are entered in ARM mode when available. */
25140 if (TARGET_THUMB1_ONLY)
25141 {
25142 /* push r3 so we can use it as a temporary. */
25143 /* TODO: Omit this save if r3 is not used. */
25144 fputs ("\tpush {r3}\n", file);
25145 fputs ("\tldr\tr3, ", file);
25146 }
25147 else
25148 {
25149 fputs ("\tldr\tr12, ", file);
25150 }
25151 assemble_name (file, label);
25152 fputc ('\n', file);
25153 if (flag_pic)
25154 {
25155 /* If we are generating PIC, the ldr instruction below loads
25156 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25157 the address of the add + 8, so we have:
25158
25159 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25160 = target + 1.
25161
25162 Note that we have "+ 1" because some versions of GNU ld
25163 don't set the low bit of the result for R_ARM_REL32
25164 relocations against thumb function symbols.
25165 On ARMv6M this is +4, not +8. */
25166 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25167 assemble_name (file, labelpc);
25168 fputs (":\n", file);
25169 if (TARGET_THUMB1_ONLY)
25170 {
25171 /* This is 2 insns after the start of the thunk, so we know it
25172 is 4-byte aligned. */
25173 fputs ("\tadd\tr3, pc, r3\n", file);
25174 fputs ("\tmov r12, r3\n", file);
25175 }
25176 else
25177 fputs ("\tadd\tr12, pc, r12\n", file);
25178 }
25179 else if (TARGET_THUMB1_ONLY)
25180 fputs ("\tmov r12, r3\n", file);
25181 }
25182 if (TARGET_THUMB1_ONLY)
25183 {
25184 if (mi_delta > 255)
25185 {
25186 fputs ("\tldr\tr3, ", file);
25187 assemble_name (file, label);
25188 fputs ("+4\n", file);
25189 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
25190 mi_op, this_regno, this_regno);
25191 }
25192 else if (mi_delta != 0)
25193 {
25194 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25195 mi_op, this_regno, this_regno,
25196 mi_delta);
25197 }
25198 }
25199 else
25200 {
25201 /* TODO: Use movw/movt for large constants when available. */
25202 while (mi_delta != 0)
25203 {
25204 if ((mi_delta & (3 << shift)) == 0)
25205 shift += 2;
25206 else
25207 {
25208 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25209 mi_op, this_regno, this_regno,
25210 mi_delta & (0xff << shift));
25211 mi_delta &= ~(0xff << shift);
25212 shift += 8;
25213 }
25214 }
25215 }
25216 if (TARGET_THUMB1)
25217 {
25218 if (TARGET_THUMB1_ONLY)
25219 fputs ("\tpop\t{r3}\n", file);
25220
25221 fprintf (file, "\tbx\tr12\n");
25222 ASM_OUTPUT_ALIGN (file, 2);
25223 assemble_name (file, label);
25224 fputs (":\n", file);
25225 if (flag_pic)
25226 {
25227 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
25228 rtx tem = XEXP (DECL_RTL (function), 0);
25229 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
25230 tem = gen_rtx_MINUS (GET_MODE (tem),
25231 tem,
25232 gen_rtx_SYMBOL_REF (Pmode,
25233 ggc_strdup (labelpc)));
25234 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25235 }
25236 else
25237 /* Output ".word .LTHUNKn". */
25238 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25239
25240 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25241 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25242 }
25243 else
25244 {
25245 fputs ("\tb\t", file);
25246 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25247 if (NEED_PLT_RELOC)
25248 fputs ("(PLT)", file);
25249 fputc ('\n', file);
25250 }
25251
25252 final_end_function ();
25253 }
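
/* Illustrative sketch only (not part of the thunk emission itself): the
   non-Thumb-1 loop above splits DELTA into 8-bit chunks placed at even bit
   positions, each of which is a valid ARM data-processing immediate.  A
   minimal standalone equivalent, using a hypothetical emit_add callback:

     static void
     emit_delta_chunks (unsigned int delta, void (*emit_add) (unsigned int))
     {
       int shift = 0;
       while (delta != 0)
         {
           if ((delta & (3u << shift)) == 0)
             shift += 2;
           else
             {
               emit_add (delta & (0xffu << shift));
               delta &= ~(0xffu << shift);
               shift += 8;
             }
         }
     }

   For example, delta == 0x12345 yields the chunks 0x45, 0x2300 and 0x10000,
   i.e. three add (or sub) instructions. */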
25254
25255 int
25256 arm_emit_vector_const (FILE *file, rtx x)
25257 {
25258 int i;
25259 const char * pattern;
25260
25261 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25262
25263 switch (GET_MODE (x))
25264 {
25265 case V2SImode: pattern = "%08x"; break;
25266 case V4HImode: pattern = "%04x"; break;
25267 case V8QImode: pattern = "%02x"; break;
25268 default: gcc_unreachable ();
25269 }
25270
25271 fprintf (file, "0x");
25272 for (i = CONST_VECTOR_NUNITS (x); i--;)
25273 {
25274 rtx element;
25275
25276 element = CONST_VECTOR_ELT (x, i);
25277 fprintf (file, pattern, INTVAL (element));
25278 }
25279
25280 return 1;
25281 }
25282
25283 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25284 HFmode constant pool entries are actually loaded with ldr. */
25285 void
25286 arm_emit_fp16_const (rtx c)
25287 {
25288 REAL_VALUE_TYPE r;
25289 long bits;
25290
25291 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25292 bits = real_to_target (NULL, &r, HFmode);
25293 if (WORDS_BIG_ENDIAN)
25294 assemble_zeros (2);
25295 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25296 if (!WORDS_BIG_ENDIAN)
25297 assemble_zeros (2);
25298 }
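
/* For example, the HFmode constant 1.0 has the half-precision bit pattern
   0x3c00, so on a little-endian target the pool entry is the two bytes
   0x00 0x3c followed by two bytes of zero padding, while a word-big-endian
   target emits the padding first.  Either way the entry occupies a full
   32-bit word so that it can be loaded with ldr. */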
25299
25300 const char *
25301 arm_output_load_gr (rtx *operands)
25302 {
25303 rtx reg;
25304 rtx offset;
25305 rtx wcgr;
25306 rtx sum;
25307
25308 if (!MEM_P (operands [1])
25309 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25310 || !REG_P (reg = XEXP (sum, 0))
25311 || !CONST_INT_P (offset = XEXP (sum, 1))
25312 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25313 return "wldrw%?\t%0, %1";
25314
25315 /* Fix up an out-of-range load of a GR register. */
25316 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25317 wcgr = operands[0];
25318 operands[0] = reg;
25319 output_asm_insn ("ldr%?\t%0, %1", operands);
25320
25321 operands[0] = wcgr;
25322 operands[1] = reg;
25323 output_asm_insn ("tmcr%?\t%0, %1", operands);
25324 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25325
25326 return "";
25327 }
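
/* As an illustration, an out-of-range load such as
   "wldrw wcgr0, [r1, #2048]" cannot be encoded directly, so the code above
   expands it using the base register of the address as a scratch, roughly:

     str r1, [sp, #-4]!   @ spill the base register
     ldr r1, [r1, #2048]  @ load the value into the core register
     tmcr wcgr0, r1       @ transfer it to the GR register
     ldr r1, [sp], #4     @ restore the base register

   The register numbers here are illustrative only. */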
25328
25329 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25330
25331 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25332 named arg and all anonymous args onto the stack.
25333 XXX I know the prologue shouldn't be pushing registers, but it is faster
25334 that way. */
25335
25336 static void
25337 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25338 enum machine_mode mode,
25339 tree type,
25340 int *pretend_size,
25341 int second_time ATTRIBUTE_UNUSED)
25342 {
25343 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25344 int nregs;
25345
25346 cfun->machine->uses_anonymous_args = 1;
25347 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25348 {
25349 nregs = pcum->aapcs_ncrn;
25350 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25351 nregs++;
25352 }
25353 else
25354 nregs = pcum->nregs;
25355
25356 if (nregs < NUM_ARG_REGS)
25357 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25358 }
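
/* For example, for "int f (int fmt, ...)" under the AAPCS only r0 holds a
   named argument, so nregs is 1 and *pretend_size becomes
   3 * UNITS_PER_WORD; the prologue then pushes r1-r3 so that va_arg can
   find the anonymous arguments contiguously on the stack. */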
25359
25360 /* Return nonzero if the CONSUMER instruction (a store) does not need
25361 PRODUCER's value to calculate the address. */
25362
25363 int
25364 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
25365 {
25366 rtx value = PATTERN (producer);
25367 rtx addr = PATTERN (consumer);
25368
25369 if (GET_CODE (value) == COND_EXEC)
25370 value = COND_EXEC_CODE (value);
25371 if (GET_CODE (value) == PARALLEL)
25372 value = XVECEXP (value, 0, 0);
25373 value = XEXP (value, 0);
25374 if (GET_CODE (addr) == COND_EXEC)
25375 addr = COND_EXEC_CODE (addr);
25376 if (GET_CODE (addr) == PARALLEL)
25377 addr = XVECEXP (addr, 0, 0);
25378 addr = XEXP (addr, 0);
25379
25380 return !reg_overlap_mentioned_p (value, addr);
25381 }
25382
25383 /* Return nonzero if the CONSUMER instruction (a store) does need
25384 PRODUCER's value to calculate the address. */
25385
25386 int
25387 arm_early_store_addr_dep (rtx producer, rtx consumer)
25388 {
25389 return !arm_no_early_store_addr_dep (producer, consumer);
25390 }
25391
25392 /* Return nonzero if the CONSUMER instruction (a load) does need
25393 PRODUCER's value to calculate the address. */
25394
25395 int
25396 arm_early_load_addr_dep (rtx producer, rtx consumer)
25397 {
25398 rtx value = PATTERN (producer);
25399 rtx addr = PATTERN (consumer);
25400
25401 if (GET_CODE (value) == COND_EXEC)
25402 value = COND_EXEC_CODE (value);
25403 if (GET_CODE (value) == PARALLEL)
25404 value = XVECEXP (value, 0, 0);
25405 value = XEXP (value, 0);
25406 if (GET_CODE (addr) == COND_EXEC)
25407 addr = COND_EXEC_CODE (addr);
25408 if (GET_CODE (addr) == PARALLEL)
25409 {
25410 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
25411 addr = XVECEXP (addr, 0, 1);
25412 else
25413 addr = XVECEXP (addr, 0, 0);
25414 }
25415 addr = XEXP (addr, 1);
25416
25417 return reg_overlap_mentioned_p (value, addr);
25418 }
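
/* For instance, if the producer is (set (reg r1) ...) and the consumer is
   the load (set (reg r0) (mem (plus (reg r1) (const_int 4)))), the loaded
   address uses r1, so this returns nonzero (an early address dependency). */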
25419
25420 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
25421 have an early register shift value or amount dependency on the
25422 result of PRODUCER. */
25423
25424 int
25425 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
25426 {
25427 rtx value = PATTERN (producer);
25428 rtx op = PATTERN (consumer);
25429 rtx early_op;
25430
25431 if (GET_CODE (value) == COND_EXEC)
25432 value = COND_EXEC_CODE (value);
25433 if (GET_CODE (value) == PARALLEL)
25434 value = XVECEXP (value, 0, 0);
25435 value = XEXP (value, 0);
25436 if (GET_CODE (op) == COND_EXEC)
25437 op = COND_EXEC_CODE (op);
25438 if (GET_CODE (op) == PARALLEL)
25439 op = XVECEXP (op, 0, 0);
25440 op = XEXP (op, 1);
25441
25442 early_op = XEXP (op, 0);
25443 /* This is either an actual independent shift, or a shift applied to
25444 the first operand of another operation. We want the whole shift
25445 operation. */
25446 if (REG_P (early_op))
25447 early_op = op;
25448
25449 return !reg_overlap_mentioned_p (value, early_op);
25450 }
25451
25452 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
25453 have an early register shift value dependency on the result of
25454 PRODUCER. */
25455
25456 int
25457 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
25458 {
25459 rtx value = PATTERN (producer);
25460 rtx op = PATTERN (consumer);
25461 rtx early_op;
25462
25463 if (GET_CODE (value) == COND_EXEC)
25464 value = COND_EXEC_CODE (value);
25465 if (GET_CODE (value) == PARALLEL)
25466 value = XVECEXP (value, 0, 0);
25467 value = XEXP (value, 0);
25468 if (GET_CODE (op) == COND_EXEC)
25469 op = COND_EXEC_CODE (op);
25470 if (GET_CODE (op) == PARALLEL)
25471 op = XVECEXP (op, 0, 0);
25472 op = XEXP (op, 1);
25473
25474 early_op = XEXP (op, 0);
25475
25476 /* This is either an actual independent shift, or a shift applied to
25477 the first operand of another operation. We want the value being
25478 shifted, in either case. */
25479 if (!REG_P (early_op))
25480 early_op = XEXP (early_op, 0);
25481
25482 return !reg_overlap_mentioned_p (value, early_op);
25483 }
25484
25485 /* Return nonzero if the CONSUMER (a mul or mac op) does not
25486 have an early register mult dependency on the result of
25487 PRODUCER. */
25488
25489 int
25490 arm_no_early_mul_dep (rtx producer, rtx consumer)
25491 {
25492 rtx value = PATTERN (producer);
25493 rtx op = PATTERN (consumer);
25494
25495 if (GET_CODE (value) == COND_EXEC)
25496 value = COND_EXEC_CODE (value);
25497 if (GET_CODE (value) == PARALLEL)
25498 value = XVECEXP (value, 0, 0);
25499 value = XEXP (value, 0);
25500 if (GET_CODE (op) == COND_EXEC)
25501 op = COND_EXEC_CODE (op);
25502 if (GET_CODE (op) == PARALLEL)
25503 op = XVECEXP (op, 0, 0);
25504 op = XEXP (op, 1);
25505
25506 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
25507 {
25508 if (GET_CODE (XEXP (op, 0)) == MULT)
25509 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
25510 else
25511 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
25512 }
25513
25514 return 0;
25515 }
25516
25517 /* We can't rely on the caller doing the proper promotion when
25518 using APCS or ATPCS. */
25519
25520 static bool
25521 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25522 {
25523 return !TARGET_AAPCS_BASED;
25524 }
25525
25526 static enum machine_mode
25527 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25528 enum machine_mode mode,
25529 int *punsignedp ATTRIBUTE_UNUSED,
25530 const_tree fntype ATTRIBUTE_UNUSED,
25531 int for_return ATTRIBUTE_UNUSED)
25532 {
25533 if (GET_MODE_CLASS (mode) == MODE_INT
25534 && GET_MODE_SIZE (mode) < 4)
25535 return SImode;
25536
25537 return mode;
25538 }
25539
25540 /* AAPCS based ABIs use short enums by default. */
25541
25542 static bool
25543 arm_default_short_enums (void)
25544 {
25545 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25546 }
25547
25548
25549 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25550
25551 static bool
25552 arm_align_anon_bitfield (void)
25553 {
25554 return TARGET_AAPCS_BASED;
25555 }
25556
25557
25558 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25559
25560 static tree
25561 arm_cxx_guard_type (void)
25562 {
25563 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25564 }
25565
25566 /* Return non-zero iff the consumer (a multiply-accumulate or a
25567 multiply-subtract instruction) has an accumulator dependency on the
25568 result of the producer and no other dependency on that result. It
25569 does not check whether the producer is a multiply-accumulate instruction. */
25570 int
25571 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
25572 {
25573 rtx result;
25574 rtx op0, op1, acc;
25575
25576 producer = PATTERN (producer);
25577 consumer = PATTERN (consumer);
25578
25579 if (GET_CODE (producer) == COND_EXEC)
25580 producer = COND_EXEC_CODE (producer);
25581 if (GET_CODE (consumer) == COND_EXEC)
25582 consumer = COND_EXEC_CODE (consumer);
25583
25584 if (GET_CODE (producer) != SET)
25585 return 0;
25586
25587 result = XEXP (producer, 0);
25588
25589 if (GET_CODE (consumer) != SET)
25590 return 0;
25591
25592 /* Check that the consumer is of the form
25593 (set (...) (plus (mult ...) (...)))
25594 or
25595 (set (...) (minus (...) (mult ...))). */
25596 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
25597 {
25598 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
25599 return 0;
25600
25601 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
25602 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
25603 acc = XEXP (XEXP (consumer, 1), 1);
25604 }
25605 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
25606 {
25607 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
25608 return 0;
25609
25610 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
25611 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
25612 acc = XEXP (XEXP (consumer, 1), 0);
25613 }
25614 else
25615 return 0;
25616
25617 return (reg_overlap_mentioned_p (result, acc)
25618 && !reg_overlap_mentioned_p (result, op0)
25619 && !reg_overlap_mentioned_p (result, op1));
25620 }
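
/* For example, with producer (set (reg r4) ...) and consumer
   (set (reg r0) (plus (mult (reg r1) (reg r2)) (reg r4))), the producer's
   result r4 feeds only the accumulator operand, so this returns nonzero;
   if r4 also appeared as one of the multiplier operands it would return
   zero. */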
25621
25622 /* Return non-zero if the consumer (a multiply-accumulate instruction)
25623 has an accumulator dependency on the result of the producer (a
25624 multiplication instruction) and no other dependency on that result. */
25625 int
25626 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
25627 {
25628 rtx mul = PATTERN (producer);
25629 rtx mac = PATTERN (consumer);
25630 rtx mul_result;
25631 rtx mac_op0, mac_op1, mac_acc;
25632
25633 if (GET_CODE (mul) == COND_EXEC)
25634 mul = COND_EXEC_CODE (mul);
25635 if (GET_CODE (mac) == COND_EXEC)
25636 mac = COND_EXEC_CODE (mac);
25637
25638 /* Check that mul is of the form (set (...) (mult ...))
25639 and mla is of the form (set (...) (plus (mult ...) (...))). */
25640 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
25641 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
25642 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
25643 return 0;
25644
25645 mul_result = XEXP (mul, 0);
25646 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
25647 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
25648 mac_acc = XEXP (XEXP (mac, 1), 1);
25649
25650 return (reg_overlap_mentioned_p (mul_result, mac_acc)
25651 && !reg_overlap_mentioned_p (mul_result, mac_op0)
25652 && !reg_overlap_mentioned_p (mul_result, mac_op1));
25653 }
25654
25655
25656 /* The EABI says test the least significant bit of a guard variable. */
25657
25658 static bool
25659 arm_cxx_guard_mask_bit (void)
25660 {
25661 return TARGET_AAPCS_BASED;
25662 }
25663
25664
25665 /* The EABI specifies that all array cookies are 8 bytes long. */
25666
25667 static tree
25668 arm_get_cookie_size (tree type)
25669 {
25670 tree size;
25671
25672 if (!TARGET_AAPCS_BASED)
25673 return default_cxx_get_cookie_size (type);
25674
25675 size = build_int_cst (sizetype, 8);
25676 return size;
25677 }
25678
25679
25680 /* The EABI says that array cookies should also contain the element size. */
25681
25682 static bool
25683 arm_cookie_has_size (void)
25684 {
25685 return TARGET_AAPCS_BASED;
25686 }
25687
25688
25689 /* The EABI says constructors and destructors should return a pointer to
25690 the object constructed/destroyed. */
25691
25692 static bool
25693 arm_cxx_cdtor_returns_this (void)
25694 {
25695 return TARGET_AAPCS_BASED;
25696 }
25697
25698 /* The EABI says that an inline function may never be the key
25699 method. */
25700
25701 static bool
25702 arm_cxx_key_method_may_be_inline (void)
25703 {
25704 return !TARGET_AAPCS_BASED;
25705 }
25706
25707 static void
25708 arm_cxx_determine_class_data_visibility (tree decl)
25709 {
25710 if (!TARGET_AAPCS_BASED
25711 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25712 return;
25713
25714 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25715 is exported. However, on systems without dynamic vague linkage,
25716 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25717 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
25718 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
25719 else
25720 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
25721 DECL_VISIBILITY_SPECIFIED (decl) = 1;
25722 }
25723
25724 static bool
25725 arm_cxx_class_data_always_comdat (void)
25726 {
25727 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25728 vague linkage if the class has no key function. */
25729 return !TARGET_AAPCS_BASED;
25730 }
25731
25732
25733 /* The EABI says __aeabi_atexit should be used to register static
25734 destructors. */
25735
25736 static bool
25737 arm_cxx_use_aeabi_atexit (void)
25738 {
25739 return TARGET_AAPCS_BASED;
25740 }
25741
25742
25743 void
25744 arm_set_return_address (rtx source, rtx scratch)
25745 {
25746 arm_stack_offsets *offsets;
25747 HOST_WIDE_INT delta;
25748 rtx addr;
25749 unsigned long saved_regs;
25750
25751 offsets = arm_get_frame_offsets ();
25752 saved_regs = offsets->saved_regs_mask;
25753
25754 if ((saved_regs & (1 << LR_REGNUM)) == 0)
25755 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25756 else
25757 {
25758 if (frame_pointer_needed)
25759 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
25760 else
25761 {
25762 /* LR will be the first saved register. */
25763 delta = offsets->outgoing_args - (offsets->frame + 4);
25764
25765
25766 if (delta >= 4096)
25767 {
25768 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
25769 GEN_INT (delta & ~4095)));
25770 addr = scratch;
25771 delta &= 4095;
25772 }
25773 else
25774 addr = stack_pointer_rtx;
25775
25776 addr = plus_constant (Pmode, addr, delta);
25777 }
25778 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25779 }
25780 }
25781
25782
25783 void
25784 thumb_set_return_address (rtx source, rtx scratch)
25785 {
25786 arm_stack_offsets *offsets;
25787 HOST_WIDE_INT delta;
25788 HOST_WIDE_INT limit;
25789 int reg;
25790 rtx addr;
25791 unsigned long mask;
25792
25793 emit_use (source);
25794
25795 offsets = arm_get_frame_offsets ();
25796 mask = offsets->saved_regs_mask;
25797 if (mask & (1 << LR_REGNUM))
25798 {
25799 limit = 1024;
25800 /* Find the saved regs. */
25801 if (frame_pointer_needed)
25802 {
25803 delta = offsets->soft_frame - offsets->saved_args;
25804 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25805 if (TARGET_THUMB1)
25806 limit = 128;
25807 }
25808 else
25809 {
25810 delta = offsets->outgoing_args - offsets->saved_args;
25811 reg = SP_REGNUM;
25812 }
25813 /* Allow for the stack frame. */
25814 if (TARGET_THUMB1 && TARGET_BACKTRACE)
25815 delta -= 16;
25816 /* The link register is always the first saved register. */
25817 delta -= 4;
25818
25819 /* Construct the address. */
25820 addr = gen_rtx_REG (SImode, reg);
25821 if (delta > limit)
25822 {
25823 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25824 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25825 addr = scratch;
25826 }
25827 else
25828 addr = plus_constant (Pmode, addr, delta);
25829
25830 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25831 }
25832 else
25833 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25834 }
25835
25836 /* Implements target hook vector_mode_supported_p. */
25837 bool
25838 arm_vector_mode_supported_p (enum machine_mode mode)
25839 {
25840 /* Neon also supports V2SImode, etc. listed in the clause below. */
25841 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25842 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
25843 return true;
25844
25845 if ((TARGET_NEON || TARGET_IWMMXT)
25846 && ((mode == V2SImode)
25847 || (mode == V4HImode)
25848 || (mode == V8QImode)))
25849 return true;
25850
25851 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25852 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25853 || mode == V2HAmode))
25854 return true;
25855
25856 return false;
25857 }
25858
25859 /* Implements target hook array_mode_supported_p. */
25860
25861 static bool
25862 arm_array_mode_supported_p (enum machine_mode mode,
25863 unsigned HOST_WIDE_INT nelems)
25864 {
25865 if (TARGET_NEON
25866 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25867 && (nelems >= 2 && nelems <= 4))
25868 return true;
25869
25870 return false;
25871 }
25872
25873 /* Use the option -mvectorize-with-neon-double to override the use of quadword
25874 registers when autovectorizing for Neon, at least until multiple vector
25875 widths are supported properly by the middle-end. */
25876
25877 static enum machine_mode
25878 arm_preferred_simd_mode (enum machine_mode mode)
25879 {
25880 if (TARGET_NEON)
25881 switch (mode)
25882 {
25883 case SFmode:
25884 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25885 case SImode:
25886 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25887 case HImode:
25888 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25889 case QImode:
25890 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25891 case DImode:
25892 if (!TARGET_NEON_VECTORIZE_DOUBLE)
25893 return V2DImode;
25894 break;
25895
25896 default:;
25897 }
25898
25899 if (TARGET_REALLY_IWMMXT)
25900 switch (mode)
25901 {
25902 case SImode:
25903 return V2SImode;
25904 case HImode:
25905 return V4HImode;
25906 case QImode:
25907 return V8QImode;
25908
25909 default:;
25910 }
25911
25912 return word_mode;
25913 }
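
/* For example, when auto-vectorizing SImode arithmetic with NEON enabled
   this prefers the quad-word V4SImode, falling back to V2SImode when
   -mvectorize-with-neon-double is given, and to V2SImode on iWMMXt. */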
25914
25915 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25916
25917 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25918 using r0-r4 for function arguments and r7 for the stack frame, and not have
25919 enough left over to do doubleword arithmetic. For Thumb-2 all the
25920 potentially problematic instructions accept high registers so this is not
25921 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25922 that require many low registers. */
25923 static bool
25924 arm_class_likely_spilled_p (reg_class_t rclass)
25925 {
25926 if ((TARGET_THUMB1 && rclass == LO_REGS)
25927 || rclass == CC_REG)
25928 return true;
25929
25930 return false;
25931 }
25932
25933 /* Implements target hook small_register_classes_for_mode_p. */
25934 bool
25935 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
25936 {
25937 return TARGET_THUMB1;
25938 }
25939
25940 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25941 ARM insns and therefore guarantee that the shift count is modulo 256.
25942 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25943 guarantee no particular behavior for out-of-range counts. */
25944
25945 static unsigned HOST_WIDE_INT
25946 arm_shift_truncation_mask (enum machine_mode mode)
25947 {
25948 return mode == SImode ? 255 : 0;
25949 }
25950
25951
25952 /* Map internal gcc register numbers to DWARF2 register numbers. */
25953
25954 unsigned int
25955 arm_dbx_register_number (unsigned int regno)
25956 {
25957 if (regno < 16)
25958 return regno;
25959
25960 if (IS_VFP_REGNUM (regno))
25961 {
25962 /* See comment in arm_dwarf_register_span. */
25963 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25964 return 64 + regno - FIRST_VFP_REGNUM;
25965 else
25966 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25967 }
25968
25969 if (IS_IWMMXT_GR_REGNUM (regno))
25970 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25971
25972 if (IS_IWMMXT_REGNUM (regno))
25973 return 112 + regno - FIRST_IWMMXT_REGNUM;
25974
25975 gcc_unreachable ();
25976 }
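
/* For example, s5 (regno FIRST_VFP_REGNUM + 5) maps to DWARF register 69,
   while d20 (regno FIRST_VFP_REGNUM + 40, which has no single-precision
   view) maps to 256 + 20 = 276. */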
25977
25978 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25979 GCC models them as 64 32-bit registers, so we need to describe this to
25980 the DWARF generation code. Other registers can use the default. */
25981 static rtx
25982 arm_dwarf_register_span (rtx rtl)
25983 {
25984 unsigned regno;
25985 int nregs;
25986 int i;
25987 rtx p;
25988
25989 regno = REGNO (rtl);
25990 if (!IS_VFP_REGNUM (regno))
25991 return NULL_RTX;
25992
25993 /* XXX FIXME: The EABI defines two VFP register ranges:
25994 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25995 256-287: D0-D31
25996 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25997 corresponding D register. Until GDB supports this, we shall use the
25998 legacy encodings. We also use these encodings for D0-D15 for
25999 compatibility with older debuggers. */
26000 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26001 return NULL_RTX;
26002
26003 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
26004 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
26005 for (i = 0; i < nregs; i++)
26006 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
26007
26008 return p;
26009 }
26010
26011 #if ARM_UNWIND_INFO
26012 /* Emit unwind directives for a store-multiple instruction or stack pointer
26013 push during alignment.
26014 These should only ever be generated by the function prologue code, so
26015 expect them to have a particular form. */
26016
26017 static void
26018 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26019 {
26020 int i;
26021 HOST_WIDE_INT offset;
26022 HOST_WIDE_INT nregs;
26023 int reg_size;
26024 unsigned reg;
26025 unsigned lastreg;
26026 rtx e;
26027
26028 e = XVECEXP (p, 0, 0);
26029 if (GET_CODE (e) != SET)
26030 abort ();
26031
26032 /* First insn will adjust the stack pointer. */
26033 if (GET_CODE (e) != SET
26034 || !REG_P (XEXP (e, 0))
26035 || REGNO (XEXP (e, 0)) != SP_REGNUM
26036 || GET_CODE (XEXP (e, 1)) != PLUS)
26037 abort ();
26038
26039 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
26040 nregs = XVECLEN (p, 0) - 1;
26041
26042 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
26043 if (reg < 16)
26044 {
26045 /* The function prologue may also push pc, but does not annotate it, as it
26046 is never restored. We turn this into a stack pointer adjustment. */
26047 if (nregs * 4 == offset - 4)
26048 {
26049 fprintf (asm_out_file, "\t.pad #4\n");
26050 offset -= 4;
26051 }
26052 reg_size = 4;
26053 fprintf (asm_out_file, "\t.save {");
26054 }
26055 else if (IS_VFP_REGNUM (reg))
26056 {
26057 reg_size = 8;
26058 fprintf (asm_out_file, "\t.vsave {");
26059 }
26060 else
26061 /* Unknown register type. */
26062 abort ();
26063
26064 /* If the stack increment doesn't match the size of the saved registers,
26065 something has gone horribly wrong. */
26066 if (offset != nregs * reg_size)
26067 abort ();
26068
26069 offset = 0;
26070 lastreg = 0;
26071 /* The remaining insns will describe the stores. */
26072 for (i = 1; i <= nregs; i++)
26073 {
26074 /* Expect (set (mem <addr>) (reg)).
26075 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26076 e = XVECEXP (p, 0, i);
26077 if (GET_CODE (e) != SET
26078 || !MEM_P (XEXP (e, 0))
26079 || !REG_P (XEXP (e, 1)))
26080 abort ();
26081
26082 reg = REGNO (XEXP (e, 1));
26083 if (reg < lastreg)
26084 abort ();
26085
26086 if (i != 1)
26087 fprintf (asm_out_file, ", ");
26088 /* We can't use %r for vfp because we need to use the
26089 double precision register names. */
26090 if (IS_VFP_REGNUM (reg))
26091 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26092 else
26093 asm_fprintf (asm_out_file, "%r", reg);
26094
26095 #ifdef ENABLE_CHECKING
26096 /* Check that the addresses are consecutive. */
26097 e = XEXP (XEXP (e, 0), 0);
26098 if (GET_CODE (e) == PLUS)
26099 {
26100 offset += reg_size;
26101 if (!REG_P (XEXP (e, 0))
26102 || REGNO (XEXP (e, 0)) != SP_REGNUM
26103 || !CONST_INT_P (XEXP (e, 1))
26104 || offset != INTVAL (XEXP (e, 1)))
26105 abort ();
26106 }
26107 else if (i != 1
26108 || !REG_P (e)
26109 || REGNO (e) != SP_REGNUM)
26110 abort ();
26111 #endif
26112 }
26113 fprintf (asm_out_file, "}\n");
26114 }
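
/* As an illustration of arm_unwind_emit_sequence: a prologue insn such as
   "push {r4, r5, lr}" is annotated as "\t.save {r4, r5, lr}", while a VFP
   store-multiple of d8 and d9 produces "\t.vsave {d8, d9}".  A push that
   also stores pc additionally emits "\t.pad #4" for the extra slot. */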
26115
26116 /* Emit unwind directives for a SET. */
26117
26118 static void
26119 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26120 {
26121 rtx e0;
26122 rtx e1;
26123 unsigned reg;
26124
26125 e0 = XEXP (p, 0);
26126 e1 = XEXP (p, 1);
26127 switch (GET_CODE (e0))
26128 {
26129 case MEM:
26130 /* Pushing a single register. */
26131 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26132 || !REG_P (XEXP (XEXP (e0, 0), 0))
26133 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26134 abort ();
26135
26136 asm_fprintf (asm_out_file, "\t.save ");
26137 if (IS_VFP_REGNUM (REGNO (e1)))
26138 asm_fprintf(asm_out_file, "{d%d}\n",
26139 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26140 else
26141 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26142 break;
26143
26144 case REG:
26145 if (REGNO (e0) == SP_REGNUM)
26146 {
26147 /* A stack increment. */
26148 if (GET_CODE (e1) != PLUS
26149 || !REG_P (XEXP (e1, 0))
26150 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26151 || !CONST_INT_P (XEXP (e1, 1)))
26152 abort ();
26153
26154 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26155 -INTVAL (XEXP (e1, 1)));
26156 }
26157 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26158 {
26159 HOST_WIDE_INT offset;
26160
26161 if (GET_CODE (e1) == PLUS)
26162 {
26163 if (!REG_P (XEXP (e1, 0))
26164 || !CONST_INT_P (XEXP (e1, 1)))
26165 abort ();
26166 reg = REGNO (XEXP (e1, 0));
26167 offset = INTVAL (XEXP (e1, 1));
26168 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26169 HARD_FRAME_POINTER_REGNUM, reg,
26170 offset);
26171 }
26172 else if (REG_P (e1))
26173 {
26174 reg = REGNO (e1);
26175 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26176 HARD_FRAME_POINTER_REGNUM, reg);
26177 }
26178 else
26179 abort ();
26180 }
26181 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26182 {
26183 /* Move from sp to reg. */
26184 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26185 }
26186 else if (GET_CODE (e1) == PLUS
26187 && REG_P (XEXP (e1, 0))
26188 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26189 && CONST_INT_P (XEXP (e1, 1)))
26190 {
26191 /* Set reg to offset from sp. */
26192 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26193 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26194 }
26195 else
26196 abort ();
26197 break;
26198
26199 default:
26200 abort ();
26201 }
26202 }
26203
26204
26205 /* Emit unwind directives for the given insn. */
26206
26207 static void
26208 arm_unwind_emit (FILE * asm_out_file, rtx insn)
26209 {
26210 rtx note, pat;
26211 bool handled_one = false;
26212
26213 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26214 return;
26215
26216 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26217 && (TREE_NOTHROW (current_function_decl)
26218 || crtl->all_throwers_are_sibcalls))
26219 return;
26220
26221 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26222 return;
26223
26224 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26225 {
26226 pat = XEXP (note, 0);
26227 switch (REG_NOTE_KIND (note))
26228 {
26229 case REG_FRAME_RELATED_EXPR:
26230 goto found;
26231
26232 case REG_CFA_REGISTER:
26233 if (pat == NULL)
26234 {
26235 pat = PATTERN (insn);
26236 if (GET_CODE (pat) == PARALLEL)
26237 pat = XVECEXP (pat, 0, 0);
26238 }
26239
26240 /* Only emitted for IS_STACKALIGN re-alignment. */
26241 {
26242 rtx dest, src;
26243 unsigned reg;
26244
26245 src = SET_SRC (pat);
26246 dest = SET_DEST (pat);
26247
26248 gcc_assert (src == stack_pointer_rtx);
26249 reg = REGNO (dest);
26250 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26251 reg + 0x90, reg);
26252 }
26253 handled_one = true;
26254 break;
26255
26256 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26257 to get correct dwarf information for shrink-wrapping. We should not
26258 emit unwind information for it because such notes are used only for
26259 pretend arguments or to adjust sp and restore registers from the
26260 stack. */
26261 case REG_CFA_ADJUST_CFA:
26262 case REG_CFA_RESTORE:
26263 return;
26264
26265 case REG_CFA_DEF_CFA:
26266 case REG_CFA_EXPRESSION:
26267 case REG_CFA_OFFSET:
26268 /* ??? Only handling here what we actually emit. */
26269 gcc_unreachable ();
26270
26271 default:
26272 break;
26273 }
26274 }
26275 if (handled_one)
26276 return;
26277 pat = PATTERN (insn);
26278 found:
26279
26280 switch (GET_CODE (pat))
26281 {
26282 case SET:
26283 arm_unwind_emit_set (asm_out_file, pat);
26284 break;
26285
26286 case SEQUENCE:
26287 /* Store multiple. */
26288 arm_unwind_emit_sequence (asm_out_file, pat);
26289 break;
26290
26291 default:
26292 abort();
26293 }
26294 }
26295
26296
26297 /* Output a reference from a function exception table to the type_info
26298 object X. The EABI specifies that the symbol should be relocated by
26299 an R_ARM_TARGET2 relocation. */
26300
26301 static bool
26302 arm_output_ttype (rtx x)
26303 {
26304 fputs ("\t.word\t", asm_out_file);
26305 output_addr_const (asm_out_file, x);
26306 /* Use special relocations for symbol references. */
26307 if (!CONST_INT_P (x))
26308 fputs ("(TARGET2)", asm_out_file);
26309 fputc ('\n', asm_out_file);
26310
26311 return TRUE;
26312 }
26313
26314 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26315
26316 static void
26317 arm_asm_emit_except_personality (rtx personality)
26318 {
26319 fputs ("\t.personality\t", asm_out_file);
26320 output_addr_const (asm_out_file, personality);
26321 fputc ('\n', asm_out_file);
26322 }
26323
26324 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26325
26326 static void
26327 arm_asm_init_sections (void)
26328 {
26329 exception_section = get_unnamed_section (0, output_section_asm_op,
26330 "\t.handlerdata");
26331 }
26332 #endif /* ARM_UNWIND_INFO */
26333
26334 /* Output unwind directives for the start/end of a function. */
26335
26336 void
26337 arm_output_fn_unwind (FILE * f, bool prologue)
26338 {
26339 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26340 return;
26341
26342 if (prologue)
26343 fputs ("\t.fnstart\n", f);
26344 else
26345 {
26346 /* If this function will never be unwound, then mark it as such.
26347 The same condition is used in arm_unwind_emit to suppress
26348 the frame annotations. */
26349 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26350 && (TREE_NOTHROW (current_function_decl)
26351 || crtl->all_throwers_are_sibcalls))
26352 fputs("\t.cantunwind\n", f);
26353
26354 fputs ("\t.fnend\n", f);
26355 }
26356 }
26357
26358 static bool
26359 arm_emit_tls_decoration (FILE *fp, rtx x)
26360 {
26361 enum tls_reloc reloc;
26362 rtx val;
26363
26364 val = XVECEXP (x, 0, 0);
26365 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26366
26367 output_addr_const (fp, val);
26368
26369 switch (reloc)
26370 {
26371 case TLS_GD32:
26372 fputs ("(tlsgd)", fp);
26373 break;
26374 case TLS_LDM32:
26375 fputs ("(tlsldm)", fp);
26376 break;
26377 case TLS_LDO32:
26378 fputs ("(tlsldo)", fp);
26379 break;
26380 case TLS_IE32:
26381 fputs ("(gottpoff)", fp);
26382 break;
26383 case TLS_LE32:
26384 fputs ("(tpoff)", fp);
26385 break;
26386 case TLS_DESCSEQ:
26387 fputs ("(tlsdesc)", fp);
26388 break;
26389 default:
26390 gcc_unreachable ();
26391 }
26392
26393 switch (reloc)
26394 {
26395 case TLS_GD32:
26396 case TLS_LDM32:
26397 case TLS_IE32:
26398 case TLS_DESCSEQ:
26399 fputs (" + (. - ", fp);
26400 output_addr_const (fp, XVECEXP (x, 0, 2));
26401 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26402 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26403 output_addr_const (fp, XVECEXP (x, 0, 3));
26404 fputc (')', fp);
26405 break;
26406 default:
26407 break;
26408 }
26409
26410 return TRUE;
26411 }
26412
26413 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26414
26415 static void
26416 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26417 {
26418 gcc_assert (size == 4);
26419 fputs ("\t.word\t", file);
26420 output_addr_const (file, x);
26421 fputs ("(tlsldo)", file);
26422 }
26423
26424 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26425
26426 static bool
26427 arm_output_addr_const_extra (FILE *fp, rtx x)
26428 {
26429 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26430 return arm_emit_tls_decoration (fp, x);
26431 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26432 {
26433 char label[256];
26434 int labelno = INTVAL (XVECEXP (x, 0, 0));
26435
26436 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26437 assemble_name_raw (fp, label);
26438
26439 return TRUE;
26440 }
26441 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26442 {
26443 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26444 if (GOT_PCREL)
26445 fputs ("+.", fp);
26446 fputs ("-(", fp);
26447 output_addr_const (fp, XVECEXP (x, 0, 0));
26448 fputc (')', fp);
26449 return TRUE;
26450 }
26451 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26452 {
26453 output_addr_const (fp, XVECEXP (x, 0, 0));
26454 if (GOT_PCREL)
26455 fputs ("+.", fp);
26456 fputs ("-(", fp);
26457 output_addr_const (fp, XVECEXP (x, 0, 1));
26458 fputc (')', fp);
26459 return TRUE;
26460 }
26461 else if (GET_CODE (x) == CONST_VECTOR)
26462 return arm_emit_vector_const (fp, x);
26463
26464 return FALSE;
26465 }
26466
26467 /* Output assembly for a shift instruction.
26468 SET_FLAGS determines how the instruction modifies the condition codes.
26469 0 - Do not set condition codes.
26470 1 - Set condition codes.
26471 2 - Use smallest instruction. */
26472 const char *
26473 arm_output_shift(rtx * operands, int set_flags)
26474 {
26475 char pattern[100];
26476 static const char flag_chars[3] = {'?', '.', '!'};
26477 const char *shift;
26478 HOST_WIDE_INT val;
26479 char c;
26480
26481 c = flag_chars[set_flags];
26482 if (TARGET_UNIFIED_ASM)
26483 {
26484 shift = shift_op(operands[3], &val);
26485 if (shift)
26486 {
26487 if (val != -1)
26488 operands[2] = GEN_INT(val);
26489 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26490 }
26491 else
26492 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26493 }
26494 else
26495 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26496 output_asm_insn (pattern, operands);
26497 return "";
26498 }
26499
26500 /* Output assembly for a WMMX immediate shift instruction. */
26501 const char *
26502 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26503 {
26504 int shift = INTVAL (operands[2]);
26505 char templ[50];
26506 enum machine_mode opmode = GET_MODE (operands[0]);
26507
26508 gcc_assert (shift >= 0);
26509
26510 /* Handle shift values that exceed the maximum for the operand size:
26511 > 63 (D qualifier), > 31 (W qualifier) or > 15 (H qualifier). */
26512 if (((opmode == V4HImode) && (shift > 15))
26513 || ((opmode == V2SImode) && (shift > 31))
26514 || ((opmode == DImode) && (shift > 63)))
26515 {
26516 if (wror_or_wsra)
26517 {
26518 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26519 output_asm_insn (templ, operands);
26520 if (opmode == DImode)
26521 {
26522 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26523 output_asm_insn (templ, operands);
26524 }
26525 }
26526 else
26527 {
26528 /* The destination register will contain all zeros. */
26529 sprintf (templ, "wzero\t%%0");
26530 output_asm_insn (templ, operands);
26531 }
26532 return "";
26533 }
26534
26535 if ((opmode == DImode) && (shift > 32))
26536 {
26537 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26538 output_asm_insn (templ, operands);
26539 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26540 output_asm_insn (templ, operands);
26541 }
26542 else
26543 {
26544 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26545 output_asm_insn (templ, operands);
26546 }
26547 return "";
26548 }
26549
26550 /* Output assembly for a WMMX tinsr instruction. */
26551 const char *
26552 arm_output_iwmmxt_tinsr (rtx *operands)
26553 {
26554 int mask = INTVAL (operands[3]);
26555 int i;
26556 char templ[50];
26557 int units = mode_nunits[GET_MODE (operands[0])];
26558 gcc_assert ((mask & (mask - 1)) == 0);
26559 for (i = 0; i < units; ++i)
26560 {
26561 if ((mask & 0x01) == 1)
26562 {
26563 break;
26564 }
26565 mask >>= 1;
26566 }
26567 gcc_assert (i < units);
26568 {
26569 switch (GET_MODE (operands[0]))
26570 {
26571 case V8QImode:
26572 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26573 break;
26574 case V4HImode:
26575 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26576 break;
26577 case V2SImode:
26578 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26579 break;
26580 default:
26581 gcc_unreachable ();
26582 break;
26583 }
26584 output_asm_insn (templ, operands);
26585 }
26586 return "";
26587 }
26588
26589 /* Output a Thumb-1 casesi dispatch sequence. */
26590 const char *
26591 thumb1_output_casesi (rtx *operands)
26592 {
26593 rtx diff_vec = PATTERN (next_active_insn (operands[0]));
26594
26595 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26596
26597 switch (GET_MODE(diff_vec))
26598 {
26599 case QImode:
26600 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26601 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26602 case HImode:
26603 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26604 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26605 case SImode:
26606 return "bl\t%___gnu_thumb1_case_si";
26607 default:
26608 gcc_unreachable ();
26609 }
26610 }
26611
26612 /* Output a Thumb-2 casesi instruction. */
26613 const char *
26614 thumb2_output_casesi (rtx *operands)
26615 {
26616 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
26617
26618 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26619
26620 output_asm_insn ("cmp\t%0, %1", operands);
26621 output_asm_insn ("bhi\t%l3", operands);
26622 switch (GET_MODE(diff_vec))
26623 {
26624 case QImode:
26625 return "tbb\t[%|pc, %0]";
26626 case HImode:
26627 return "tbh\t[%|pc, %0, lsl #1]";
26628 case SImode:
26629 if (flag_pic)
26630 {
26631 output_asm_insn ("adr\t%4, %l2", operands);
26632 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26633 output_asm_insn ("add\t%4, %4, %5", operands);
26634 return "bx\t%4";
26635 }
26636 else
26637 {
26638 output_asm_insn ("adr\t%4, %l2", operands);
26639 return "ldr\t%|pc, [%4, %0, lsl #2]";
26640 }
26641 default:
26642 gcc_unreachable ();
26643 }
26644 }
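
/* For a byte-sized dispatch table the sequence above therefore looks like:

     cmp   r0, #9          @ operands[0] = index, operands[1] = bound
     bhi   .Ldefault       @ operands[3] = default label
     tbb   [pc, r0]        @ branch through the table that follows

   (register and label names are illustrative only). */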
26645
26646 /* Most ARM cores are single issue, but some newer ones can dual issue.
26647 The scheduler descriptions rely on this being correct. */
26648 static int
26649 arm_issue_rate (void)
26650 {
26651 switch (arm_tune)
26652 {
26653 case cortexa15:
26654 return 3;
26655
26656 case cortexr4:
26657 case cortexr4f:
26658 case cortexr5:
26659 case genericv7a:
26660 case cortexa5:
26661 case cortexa7:
26662 case cortexa8:
26663 case cortexa9:
26664 case cortexa53:
26665 case fa726te:
26666 case marvell_pj4:
26667 return 2;
26668
26669 default:
26670 return 1;
26671 }
26672 }
26673
26674 /* A table and a function to perform ARM-specific name mangling for
26675 NEON vector types in order to conform to the AAPCS (see "Procedure
26676 Call Standard for the ARM Architecture", Appendix A). To qualify
26677 for emission with the mangled names defined in that document, a
26678 vector type must not only be of the correct mode but also be
26679 composed of NEON vector element types (e.g. __builtin_neon_qi). */
26680 typedef struct
26681 {
26682 enum machine_mode mode;
26683 const char *element_type_name;
26684 const char *aapcs_name;
26685 } arm_mangle_map_entry;
26686
26687 static arm_mangle_map_entry arm_mangle_map[] = {
26688 /* 64-bit containerized types. */
26689 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
26690 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
26691 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
26692 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
26693 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
26694 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
26695 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
26696 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
26697 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
26698 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
26699 /* 128-bit containerized types. */
26700 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
26701 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
26702 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
26703 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
26704 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
26705 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
26706 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
26707 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
26708 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
26709 { VOIDmode, NULL, NULL }
26710 };
26711
26712 const char *
26713 arm_mangle_type (const_tree type)
26714 {
26715 arm_mangle_map_entry *pos = arm_mangle_map;
26716
26717 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26718 has to be mangled as if it is in the "std" namespace. */
26719 if (TARGET_AAPCS_BASED
26720 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
26721 return "St9__va_list";
26722
26723 /* Half-precision float. */
26724 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
26725 return "Dh";
26726
26727 if (TREE_CODE (type) != VECTOR_TYPE)
26728 return NULL;
26729
26730 /* Check the mode of the vector type, and the name of the vector
26731 element type, against the table. */
26732 while (pos->mode != VOIDmode)
26733 {
26734 tree elt_type = TREE_TYPE (type);
26735
26736 if (pos->mode == TYPE_MODE (type)
26737 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
26738 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
26739 pos->element_type_name))
26740 return pos->aapcs_name;
26741
26742 pos++;
26743 }
26744
26745 /* Use the default mangling for unrecognized (possibly user-defined)
26746 vector types. */
26747 return NULL;
26748 }
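
/* For example, the NEON type int32x4_t (V4SImode with element type
   __builtin_neon_si) is mangled as "17__simd128_int32_t", so a function
   "void f (int32x4_t)" would be mangled roughly as
   "_Z1f17__simd128_int32_t" under the AAPCS. */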
26749
26750 /* Order of allocation of core registers for Thumb: this allocation is
26751 written over the corresponding initial entries of the array
26752 initialized with REG_ALLOC_ORDER. We allocate all low registers
26753 first. Saving and restoring a low register is usually cheaper than
26754 using a call-clobbered high register. */
26755
26756 static const int thumb_core_reg_alloc_order[] =
26757 {
26758 3, 2, 1, 0, 4, 5, 6, 7,
26759 14, 12, 8, 9, 10, 11
26760 };
26761
26762 /* Adjust register allocation order when compiling for Thumb. */
26763
26764 void
26765 arm_order_regs_for_local_alloc (void)
26766 {
26767 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
26768 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
26769 if (TARGET_THUMB)
26770 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
26771 sizeof (thumb_core_reg_alloc_order));
26772 }
26773
26774 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26775
26776 bool
26777 arm_frame_pointer_required (void)
26778 {
26779 return (cfun->has_nonlocal_label
26780 || SUBTARGET_FRAME_POINTER_REQUIRED
26781 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
26782 }
26783
26784 /* Only thumb1 can't support conditional execution, so return true if
26785 the target is not thumb1. */
26786 static bool
26787 arm_have_conditional_execution (void)
26788 {
26789 return !TARGET_THUMB1;
26790 }
26791
26792 tree
26793 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
26794 {
26795 enum machine_mode in_mode, out_mode;
26796 int in_n, out_n;
26797
26798 if (TREE_CODE (type_out) != VECTOR_TYPE
26799 || TREE_CODE (type_in) != VECTOR_TYPE
26800 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
26801 return NULL_TREE;
26802
26803 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26804 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26805 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26806 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26807
26808 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
26809 decl of the vectorized builtin for the appropriate vector mode.
26810 NULL_TREE is returned if no such builtin is available. */
26811 #undef ARM_CHECK_BUILTIN_MODE
26812 #define ARM_CHECK_BUILTIN_MODE(C) \
26813 (out_mode == SFmode && out_n == C \
26814 && in_mode == SFmode && in_n == C)
26815
26816 #undef ARM_FIND_VRINT_VARIANT
26817 #define ARM_FIND_VRINT_VARIANT(N) \
26818 (ARM_CHECK_BUILTIN_MODE (2) \
26819 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
26820 : (ARM_CHECK_BUILTIN_MODE (4) \
26821 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
26822 : NULL_TREE))
26823
26824 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
26825 {
26826 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26827 switch (fn)
26828 {
26829 case BUILT_IN_FLOORF:
26830 return ARM_FIND_VRINT_VARIANT (vrintm);
26831 case BUILT_IN_CEILF:
26832 return ARM_FIND_VRINT_VARIANT (vrintp);
26833 case BUILT_IN_TRUNCF:
26834 return ARM_FIND_VRINT_VARIANT (vrintz);
26835 case BUILT_IN_ROUNDF:
26836 return ARM_FIND_VRINT_VARIANT (vrinta);
26837 default:
26838 return NULL_TREE;
26839 }
26840 }
26841 return NULL_TREE;
26842 }
26843 #undef ARM_CHECK_BUILTIN_MODE
26844 #undef ARM_FIND_VRINT_VARIANT
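
/* For example, when the vectorizer asks for a 4-lane single-precision
   version of floorf, ARM_FIND_VRINT_VARIANT (vrintm) above yields the decl
   for ARM_BUILTIN_NEON_vrintmv4sf; with 2-lane vectors it yields the v2sf
   variant, and NULL_TREE otherwise. */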
26845
26846 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
26847 static HOST_WIDE_INT
26848 arm_vector_alignment (const_tree type)
26849 {
26850 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
26851
26852 if (TARGET_AAPCS_BASED)
26853 align = MIN (align, 64);
26854
26855 return align;
26856 }
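
/* For example, a 128-bit NEON vector type whose natural alignment would be
   128 bits is given only 64-bit alignment when targeting an AAPCS-based
   ABI, as the AAPCS requires. */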
26857
26858 static unsigned int
26859 arm_autovectorize_vector_sizes (void)
26860 {
26861 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
26862 }
26863
26864 static bool
26865 arm_vector_alignment_reachable (const_tree type, bool is_packed)
26866 {
26867 /* Vectors which aren't in packed structures will not be less aligned than
26868 the natural alignment of their element type, so this is safe. */
26869 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
26870 return !is_packed;
26871
26872 return default_builtin_vector_alignment_reachable (type, is_packed);
26873 }
26874
26875 static bool
26876 arm_builtin_support_vector_misalignment (enum machine_mode mode,
26877 const_tree type, int misalignment,
26878 bool is_packed)
26879 {
26880 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
26881 {
26882 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
26883
26884 if (is_packed)
26885 return align == 1;
26886
26887 /* If the misalignment is unknown, we should be able to handle the access
26888 so long as it is not to a member of a packed data structure. */
26889 if (misalignment == -1)
26890 return true;
26891
26892 /* Return true if the misalignment is a multiple of the natural alignment
26893 of the vector's element type. This is probably always going to be
26894 true in practice, since we've already established that this isn't a
26895 packed access. */
26896 return ((misalignment % align) == 0);
26897 }
26898
26899 return default_builtin_support_vector_misalignment (mode, type, misalignment,
26900 is_packed);
26901 }
26902
26903 static void
26904 arm_conditional_register_usage (void)
26905 {
26906 int regno;
26907
26908 if (TARGET_THUMB1 && optimize_size)
26909 {
26910 /* When optimizing for size on Thumb-1, it's better not
26911 to use the HI regs, because of the overhead of
26912 stacking them. */
26913 for (regno = FIRST_HI_REGNUM;
26914 regno <= LAST_HI_REGNUM; ++regno)
26915 fixed_regs[regno] = call_used_regs[regno] = 1;
26916 }
26917
26918 /* The link register can be clobbered by any branch insn,
26919 but we have no way to track that at present, so mark
26920 it as unavailable. */
26921 if (TARGET_THUMB1)
26922 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
26923
26924 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
26925 {
26926 /* VFPv3 registers are disabled when earlier VFP
26927 versions are selected due to the definition of
26928 LAST_VFP_REGNUM. */
26929 for (regno = FIRST_VFP_REGNUM;
26930 regno <= LAST_VFP_REGNUM; ++ regno)
26931 {
26932 fixed_regs[regno] = 0;
26933 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
26934 || regno >= FIRST_VFP_REGNUM + 32;
26935 }
26936 }
26937
26938 if (TARGET_REALLY_IWMMXT)
26939 {
26940 regno = FIRST_IWMMXT_GR_REGNUM;
26941 /* The 2002/10/09 revision of the XScale ABI has wCG0
26942 and wCG1 as call-preserved registers. The 2002/11/21
26943 revision changed this so that all wCG registers are
26944 scratch registers. */
26945 for (regno = FIRST_IWMMXT_GR_REGNUM;
26946 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
26947 fixed_regs[regno] = 0;
26948 /* The XScale ABI has wR0 - wR9 as scratch registers,
26949 the rest as call-preserved registers. */
26950 for (regno = FIRST_IWMMXT_REGNUM;
26951 regno <= LAST_IWMMXT_REGNUM; ++ regno)
26952 {
26953 fixed_regs[regno] = 0;
26954 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
26955 }
26956 }
26957
26958 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
26959 {
26960 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26961 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26962 }
26963 else if (TARGET_APCS_STACK)
26964 {
26965 fixed_regs[10] = 1;
26966 call_used_regs[10] = 1;
26967 }
26968 /* -mcaller-super-interworking reserves r11 for calls to
26969 _interwork_r11_call_via_rN(). Making the register global
26970 is an easy way of ensuring that it remains valid for all
26971 calls. */
26972 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
26973 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
26974 {
26975 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26976 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26977 if (TARGET_CALLER_INTERWORKING)
26978 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26979 }
26980 SUBTARGET_CONDITIONAL_REGISTER_USAGE
26981 }
26982
26983 static reg_class_t
26984 arm_preferred_rename_class (reg_class_t rclass)
26985 {
26986 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26987 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
26988 and code size can be reduced. */
26989 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
26990 return LO_REGS;
26991 else
26992 return NO_REGS;
26993 }
26994
26995 /* Compute the attribute "length" of insn "*push_multi".
26996 So this function MUST be kept in sync with that insn pattern. */
26997 int
26998 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
26999 {
27000 int i, regno, hi_reg;
27001 int num_saves = XVECLEN (parallel_op, 0);
27002
27003 /* ARM mode. */
27004 if (TARGET_ARM)
27005 return 4;
27006 /* Thumb1 mode. */
27007 if (TARGET_THUMB1)
27008 return 2;
27009
27010 /* Thumb2 mode. */
27011 regno = REGNO (first_op);
27012 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27013 for (i = 1; i < num_saves && !hi_reg; i++)
27014 {
27015 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27016 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27017 }
27018
27019 if (!hi_reg)
27020 return 2;
27021 return 4;
27022 }
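
/* For example, in Thumb-2 "push {r4, r5, lr}" uses the 16-bit encoding
   (length 2), whereas "push {r4, r8}" involves a high register other than
   lr and needs the 32-bit encoding (length 4). */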
27023
27024 /* Compute the number of instructions emitted by output_move_double. */
27025 int
27026 arm_count_output_move_double_insns (rtx *operands)
27027 {
27028 int count;
27029 rtx ops[2];
27030 /* output_move_double may modify the operands array, so call it
27031 here on a copy of the array. */
27032 ops[0] = operands[0];
27033 ops[1] = operands[1];
27034 output_move_double (ops, false, &count);
27035 return count;
27036 }
27037
27038 int
27039 vfp3_const_double_for_fract_bits (rtx operand)
27040 {
27041 REAL_VALUE_TYPE r0;
27042
27043 if (!CONST_DOUBLE_P (operand))
27044 return 0;
27045
27046 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27047 if (exact_real_inverse (DFmode, &r0))
27048 {
27049 if (exact_real_truncate (DFmode, &r0))
27050 {
27051 HOST_WIDE_INT value = real_to_integer (&r0);
27052 value = value & 0xffffffff;
27053 if ((value != 0) && ((value & (value - 1)) == 0))
27054 return int_log2 (value);
27055 }
27056 }
27057 return 0;
27058 }
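
/* For example, a CONST_DOUBLE of 0.25 has the exact inverse 4.0 == 2^2, so
   the function returns 2; the fixed-point VCVT patterns can then treat a
   multiplication by 0.25 as a conversion with two fraction bits. */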
27059 \f
27060 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27061
27062 static void
27063 arm_pre_atomic_barrier (enum memmodel model)
27064 {
27065 if (need_atomic_barrier_p (model, true))
27066 emit_insn (gen_memory_barrier ());
27067 }
27068
27069 static void
27070 arm_post_atomic_barrier (enum memmodel model)
27071 {
27072 if (need_atomic_barrier_p (model, false))
27073 emit_insn (gen_memory_barrier ());
27074 }
27075
27076 /* Emit the load-exclusive and store-exclusive instructions.
27077 Use acquire and release versions if necessary. */
27078
27079 static void
27080 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
27081 {
27082 rtx (*gen) (rtx, rtx);
27083
27084 if (acq)
27085 {
27086 switch (mode)
27087 {
27088 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27089 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27090 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27091 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27092 default:
27093 gcc_unreachable ();
27094 }
27095 }
27096 else
27097 {
27098 switch (mode)
27099 {
27100 case QImode: gen = gen_arm_load_exclusiveqi; break;
27101 case HImode: gen = gen_arm_load_exclusivehi; break;
27102 case SImode: gen = gen_arm_load_exclusivesi; break;
27103 case DImode: gen = gen_arm_load_exclusivedi; break;
27104 default:
27105 gcc_unreachable ();
27106 }
27107 }
27108
27109 emit_insn (gen (rval, mem));
27110 }
27111
27112 static void
27113 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
27114 rtx mem, bool rel)
27115 {
27116 rtx (*gen) (rtx, rtx, rtx);
27117
27118 if (rel)
27119 {
27120 switch (mode)
27121 {
27122 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27123 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27124 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27125 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27126 default:
27127 gcc_unreachable ();
27128 }
27129 }
27130 else
27131 {
27132 switch (mode)
27133 {
27134 case QImode: gen = gen_arm_store_exclusiveqi; break;
27135 case HImode: gen = gen_arm_store_exclusivehi; break;
27136 case SImode: gen = gen_arm_store_exclusivesi; break;
27137 case DImode: gen = gen_arm_store_exclusivedi; break;
27138 default:
27139 gcc_unreachable ();
27140 }
27141 }
27142
27143 emit_insn (gen (bval, rval, mem));
27144 }
27145
27146 /* Emit INSN as a jump and mark it as unlikely to be taken. */
27147
27148 static void
27149 emit_unlikely_jump (rtx insn)
27150 {
27151 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
27152
27153 insn = emit_jump_insn (insn);
27154 add_reg_note (insn, REG_BR_PROB, very_unlikely);
27155 }
27156
27157 /* Expand a compare and swap pattern. */
27158
27159 void
27160 arm_expand_compare_and_swap (rtx operands[])
27161 {
27162 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27163 enum machine_mode mode;
27164 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27165
27166 bval = operands[0];
27167 rval = operands[1];
27168 mem = operands[2];
27169 oldval = operands[3];
27170 newval = operands[4];
27171 is_weak = operands[5];
27172 mod_s = operands[6];
27173 mod_f = operands[7];
27174 mode = GET_MODE (mem);
27175
27176 /* Normally the succ memory model must be stronger than fail, but in the
27177 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27178 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27179
27180 if (TARGET_HAVE_LDACQ
27181 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27182 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27183 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27184
27185 switch (mode)
27186 {
27187 case QImode:
27188 case HImode:
27189 /* For narrow modes, we're going to perform the comparison in SImode,
27190 so do the zero-extension now. */
27191 rval = gen_reg_rtx (SImode);
27192 oldval = convert_modes (SImode, mode, oldval, true);
27193 /* FALLTHRU */
27194
27195 case SImode:
27196 /* Force the value into a register if needed. We waited until after
27197 the zero-extension above to do this properly. */
27198 if (!arm_add_operand (oldval, SImode))
27199 oldval = force_reg (SImode, oldval);
27200 break;
27201
27202 case DImode:
27203 if (!cmpdi_operand (oldval, mode))
27204 oldval = force_reg (mode, oldval);
27205 break;
27206
27207 default:
27208 gcc_unreachable ();
27209 }
27210
27211 switch (mode)
27212 {
27213 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27214 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27215 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27216 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27217 default:
27218 gcc_unreachable ();
27219 }
27220
27221 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27222
27223 if (mode == QImode || mode == HImode)
27224 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27225
27226 /* In all cases, we arrange for success to be signaled by Z set.
27227 This arrangement allows for the boolean result to be used directly
27228 in a subsequent branch, post optimization. */
27229 x = gen_rtx_REG (CCmode, CC_REGNUM);
27230 x = gen_rtx_EQ (SImode, x, const0_rtx);
27231 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27232 }
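
/* A minimal usage sketch (hypothetical variable names) that is expanded
   through this function; the fourth argument selects a strong exchange:

     int lock, expected = 0;
     __atomic_compare_exchange_n (&lock, &expected, 1, 0,
                                  __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);

   With this model pair and TARGET_HAVE_LDACQ, the success model is promoted
   to ACQ_REL above so the acquire semantics of the failure case are not
   lost. */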
27233
27234 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27235 another memory store between the load-exclusive and store-exclusive can
27236 reset the monitor from Exclusive to Open state. This means we must wait
27237 until after reload to split the pattern, lest we get a register spill in
27238 the middle of the atomic sequence. */
27239
27240 void
27241 arm_split_compare_and_swap (rtx operands[])
27242 {
27243 rtx rval, mem, oldval, newval, scratch;
27244 enum machine_mode mode;
27245 enum memmodel mod_s, mod_f;
27246 bool is_weak;
27247 rtx label1, label2, x, cond;
27248
27249 rval = operands[0];
27250 mem = operands[1];
27251 oldval = operands[2];
27252 newval = operands[3];
27253 is_weak = (operands[4] != const0_rtx);
27254 mod_s = (enum memmodel) INTVAL (operands[5]);
27255 mod_f = (enum memmodel) INTVAL (operands[6]);
27256 scratch = operands[7];
27257 mode = GET_MODE (mem);
27258
27259 bool use_acquire = TARGET_HAVE_LDACQ
27260 && !(mod_s == MEMMODEL_RELAXED
27261 || mod_s == MEMMODEL_CONSUME
27262 || mod_s == MEMMODEL_RELEASE);
27263
27264 bool use_release = TARGET_HAVE_LDACQ
27265 && !(mod_s == MEMMODEL_RELAXED
27266 || mod_s == MEMMODEL_CONSUME
27267 || mod_s == MEMMODEL_ACQUIRE);
27268
27269 /* Checks whether a barrier is needed and emits one accordingly. */
27270 if (!(use_acquire || use_release))
27271 arm_pre_atomic_barrier (mod_s);
27272
27273 label1 = NULL_RTX;
27274 if (!is_weak)
27275 {
27276 label1 = gen_label_rtx ();
27277 emit_label (label1);
27278 }
27279 label2 = gen_label_rtx ();
27280
27281 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27282
27283 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27284 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27285 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27286 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27287 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27288
27289 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27290
27291 /* Weak or strong, we want EQ to be true for success, so that we
27292 match the flags that we got from the compare above. */
27293 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27294 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27295 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27296
27297 if (!is_weak)
27298 {
27299 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27300 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27301 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27302 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27303 }
27304
27305 if (mod_f != MEMMODEL_RELAXED)
27306 emit_label (label2);
27307
27308 /* Checks whether a barrier is needed and emits one accordingly. */
27309 if (!(use_acquire || use_release))
27310 arm_post_atomic_barrier (mod_s);
27311
27312 if (mod_f == MEMMODEL_RELAXED)
27313 emit_label (label2);
27314 }
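
/* For reference, a strong SImode exchange splits into roughly this loop
   (a sketch; register names are illustrative, and barriers or
   acquire/release variants are added according to the memory model):

     1: ldrex   rval, [mem]
        cmp     rval, oldval
        bne     2f
        strex   scratch, newval, [mem]
        cmp     scratch, #0
        bne     1b
     2:
 */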
27315
27316 void
27317 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27318 rtx value, rtx model_rtx, rtx cond)
27319 {
27320 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27321 enum machine_mode mode = GET_MODE (mem);
27322 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
27323 rtx label, x;
27324
27325 bool use_acquire = TARGET_HAVE_LDACQ
27326 && !(model == MEMMODEL_RELAXED
27327 || model == MEMMODEL_CONSUME
27328 || model == MEMMODEL_RELEASE);
27329
27330 bool use_release = TARGET_HAVE_LDACQ
27331 && !(model == MEMMODEL_RELAXED
27332 || model == MEMMODEL_CONSUME
27333 || model == MEMMODEL_ACQUIRE);
27334
27335 /* Checks whether a barrier is needed and emits one accordingly. */
27336 if (!(use_acquire || use_release))
27337 arm_pre_atomic_barrier (model);
27338
27339 label = gen_label_rtx ();
27340 emit_label (label);
27341
27342 if (new_out)
27343 new_out = gen_lowpart (wmode, new_out);
27344 if (old_out)
27345 old_out = gen_lowpart (wmode, old_out);
27346 else
27347 old_out = new_out;
27348 value = simplify_gen_subreg (wmode, value, mode, 0);
27349
27350 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27351
27352 switch (code)
27353 {
27354 case SET:
27355 new_out = value;
27356 break;
27357
27358 case NOT:
27359 x = gen_rtx_AND (wmode, old_out, value);
27360 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27361 x = gen_rtx_NOT (wmode, new_out);
27362 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27363 break;
27364
27365 case MINUS:
27366 if (CONST_INT_P (value))
27367 {
27368 value = GEN_INT (-INTVAL (value));
27369 code = PLUS;
27370 }
27371 /* FALLTHRU */
27372
27373 case PLUS:
27374 if (mode == DImode)
27375 {
27376 /* DImode plus/minus need to clobber flags. */
27377 /* The adddi3 and subdi3 patterns are incorrectly written so that
27378 they require matching operands, even when we could easily support
27379 three operands. Thankfully, this can be fixed up post-splitting,
27380 as the individual add+adc patterns do accept three operands and
27381 post-reload cprop can make these moves go away. */
27382 emit_move_insn (new_out, old_out);
27383 if (code == PLUS)
27384 x = gen_adddi3 (new_out, new_out, value);
27385 else
27386 x = gen_subdi3 (new_out, new_out, value);
27387 emit_insn (x);
27388 break;
27389 }
27390 /* FALLTHRU */
27391
27392 default:
27393 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27394 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27395 break;
27396 }
27397
27398 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27399 use_release);
27400
27401 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27402 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27403
27404 /* Checks whether a barrier is needed and emits one accordingly. */
27405 if (!(use_acquire || use_release))
27406 arm_post_atomic_barrier (model);
27407 }
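
/* A sketch of the loop this splitter produces for an SImode atomic add with
   MEMMODEL_RELAXED (illustrative register names, no barriers):

     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b
 */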
27408 \f
27409 #define MAX_VECT_LEN 16
27410
27411 struct expand_vec_perm_d
27412 {
27413 rtx target, op0, op1;
27414 unsigned char perm[MAX_VECT_LEN];
27415 enum machine_mode vmode;
27416 unsigned char nelt;
27417 bool one_vector_p;
27418 bool testing_p;
27419 };
27420
27421 /* Generate a variable permutation. */
27422
27423 static void
27424 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27425 {
27426 enum machine_mode vmode = GET_MODE (target);
27427 bool one_vector_p = rtx_equal_p (op0, op1);
27428
27429 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27430 gcc_checking_assert (GET_MODE (op0) == vmode);
27431 gcc_checking_assert (GET_MODE (op1) == vmode);
27432 gcc_checking_assert (GET_MODE (sel) == vmode);
27433 gcc_checking_assert (TARGET_NEON);
27434
27435 if (one_vector_p)
27436 {
27437 if (vmode == V8QImode)
27438 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27439 else
27440 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27441 }
27442 else
27443 {
27444 rtx pair;
27445
27446 if (vmode == V8QImode)
27447 {
27448 pair = gen_reg_rtx (V16QImode);
27449 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27450 pair = gen_lowpart (TImode, pair);
27451 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27452 }
27453 else
27454 {
27455 pair = gen_reg_rtx (OImode);
27456 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27457 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27458 }
27459 }
27460 }
27461
27462 void
27463 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27464 {
27465 enum machine_mode vmode = GET_MODE (target);
27466 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27467 bool one_vector_p = rtx_equal_p (op0, op1);
27468 rtx rmask[MAX_VECT_LEN], mask;
27469
27470 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27471 numbering of elements for big-endian, we must reverse the order. */
27472 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27473
27474 /* The VTBL instruction does not use a modulo index, so we must take care
27475 of that ourselves. */
27476 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27477 for (i = 0; i < nelt; ++i)
27478 rmask[i] = mask;
27479 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27480 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27481
27482 arm_expand_vec_perm_1 (target, op0, op1, sel);
27483 }
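
/* For example, in a two-operand V8QImode permutation nelt is 8, so every
   selector byte is ANDed with 15 before the VTBL lookup; out-of-range
   indices therefore wrap modulo the sixteen combined input bytes, as
   VEC_PERM_EXPR requires. */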
27484
27485 /* Generate or test for an insn that supports a constant permutation. */
27486
27487 /* Recognize patterns for the VUZP insns. */
27488
27489 static bool
27490 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27491 {
27492 unsigned int i, odd, mask, nelt = d->nelt;
27493 rtx out0, out1, in0, in1, x;
27494 rtx (*gen)(rtx, rtx, rtx, rtx);
27495
27496 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27497 return false;
27498
27499 /* Note that these are little-endian tests. Adjust for big-endian later. */
27500 if (d->perm[0] == 0)
27501 odd = 0;
27502 else if (d->perm[0] == 1)
27503 odd = 1;
27504 else
27505 return false;
27506 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27507
27508 for (i = 0; i < nelt; i++)
27509 {
27510 unsigned elt = (i * 2 + odd) & mask;
27511 if (d->perm[i] != elt)
27512 return false;
27513 }
27514
27515 /* Success! */
27516 if (d->testing_p)
27517 return true;
27518
27519 switch (d->vmode)
27520 {
27521 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27522 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27523 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27524 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27525 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27526 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27527 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27528 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27529 default:
27530 gcc_unreachable ();
27531 }
27532
27533 in0 = d->op0;
27534 in1 = d->op1;
27535 if (BYTES_BIG_ENDIAN)
27536 {
27537 x = in0, in0 = in1, in1 = x;
27538 odd = !odd;
27539 }
27540
27541 out0 = d->target;
27542 out1 = gen_reg_rtx (d->vmode);
27543 if (odd)
27544 x = out0, out0 = out1, out1 = x;
27545
27546 emit_insn (gen (out0, in0, in1, out1));
27547 return true;
27548 }
27549
27550 /* Recognize patterns for the VZIP insns. */
27551
27552 static bool
27553 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27554 {
27555 unsigned int i, high, mask, nelt = d->nelt;
27556 rtx out0, out1, in0, in1, x;
27557 rtx (*gen)(rtx, rtx, rtx, rtx);
27558
27559 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27560 return false;
27561
27562 /* Note that these are little-endian tests. Adjust for big-endian later. */
27563 high = nelt / 2;
27564 if (d->perm[0] == high)
27565 ;
27566 else if (d->perm[0] == 0)
27567 high = 0;
27568 else
27569 return false;
27570 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27571
27572 for (i = 0; i < nelt / 2; i++)
27573 {
27574 unsigned elt = (i + high) & mask;
27575 if (d->perm[i * 2] != elt)
27576 return false;
27577 elt = (elt + nelt) & mask;
27578 if (d->perm[i * 2 + 1] != elt)
27579 return false;
27580 }
27581
27582 /* Success! */
27583 if (d->testing_p)
27584 return true;
27585
27586 switch (d->vmode)
27587 {
27588 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27589 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27590 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27591 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27592 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27593 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27594 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27595 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27596 default:
27597 gcc_unreachable ();
27598 }
27599
27600 in0 = d->op0;
27601 in1 = d->op1;
27602 if (BYTES_BIG_ENDIAN)
27603 {
27604 x = in0, in0 = in1, in1 = x;
27605 high = !high;
27606 }
27607
27608 out0 = d->target;
27609 out1 = gen_reg_rtx (d->vmode);
27610 if (high)
27611 x = out0, out0 = out1, out1 = x;
27612
27613 emit_insn (gen (out0, in0, in1, out1));
27614 return true;
27615 }
27616
27617 /* Recognize patterns for the VREV insns. */
27618
27619 static bool
27620 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27621 {
27622 unsigned int i, j, diff, nelt = d->nelt;
27623 rtx (*gen)(rtx, rtx, rtx);
27624
27625 if (!d->one_vector_p)
27626 return false;
27627
27628 diff = d->perm[0];
27629 switch (diff)
27630 {
27631 case 7:
27632 switch (d->vmode)
27633 {
27634 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27635 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27636 default:
27637 return false;
27638 }
27639 break;
27640 case 3:
27641 switch (d->vmode)
27642 {
27643 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27644 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27645 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27646 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27647 default:
27648 return false;
27649 }
27650 break;
27651 case 1:
27652 switch (d->vmode)
27653 {
27654 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27655 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27656 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27657 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27658 case V4SImode: gen = gen_neon_vrev64v4si; break;
27659 case V2SImode: gen = gen_neon_vrev64v2si; break;
27660 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27661 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27662 default:
27663 return false;
27664 }
27665 break;
27666 default:
27667 return false;
27668 }
27669
27670 for (i = 0; i < nelt ; i += diff + 1)
27671 for (j = 0; j <= diff; j += 1)
27672 {
27673 /* This is guaranteed to hold, as the value of diff
27674 is 7, 3 or 1 and we should have enough elements in the
27675 queue to generate this. Getting a vector mask with a
27676 value of diff other than these values implies that
27677 something has gone wrong by the time we get here. */
27678 gcc_assert (i + j < nelt);
27679 if (d->perm[i + j] != i + diff - j)
27680 return false;
27681 }
27682
27683 /* Success! */
27684 if (d->testing_p)
27685 return true;
27686
27687 /* ??? The third operand is an artifact of the builtin infrastructure
27688 and is ignored by the actual instruction. */
27689 emit_insn (gen (d->target, d->op0, const0_rtx));
27690 return true;
27691 }
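
/* As a worked example: a V8QImode selector of {7,6,5,4,3,2,1,0} gives
   diff == 7, the loop above confirms that each group of eight indices is
   exactly reversed, and the permutation is emitted as a single VREV64.8
   instruction. */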
27692
27693 /* Recognize patterns for the VTRN insns. */
27694
27695 static bool
27696 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27697 {
27698 unsigned int i, odd, mask, nelt = d->nelt;
27699 rtx out0, out1, in0, in1, x;
27700 rtx (*gen)(rtx, rtx, rtx, rtx);
27701
27702 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27703 return false;
27704
27705 /* Note that these are little-endian tests. Adjust for big-endian later. */
27706 if (d->perm[0] == 0)
27707 odd = 0;
27708 else if (d->perm[0] == 1)
27709 odd = 1;
27710 else
27711 return false;
27712 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27713
27714 for (i = 0; i < nelt; i += 2)
27715 {
27716 if (d->perm[i] != i + odd)
27717 return false;
27718 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27719 return false;
27720 }
27721
27722 /* Success! */
27723 if (d->testing_p)
27724 return true;
27725
27726 switch (d->vmode)
27727 {
27728 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27729 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27730 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27731 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27732 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27733 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27734 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27735 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27736 default:
27737 gcc_unreachable ();
27738 }
27739
27740 in0 = d->op0;
27741 in1 = d->op1;
27742 if (BYTES_BIG_ENDIAN)
27743 {
27744 x = in0, in0 = in1, in1 = x;
27745 odd = !odd;
27746 }
27747
27748 out0 = d->target;
27749 out1 = gen_reg_rtx (d->vmode);
27750 if (odd)
27751 x = out0, out0 = out1, out1 = x;
27752
27753 emit_insn (gen (out0, in0, in1, out1));
27754 return true;
27755 }
27756
27757 /* Recognize patterns for the VEXT insns. */
27758
27759 static bool
27760 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27761 {
27762 unsigned int i, nelt = d->nelt;
27763 rtx (*gen) (rtx, rtx, rtx, rtx);
27764 rtx offset;
27765
27766 unsigned int location;
27767
27768 unsigned int next = d->perm[0] + 1;
27769
27770 /* TODO: Handle GCC's numbering of elements for big-endian. */
27771 if (BYTES_BIG_ENDIAN)
27772 return false;
27773
27774 /* Check if the extracted indexes are increasing by one. */
27775 for (i = 1; i < nelt; next++, i++)
27776 {
27777 /* If we hit the most significant element of the 2nd vector in
27778 the previous iteration, no need to test further. */
27779 if (next == 2 * nelt)
27780 return false;
27781
27782 /* If we are operating on only one vector: it could be a
27783 rotation. If there are only two elements of size < 64, let
27784 arm_evpc_neon_vrev catch it. */
27785 if (d->one_vector_p && (next == nelt))
27786 {
27787 if ((nelt == 2) && (d->vmode != V2DImode))
27788 return false;
27789 else
27790 next = 0;
27791 }
27792
27793 if (d->perm[i] != next)
27794 return false;
27795 }
27796
27797 location = d->perm[0];
27798
27799 switch (d->vmode)
27800 {
27801 case V16QImode: gen = gen_neon_vextv16qi; break;
27802 case V8QImode: gen = gen_neon_vextv8qi; break;
27803 case V4HImode: gen = gen_neon_vextv4hi; break;
27804 case V8HImode: gen = gen_neon_vextv8hi; break;
27805 case V2SImode: gen = gen_neon_vextv2si; break;
27806 case V4SImode: gen = gen_neon_vextv4si; break;
27807 case V2SFmode: gen = gen_neon_vextv2sf; break;
27808 case V4SFmode: gen = gen_neon_vextv4sf; break;
27809 case V2DImode: gen = gen_neon_vextv2di; break;
27810 default:
27811 return false;
27812 }
27813
27814 /* Success! */
27815 if (d->testing_p)
27816 return true;
27817
27818 offset = GEN_INT (location);
27819 emit_insn (gen (d->target, d->op0, d->op1, offset));
27820 return true;
27821 }
27822
27823 /* The NEON VTBL instruction is a fully variable permutation that's even
27824 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
27825 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
27826 can do slightly better by expanding this as a constant where we don't
27827 have to apply a mask. */
27828
27829 static bool
27830 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
27831 {
27832 rtx rperm[MAX_VECT_LEN], sel;
27833 enum machine_mode vmode = d->vmode;
27834 unsigned int i, nelt = d->nelt;
27835
27836 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27837 numbering of elements for big-endian, we must reverse the order. */
27838 if (BYTES_BIG_ENDIAN)
27839 return false;
27840
27841 if (d->testing_p)
27842 return true;
27843
27844 /* Generic code will try constant permutation twice: once with the
27845 original mode and again with the elements lowered to QImode.
27846 So wait and don't do the selector expansion ourselves. */
27847 if (vmode != V8QImode && vmode != V16QImode)
27848 return false;
27849
27850 for (i = 0; i < nelt; ++i)
27851 rperm[i] = GEN_INT (d->perm[i]);
27852 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
27853 sel = force_reg (vmode, sel);
27854
27855 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
27856 return true;
27857 }
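
/* For example, a constant V8QImode selector such as {3,11,6,14,1,9,4,12}
   is simply materialised as a constant vector in a register and fed to
   VTBL by arm_expand_vec_perm_1; since every index is already in range,
   no masking AND is needed, unlike the variable case above. */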
27858
27859 static bool
27860 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
27861 {
27862 /* Check if the input mask matches vext before reordering the
27863 operands. */
27864 if (TARGET_NEON)
27865 if (arm_evpc_neon_vext (d))
27866 return true;
27867
27868 /* The pattern matching functions above are written to look for a small
27869 number to begin the sequence (0, 1, N/2). If we begin with an index
27870 from the second operand, we can swap the operands. */
27871 if (d->perm[0] >= d->nelt)
27872 {
27873 unsigned i, nelt = d->nelt;
27874 rtx x;
27875
27876 for (i = 0; i < nelt; ++i)
27877 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
27878
27879 x = d->op0;
27880 d->op0 = d->op1;
27881 d->op1 = x;
27882 }
27883
27884 if (TARGET_NEON)
27885 {
27886 if (arm_evpc_neon_vuzp (d))
27887 return true;
27888 if (arm_evpc_neon_vzip (d))
27889 return true;
27890 if (arm_evpc_neon_vrev (d))
27891 return true;
27892 if (arm_evpc_neon_vtrn (d))
27893 return true;
27894 return arm_evpc_neon_vtbl (d);
27895 }
27896 return false;
27897 }
27898
27899 /* Expand a vec_perm_const pattern. */
27900
27901 bool
27902 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
27903 {
27904 struct expand_vec_perm_d d;
27905 int i, nelt, which;
27906
27907 d.target = target;
27908 d.op0 = op0;
27909 d.op1 = op1;
27910
27911 d.vmode = GET_MODE (target);
27912 gcc_assert (VECTOR_MODE_P (d.vmode));
27913 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27914 d.testing_p = false;
27915
27916 for (i = which = 0; i < nelt; ++i)
27917 {
27918 rtx e = XVECEXP (sel, 0, i);
27919 int ei = INTVAL (e) & (2 * nelt - 1);
27920 which |= (ei < nelt ? 1 : 2);
27921 d.perm[i] = ei;
27922 }
27923
27924 switch (which)
27925 {
27926 default:
27927 gcc_unreachable();
27928
27929 case 3:
27930 d.one_vector_p = false;
27931 if (!rtx_equal_p (op0, op1))
27932 break;
27933
27934 /* The elements of PERM do not suggest that only the first operand
27935 is used, but both operands are identical. Allow easier matching
27936 of the permutation by folding the permutation into the single
27937 input vector. */
27938 /* FALLTHRU */
27939 case 2:
27940 for (i = 0; i < nelt; ++i)
27941 d.perm[i] &= nelt - 1;
27942 d.op0 = op1;
27943 d.one_vector_p = true;
27944 break;
27945
27946 case 1:
27947 d.op1 = op0;
27948 d.one_vector_p = true;
27949 break;
27950 }
27951
27952 return arm_expand_vec_perm_const_1 (&d);
27953 }
27954
27955 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
27956
27957 static bool
27958 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
27959 const unsigned char *sel)
27960 {
27961 struct expand_vec_perm_d d;
27962 unsigned int i, nelt, which;
27963 bool ret;
27964
27965 d.vmode = vmode;
27966 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27967 d.testing_p = true;
27968 memcpy (d.perm, sel, nelt);
27969
27970 /* Categorize the set of elements in the selector. */
27971 for (i = which = 0; i < nelt; ++i)
27972 {
27973 unsigned char e = d.perm[i];
27974 gcc_assert (e < 2 * nelt);
27975 which |= (e < nelt ? 1 : 2);
27976 }
27977
27978 /* If all elements come from the second vector, fold them onto the first. */
27979 if (which == 2)
27980 for (i = 0; i < nelt; ++i)
27981 d.perm[i] -= nelt;
27982
27983 /* Check whether the mask can be applied to the vector type. */
27984 d.one_vector_p = (which != 3);
27985
27986 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
27987 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
27988 if (!d.one_vector_p)
27989 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
27990
27991 start_sequence ();
27992 ret = arm_expand_vec_perm_const_1 (&d);
27993 end_sequence ();
27994
27995 return ret;
27996 }
27997
27998 bool
27999 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
28000 {
28001 /* If we are soft float and we either have LDRD or the access is no
28002 wider than a word, then all auto increment forms are ok. */
28003 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28004 return true;
28005
28006 switch (code)
28007 {
28008 /* Post increment and Pre Decrement are supported for all
28009 instruction forms except for vector forms. */
28010 case ARM_POST_INC:
28011 case ARM_PRE_DEC:
28012 if (VECTOR_MODE_P (mode))
28013 {
28014 if (code != ARM_PRE_DEC)
28015 return true;
28016 else
28017 return false;
28018 }
28019
28020 return true;
28021
28022 case ARM_POST_DEC:
28023 case ARM_PRE_INC:
28024 /* Without LDRD and mode size greater than
28025 word size, there is no point in auto-incrementing
28026 because ldm and stm will not have these forms. */
28027 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28028 return false;
28029
28030 /* Vector and floating point modes do not support
28031 these auto increment forms. */
28032 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28033 return false;
28034
28035 return true;
28036
28037 default:
28038 return false;
28039
28040 }
28041
28042 return false;
28043 }
28044
28045 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28046 on ARM, since we know that shifts by negative amounts are no-ops.
28047 Additionally, the default expansion code is not available or suitable
28048 for post-reload insn splits (this can occur when the register allocator
28049 chooses not to do a shift in NEON).
28050
28051 This function is used in both initial expand and post-reload splits, and
28052 handles all kinds of 64-bit shifts.
28053
28054 Input requirements:
28055 - It is safe for the input and output to be the same register, but
28056 early-clobber rules apply for the shift amount and scratch registers.
28057 - Shift by register requires both scratch registers. In all other cases
28058 the scratch registers may be NULL.
28059 - Ashiftrt by a register also clobbers the CC register. */
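
/* As a worked example of the constant case below: for a DImode left shift
   by 5 the emitted sequence computes
     out_high = (in_high << 5) | ((unsigned) in_low >> 27);
     out_low  = in_low << 5;
   i.e. five bits move from the "up" (low) word into the "down" (high)
   word. */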
28060 void
28061 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28062 rtx amount, rtx scratch1, rtx scratch2)
28063 {
28064 rtx out_high = gen_highpart (SImode, out);
28065 rtx out_low = gen_lowpart (SImode, out);
28066 rtx in_high = gen_highpart (SImode, in);
28067 rtx in_low = gen_lowpart (SImode, in);
28068
28069 /* Terminology:
28070 in = the register pair containing the input value.
28071 out = the destination register pair.
28072 up = the high- or low-part of each pair.
28073 down = the opposite part to "up".
28074 In a shift, we can consider bits to shift from "up"-stream to
28075 "down"-stream, so in a left-shift "up" is the low-part and "down"
28076 is the high-part of each register pair. */
28077
28078 rtx out_up = code == ASHIFT ? out_low : out_high;
28079 rtx out_down = code == ASHIFT ? out_high : out_low;
28080 rtx in_up = code == ASHIFT ? in_low : in_high;
28081 rtx in_down = code == ASHIFT ? in_high : in_low;
28082
28083 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28084 gcc_assert (out
28085 && (REG_P (out) || GET_CODE (out) == SUBREG)
28086 && GET_MODE (out) == DImode);
28087 gcc_assert (in
28088 && (REG_P (in) || GET_CODE (in) == SUBREG)
28089 && GET_MODE (in) == DImode);
28090 gcc_assert (amount
28091 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28092 && GET_MODE (amount) == SImode)
28093 || CONST_INT_P (amount)));
28094 gcc_assert (scratch1 == NULL
28095 || (GET_CODE (scratch1) == SCRATCH)
28096 || (GET_MODE (scratch1) == SImode
28097 && REG_P (scratch1)));
28098 gcc_assert (scratch2 == NULL
28099 || (GET_CODE (scratch2) == SCRATCH)
28100 || (GET_MODE (scratch2) == SImode
28101 && REG_P (scratch2)));
28102 gcc_assert (!REG_P (out) || !REG_P (amount)
28103 || !HARD_REGISTER_P (out)
28104 || (REGNO (out) != REGNO (amount)
28105 && REGNO (out) + 1 != REGNO (amount)));
28106
28107 /* Macros to make following code more readable. */
28108 #define SUB_32(DEST,SRC) \
28109 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28110 #define RSB_32(DEST,SRC) \
28111 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28112 #define SUB_S_32(DEST,SRC) \
28113 gen_addsi3_compare0 ((DEST), (SRC), \
28114 GEN_INT (-32))
28115 #define SET(DEST,SRC) \
28116 gen_rtx_SET (SImode, (DEST), (SRC))
28117 #define SHIFT(CODE,SRC,AMOUNT) \
28118 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28119 #define LSHIFT(CODE,SRC,AMOUNT) \
28120 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28121 SImode, (SRC), (AMOUNT))
28122 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28123 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28124 SImode, (SRC), (AMOUNT))
28125 #define ORR(A,B) \
28126 gen_rtx_IOR (SImode, (A), (B))
28127 #define BRANCH(COND,LABEL) \
28128 gen_arm_cond_branch ((LABEL), \
28129 gen_rtx_ ## COND (CCmode, cc_reg, \
28130 const0_rtx), \
28131 cc_reg)
28132
28133 /* Shifts by register and shifts by constant are handled separately. */
28134 if (CONST_INT_P (amount))
28135 {
28136 /* We have a shift-by-constant. */
28137
28138 /* First, handle out-of-range shift amounts.
28139 In both cases we try to match the result that an ARM instruction in a
28140 shift-by-register would give. This helps reduce execution
28141 differences between optimization levels, but it won't stop other
28142 parts of the compiler doing different things. This is "undefined
28143 behaviour", in any case. */
28144 if (INTVAL (amount) <= 0)
28145 emit_insn (gen_movdi (out, in));
28146 else if (INTVAL (amount) >= 64)
28147 {
28148 if (code == ASHIFTRT)
28149 {
28150 rtx const31_rtx = GEN_INT (31);
28151 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28152 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28153 }
28154 else
28155 emit_insn (gen_movdi (out, const0_rtx));
28156 }
28157
28158 /* Now handle valid shifts. */
28159 else if (INTVAL (amount) < 32)
28160 {
28161 /* Shifts by a constant less than 32. */
28162 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28163
28164 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28165 emit_insn (SET (out_down,
28166 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28167 out_down)));
28168 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28169 }
28170 else
28171 {
28172 /* Shifts by a constant greater than 31. */
28173 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28174
28175 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28176 if (code == ASHIFTRT)
28177 emit_insn (gen_ashrsi3 (out_up, in_up,
28178 GEN_INT (31)));
28179 else
28180 emit_insn (SET (out_up, const0_rtx));
28181 }
28182 }
28183 else
28184 {
28185 /* We have a shift-by-register. */
28186 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28187
28188 /* This alternative requires the scratch registers. */
28189 gcc_assert (scratch1 && REG_P (scratch1));
28190 gcc_assert (scratch2 && REG_P (scratch2));
28191
28192 /* We will need the values "amount-32" and "32-amount" later.
28193 Swapping them around now allows the later code to be more general. */
28194 switch (code)
28195 {
28196 case ASHIFT:
28197 emit_insn (SUB_32 (scratch1, amount));
28198 emit_insn (RSB_32 (scratch2, amount));
28199 break;
28200 case ASHIFTRT:
28201 emit_insn (RSB_32 (scratch1, amount));
28202 /* Also set CC = amount > 32. */
28203 emit_insn (SUB_S_32 (scratch2, amount));
28204 break;
28205 case LSHIFTRT:
28206 emit_insn (RSB_32 (scratch1, amount));
28207 emit_insn (SUB_32 (scratch2, amount));
28208 break;
28209 default:
28210 gcc_unreachable ();
28211 }
28212
28213 /* Emit code like this:
28214
28215 arithmetic-left:
28216 out_down = in_down << amount;
28217 out_down = (in_up << (amount - 32)) | out_down;
28218 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28219 out_up = in_up << amount;
28220
28221 arithmetic-right:
28222 out_down = in_down >> amount;
28223 out_down = (in_up << (32 - amount)) | out_down;
28224 if (amount < 32)
28225 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28226 out_up = in_up << amount;
28227
28228 logical-right:
28229 out_down = in_down >> amount;
28230 out_down = (in_up << (32 - amount)) | out_down;
28231 if (amount < 32)
28232 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28233 out_up = in_up << amount;
28234
28235 The ARM and Thumb2 variants are the same but implemented slightly
28236 differently. If this were only called during expand we could just
28237 use the Thumb2 case and let combine do the right thing, but this
28238 can also be called from post-reload splitters. */
28239
28240 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28241
28242 if (!TARGET_THUMB2)
28243 {
28244 /* Emit code for ARM mode. */
28245 emit_insn (SET (out_down,
28246 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28247 if (code == ASHIFTRT)
28248 {
28249 rtx done_label = gen_label_rtx ();
28250 emit_jump_insn (BRANCH (LT, done_label));
28251 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28252 out_down)));
28253 emit_label (done_label);
28254 }
28255 else
28256 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28257 out_down)));
28258 }
28259 else
28260 {
28261 /* Emit code for Thumb2 mode.
28262 Thumb2 can't do shift and or in one insn. */
28263 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28264 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28265
28266 if (code == ASHIFTRT)
28267 {
28268 rtx done_label = gen_label_rtx ();
28269 emit_jump_insn (BRANCH (LT, done_label));
28270 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28271 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28272 emit_label (done_label);
28273 }
28274 else
28275 {
28276 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28277 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28278 }
28279 }
28280
28281 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28282 }
28283
28284 #undef SUB_32
28285 #undef RSB_32
28286 #undef SUB_S_32
28287 #undef SET
28288 #undef SHIFT
28289 #undef LSHIFT
28290 #undef REV_LSHIFT
28291 #undef ORR
28292 #undef BRANCH
28293 }
28294
28295
28296 /* Return true if *COMPARISON is a comparison operation the target can
28297 handle, canonicalizing it and forcing its operands into valid forms. */
28298 bool
28299 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
28300 {
28301 enum rtx_code code = GET_CODE (*comparison);
28302 int code_int;
28303 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28304 ? GET_MODE (*op2) : GET_MODE (*op1);
28305
28306 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28307
28308 if (code == UNEQ || code == LTGT)
28309 return false;
28310
28311 code_int = (int)code;
28312 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28313 PUT_CODE (*comparison, (enum rtx_code)code_int);
28314
28315 switch (mode)
28316 {
28317 case SImode:
28318 if (!arm_add_operand (*op1, mode))
28319 *op1 = force_reg (mode, *op1);
28320 if (!arm_add_operand (*op2, mode))
28321 *op2 = force_reg (mode, *op2);
28322 return true;
28323
28324 case DImode:
28325 if (!cmpdi_operand (*op1, mode))
28326 *op1 = force_reg (mode, *op1);
28327 if (!cmpdi_operand (*op2, mode))
28328 *op2 = force_reg (mode, *op2);
28329 return true;
28330
28331 case SFmode:
28332 case DFmode:
28333 if (!arm_float_compare_operand (*op1, mode))
28334 *op1 = force_reg (mode, *op1);
28335 if (!arm_float_compare_operand (*op2, mode))
28336 *op2 = force_reg (mode, *op2);
28337 return true;
28338 default:
28339 break;
28340 }
28341
28342 return false;
28343
28344 }
28345
28346 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28347
28348 static unsigned HOST_WIDE_INT
28349 arm_asan_shadow_offset (void)
28350 {
28351 return (unsigned HOST_WIDE_INT) 1 << 29;
28352 }
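
/* With this offset AddressSanitizer computes a shadow address roughly as
   shadow = (addr >> 3) + 0x20000000; the hook only supplies the constant,
   the >> 3 scaling being the generic ASan shadow mapping. */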
28353
28354 #include "gt-arm.h"