re PR target/54051 (Invalid alignment specifier generated for vld3_lane_* and vld3_du...
[gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
58 #include "dumpfile.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 struct four_ints
67 {
68 int i[4];
69 };
70
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
188
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
196
197 static void arm_file_end (void);
198 static void arm_file_start (void);
199
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
217
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static const char *arm_invalid_parameter_type (const_tree t);
240 static const char *arm_invalid_return_type (const_tree t);
241 static tree arm_promoted_type (const_tree t);
242 static tree arm_convert_to_type (tree type, tree expr);
243 static bool arm_scalar_mode_supported_p (enum machine_mode);
244 static bool arm_frame_pointer_required (void);
245 static bool arm_can_eliminate (const int, const int);
246 static void arm_asm_trampoline_template (FILE *);
247 static void arm_trampoline_init (rtx, tree, rtx);
248 static rtx arm_trampoline_adjust_address (rtx);
249 static rtx arm_pic_static_addr (rtx orig, rtx reg);
250 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
251 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool arm_array_mode_supported_p (enum machine_mode,
254 unsigned HOST_WIDE_INT);
255 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
256 static bool arm_class_likely_spilled_p (reg_class_t);
257 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
258 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
259 const_tree type,
260 int misalignment,
261 bool is_packed);
262 static void arm_conditional_register_usage (void);
263 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
264 static unsigned int arm_autovectorize_vector_sizes (void);
265 static int arm_default_branch_cost (bool, bool);
266 static int arm_cortex_a5_branch_cost (bool, bool);
267
268 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
269 const unsigned char *sel);
270
271 \f
272 /* Table of machine attributes. */
273 static const struct attribute_spec arm_attribute_table[] =
274 {
275 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
276 affects_type_identity } */
277 /* Function calls made to this symbol must be done indirectly, because
278 it may lie outside of the 26 bit addressing range of a normal function
279 call. */
280 { "long_call", 0, 0, false, true, true, NULL, false },
281 /* Whereas these functions are always known to reside within the 26 bit
282 addressing range. */
283 { "short_call", 0, 0, false, true, true, NULL, false },
284 /* Specify the procedure call conventions for a function. */
285 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
286 false },
287 /* Interrupt Service Routines have special prologue and epilogue requirements. */
288 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
289 false },
290 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
291 false },
292 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
293 false },
294 #ifdef ARM_PE
295 /* ARM/PE has three new attributes:
296 interfacearm - ?
297 dllexport - for exporting a function/variable that will live in a dll
298 dllimport - for importing a function/variable from a dll
299
300 Microsoft allows multiple declspecs in one __declspec, separating
301 them with spaces. We do NOT support this. Instead, use __declspec
302 multiple times.
303 */
304 { "dllimport", 0, 0, true, false, false, NULL, false },
305 { "dllexport", 0, 0, true, false, false, NULL, false },
306 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
307 false },
308 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
309 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
310 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
311 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
312 false },
313 #endif
314 { NULL, 0, 0, false, false, false, NULL, false }
315 };
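/* For reference, these attributes are applied in target source code in the
   usual GNU C way; for example (illustrative only):

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     int far_away (int) __attribute__ ((long_call));

   "isr"/"interrupt" take an optional string naming the interrupt kind,
   while "long_call" and "short_call" take no arguments, matching the
   min_len/max_len fields above.  */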
316 \f
317 /* Initialize the GCC target structure. */
318 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
319 #undef TARGET_MERGE_DECL_ATTRIBUTES
320 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
321 #endif
322
323 #undef TARGET_LEGITIMIZE_ADDRESS
324 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
325
326 #undef TARGET_ATTRIBUTE_TABLE
327 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
328
329 #undef TARGET_ASM_FILE_START
330 #define TARGET_ASM_FILE_START arm_file_start
331 #undef TARGET_ASM_FILE_END
332 #define TARGET_ASM_FILE_END arm_file_end
333
334 #undef TARGET_ASM_ALIGNED_SI_OP
335 #define TARGET_ASM_ALIGNED_SI_OP NULL
336 #undef TARGET_ASM_INTEGER
337 #define TARGET_ASM_INTEGER arm_assemble_integer
338
339 #undef TARGET_PRINT_OPERAND
340 #define TARGET_PRINT_OPERAND arm_print_operand
341 #undef TARGET_PRINT_OPERAND_ADDRESS
342 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
343 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
344 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
345
346 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
347 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
348
349 #undef TARGET_ASM_FUNCTION_PROLOGUE
350 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
351
352 #undef TARGET_ASM_FUNCTION_EPILOGUE
353 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
354
355 #undef TARGET_OPTION_OVERRIDE
356 #define TARGET_OPTION_OVERRIDE arm_option_override
357
358 #undef TARGET_COMP_TYPE_ATTRIBUTES
359 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
360
361 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
362 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
363
364 #undef TARGET_SCHED_ADJUST_COST
365 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
366
367 #undef TARGET_REGISTER_MOVE_COST
368 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
369
370 #undef TARGET_MEMORY_MOVE_COST
371 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
372
373 #undef TARGET_ENCODE_SECTION_INFO
374 #ifdef ARM_PE
375 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
376 #else
377 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
378 #endif
379
380 #undef TARGET_STRIP_NAME_ENCODING
381 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
382
383 #undef TARGET_ASM_INTERNAL_LABEL
384 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
385
386 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
387 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
388
389 #undef TARGET_FUNCTION_VALUE
390 #define TARGET_FUNCTION_VALUE arm_function_value
391
392 #undef TARGET_LIBCALL_VALUE
393 #define TARGET_LIBCALL_VALUE arm_libcall_value
394
395 #undef TARGET_FUNCTION_VALUE_REGNO_P
396 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
397
398 #undef TARGET_ASM_OUTPUT_MI_THUNK
399 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
400 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
401 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
402
403 #undef TARGET_RTX_COSTS
404 #define TARGET_RTX_COSTS arm_rtx_costs
405 #undef TARGET_ADDRESS_COST
406 #define TARGET_ADDRESS_COST arm_address_cost
407
408 #undef TARGET_SHIFT_TRUNCATION_MASK
409 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
410 #undef TARGET_VECTOR_MODE_SUPPORTED_P
411 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
412 #undef TARGET_ARRAY_MODE_SUPPORTED_P
413 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
414 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
415 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
416 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
417 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
418 arm_autovectorize_vector_sizes
419
420 #undef TARGET_MACHINE_DEPENDENT_REORG
421 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
422
423 #undef TARGET_INIT_BUILTINS
424 #define TARGET_INIT_BUILTINS arm_init_builtins
425 #undef TARGET_EXPAND_BUILTIN
426 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
427 #undef TARGET_BUILTIN_DECL
428 #define TARGET_BUILTIN_DECL arm_builtin_decl
429
430 #undef TARGET_INIT_LIBFUNCS
431 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
432
433 #undef TARGET_PROMOTE_FUNCTION_MODE
434 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
435 #undef TARGET_PROMOTE_PROTOTYPES
436 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
437 #undef TARGET_PASS_BY_REFERENCE
438 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
439 #undef TARGET_ARG_PARTIAL_BYTES
440 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
441 #undef TARGET_FUNCTION_ARG
442 #define TARGET_FUNCTION_ARG arm_function_arg
443 #undef TARGET_FUNCTION_ARG_ADVANCE
444 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
445 #undef TARGET_FUNCTION_ARG_BOUNDARY
446 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
447
448 #undef TARGET_SETUP_INCOMING_VARARGS
449 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
450
451 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
452 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
453
454 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
455 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
456 #undef TARGET_TRAMPOLINE_INIT
457 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
458 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
459 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
460
461 #undef TARGET_DEFAULT_SHORT_ENUMS
462 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
463
464 #undef TARGET_ALIGN_ANON_BITFIELD
465 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
466
467 #undef TARGET_NARROW_VOLATILE_BITFIELD
468 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
469
470 #undef TARGET_CXX_GUARD_TYPE
471 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
472
473 #undef TARGET_CXX_GUARD_MASK_BIT
474 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
475
476 #undef TARGET_CXX_GET_COOKIE_SIZE
477 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
478
479 #undef TARGET_CXX_COOKIE_HAS_SIZE
480 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
481
482 #undef TARGET_CXX_CDTOR_RETURNS_THIS
483 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
484
485 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
486 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
487
488 #undef TARGET_CXX_USE_AEABI_ATEXIT
489 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
490
491 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
492 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
493 arm_cxx_determine_class_data_visibility
494
495 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
496 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
497
498 #undef TARGET_RETURN_IN_MSB
499 #define TARGET_RETURN_IN_MSB arm_return_in_msb
500
501 #undef TARGET_RETURN_IN_MEMORY
502 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
503
504 #undef TARGET_MUST_PASS_IN_STACK
505 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
506
507 #if ARM_UNWIND_INFO
508 #undef TARGET_ASM_UNWIND_EMIT
509 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
510
511 /* EABI unwinding tables use a different format for the typeinfo tables. */
512 #undef TARGET_ASM_TTYPE
513 #define TARGET_ASM_TTYPE arm_output_ttype
514
515 #undef TARGET_ARM_EABI_UNWINDER
516 #define TARGET_ARM_EABI_UNWINDER true
517
518 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
519 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
520
521 #undef TARGET_ASM_INIT_SECTIONS
522 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
523 #endif /* ARM_UNWIND_INFO */
524
525 #undef TARGET_DWARF_REGISTER_SPAN
526 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
527
528 #undef TARGET_CANNOT_COPY_INSN_P
529 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
530
531 #ifdef HAVE_AS_TLS
532 #undef TARGET_HAVE_TLS
533 #define TARGET_HAVE_TLS true
534 #endif
535
536 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
537 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
538
539 #undef TARGET_LEGITIMATE_CONSTANT_P
540 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
541
542 #undef TARGET_CANNOT_FORCE_CONST_MEM
543 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
544
545 #undef TARGET_MAX_ANCHOR_OFFSET
546 #define TARGET_MAX_ANCHOR_OFFSET 4095
547
548 /* The minimum is set such that the total size of the block
549 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
550 divisible by eight, ensuring natural spacing of anchors. */
551 #undef TARGET_MIN_ANCHOR_OFFSET
552 #define TARGET_MIN_ANCHOR_OFFSET -4088
553
554 #undef TARGET_SCHED_ISSUE_RATE
555 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
556
557 #undef TARGET_MANGLE_TYPE
558 #define TARGET_MANGLE_TYPE arm_mangle_type
559
560 #undef TARGET_BUILD_BUILTIN_VA_LIST
561 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
562 #undef TARGET_EXPAND_BUILTIN_VA_START
563 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
564 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
565 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
566
567 #ifdef HAVE_AS_TLS
568 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
569 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
570 #endif
571
572 #undef TARGET_LEGITIMATE_ADDRESS_P
573 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
574
575 #undef TARGET_PREFERRED_RELOAD_CLASS
576 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
577
578 #undef TARGET_INVALID_PARAMETER_TYPE
579 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
580
581 #undef TARGET_INVALID_RETURN_TYPE
582 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
583
584 #undef TARGET_PROMOTED_TYPE
585 #define TARGET_PROMOTED_TYPE arm_promoted_type
586
587 #undef TARGET_CONVERT_TO_TYPE
588 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
589
590 #undef TARGET_SCALAR_MODE_SUPPORTED_P
591 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
592
593 #undef TARGET_FRAME_POINTER_REQUIRED
594 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
595
596 #undef TARGET_CAN_ELIMINATE
597 #define TARGET_CAN_ELIMINATE arm_can_eliminate
598
599 #undef TARGET_CONDITIONAL_REGISTER_USAGE
600 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
601
602 #undef TARGET_CLASS_LIKELY_SPILLED_P
603 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
604
605 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
606 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
607 arm_vector_alignment_reachable
608
609 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
610 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
611 arm_builtin_support_vector_misalignment
612
613 #undef TARGET_PREFERRED_RENAME_CLASS
614 #define TARGET_PREFERRED_RENAME_CLASS \
615 arm_preferred_rename_class
616
617 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
618 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
619 arm_vectorize_vec_perm_const_ok
620
621 struct gcc_target targetm = TARGET_INITIALIZER;
622 \f
623 /* Obstack for minipool constant handling. */
624 static struct obstack minipool_obstack;
625 static char * minipool_startobj;
626
627 /* The maximum number of insns skipped which
628 will be conditionalised if possible. */
629 static int max_insns_skipped = 5;
630
631 extern FILE * asm_out_file;
632
633 /* True if we are currently building a constant table. */
634 int making_const_table;
635
636 /* The processor for which instructions should be scheduled. */
637 enum processor_type arm_tune = arm_none;
638
639 /* The current tuning set. */
640 const struct tune_params *current_tune;
641
642 /* Which floating point hardware to schedule for. */
643 int arm_fpu_attr;
644
645 /* Which floating point hardware to use. */
646 const struct arm_fpu_desc *arm_fpu_desc;
647
648 /* Used for Thumb call_via trampolines. */
649 rtx thumb_call_via_label[14];
650 static int thumb_call_reg_needed;
651
652 /* Bit values used to identify processor capabilities. */
653 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
654 #define FL_ARCH3M (1 << 1) /* Extended multiply */
655 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
656 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
657 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
658 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
659 #define FL_THUMB (1 << 6) /* Thumb aware */
660 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
661 #define FL_STRONG (1 << 8) /* StrongARM */
662 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
663 #define FL_XSCALE (1 << 10) /* XScale */
664 /* spare (1 << 11) */
665 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
666 media instructions. */
667 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
668 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
669 Note: ARM6 & 7 derivatives only. */
670 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
671 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
672 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
673 profile. */
674 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
675 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
676 #define FL_NEON (1 << 20) /* Neon instructions. */
677 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
678 architecture. */
679 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
680 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
681
682 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
683 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
684
685 /* Flags that only affect tuning, not available instructions. */
686 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
687 | FL_CO_PROC)
688
689 #define FL_FOR_ARCH2 FL_NOTM
690 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
691 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
692 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
693 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
694 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
695 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
696 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
697 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
698 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
699 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
700 #define FL_FOR_ARCH6J FL_FOR_ARCH6
701 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
702 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
703 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
704 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
705 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
706 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
707 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
708 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
709 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
710 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
711
712 /* The bits in this mask specify which
713 instructions we are allowed to generate. */
714 static unsigned long insn_flags = 0;
715
716 /* The bits in this mask specify which instruction scheduling options should
717 be used. */
718 static unsigned long tune_flags = 0;
719
720 /* The highest ARM architecture version supported by the
721 target. */
722 enum base_architecture arm_base_arch = BASE_ARCH_0;
723
724 /* The following are used in the arm.md file as equivalents to bits
725 in the above two flag variables. */
726
727 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
728 int arm_arch3m = 0;
729
730 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
731 int arm_arch4 = 0;
732
733 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
734 int arm_arch4t = 0;
735
736 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
737 int arm_arch5 = 0;
738
739 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
740 int arm_arch5e = 0;
741
742 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
743 int arm_arch6 = 0;
744
745 /* Nonzero if this chip supports the ARM 6K extensions. */
746 int arm_arch6k = 0;
747
748 /* Nonzero if this chip supports the ARM 7 extensions. */
749 int arm_arch7 = 0;
750
751 /* Nonzero if instructions not present in the 'M' profile can be used. */
752 int arm_arch_notm = 0;
753
754 /* Nonzero if instructions present in ARMv7E-M can be used. */
755 int arm_arch7em = 0;
756
757 /* Nonzero if this chip can benefit from load scheduling. */
758 int arm_ld_sched = 0;
759
760 /* Nonzero if this chip is a StrongARM. */
761 int arm_tune_strongarm = 0;
762
763 /* Nonzero if this chip supports Intel Wireless MMX technology. */
764 int arm_arch_iwmmxt = 0;
765
766 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
767 int arm_arch_iwmmxt2 = 0;
768
769 /* Nonzero if this chip is an XScale. */
770 int arm_arch_xscale = 0;
771
772 /* Nonzero if tuning for XScale. */
773 int arm_tune_xscale = 0;
774
775 /* Nonzero if we want to tune for stores that access the write-buffer.
776 This typically means an ARM6 or ARM7 with MMU or MPU. */
777 int arm_tune_wbuf = 0;
778
779 /* Nonzero if tuning for Cortex-A9. */
780 int arm_tune_cortex_a9 = 0;
781
782 /* Nonzero if generating Thumb instructions. */
783 int thumb_code = 0;
784
785 /* Nonzero if generating Thumb-1 instructions. */
786 int thumb1_code = 0;
787
788 /* Nonzero if we should define __THUMB_INTERWORK__ in the
789 preprocessor.
790 XXX This is a bit of a hack, it's intended to help work around
791 problems in GLD which doesn't understand that armv5t code is
792 interworking clean. */
793 int arm_cpp_interwork = 0;
794
795 /* Nonzero if chip supports Thumb 2. */
796 int arm_arch_thumb2;
797
798 /* Nonzero if chip supports integer division instruction. */
799 int arm_arch_arm_hwdiv;
800 int arm_arch_thumb_hwdiv;
801
802 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
803 we must report the mode of the memory reference from
804 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
805 enum machine_mode output_memory_reference_mode;
806
807 /* The register number to be used for the PIC offset register. */
808 unsigned arm_pic_register = INVALID_REGNUM;
809
810 /* Set to 1 after arm_reorg has started. Reset to start at the start of
811 the next function. */
812 static int after_arm_reorg = 0;
813
814 enum arm_pcs arm_pcs_default;
815
816 /* For an explanation of these variables, see final_prescan_insn below. */
817 int arm_ccfsm_state;
818 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
819 enum arm_cond_code arm_current_cc;
820
821 rtx arm_target_insn;
822 int arm_target_label;
823 /* The number of conditionally executed insns, including the current insn. */
824 int arm_condexec_count = 0;
825 /* A bitmask specifying the patterns for the IT block.
826 Zero means do not output an IT block before this insn. */
827 int arm_condexec_mask = 0;
828 /* The number of bits used in arm_condexec_mask. */
829 int arm_condexec_masklen = 0;
830
831 /* The condition codes of the ARM, and the inverse function. */
832 static const char * const arm_condition_codes[] =
833 {
834 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
835 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
836 };
837
838 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
839 int arm_regs_in_sequence[] =
840 {
841 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
842 };
843
844 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
845 #define streq(string1, string2) (strcmp (string1, string2) == 0)
846
847 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
848 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
849 | (1 << PIC_OFFSET_TABLE_REGNUM)))
850 \f
851 /* Initialization code. */
852
853 struct processors
854 {
855 const char *const name;
856 enum processor_type core;
857 const char *arch;
858 enum base_architecture base_arch;
859 const unsigned long flags;
860 const struct tune_params *const tune;
861 };
862
863
864 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
865 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
866 prefetch_slots, \
867 l1_size, \
868 l1_line_size
869
870 const struct tune_params arm_slowmul_tune =
871 {
872 arm_slowmul_rtx_costs,
873 NULL,
874 3, /* Constant limit. */
875 5, /* Max cond insns. */
876 ARM_PREFETCH_NOT_BENEFICIAL,
877 true, /* Prefer constant pool. */
878 arm_default_branch_cost,
879 false /* Prefer LDRD/STRD. */
880 };
881
882 const struct tune_params arm_fastmul_tune =
883 {
884 arm_fastmul_rtx_costs,
885 NULL,
886 1, /* Constant limit. */
887 5, /* Max cond insns. */
888 ARM_PREFETCH_NOT_BENEFICIAL,
889 true, /* Prefer constant pool. */
890 arm_default_branch_cost,
891 false /* Prefer LDRD/STRD. */
892 };
893
894 /* StrongARM has early execution of branches, so a sequence that is worth
895 skipping is shorter. Set max_insns_skipped to a lower value. */
896
897 const struct tune_params arm_strongarm_tune =
898 {
899 arm_fastmul_rtx_costs,
900 NULL,
901 1, /* Constant limit. */
902 3, /* Max cond insns. */
903 ARM_PREFETCH_NOT_BENEFICIAL,
904 true, /* Prefer constant pool. */
905 arm_default_branch_cost,
906 false /* Prefer LDRD/STRD. */
907 };
908
909 const struct tune_params arm_xscale_tune =
910 {
911 arm_xscale_rtx_costs,
912 xscale_sched_adjust_cost,
913 2, /* Constant limit. */
914 3, /* Max cond insns. */
915 ARM_PREFETCH_NOT_BENEFICIAL,
916 true, /* Prefer constant pool. */
917 arm_default_branch_cost,
918 false /* Prefer LDRD/STRD. */
919 };
920
921 const struct tune_params arm_9e_tune =
922 {
923 arm_9e_rtx_costs,
924 NULL,
925 1, /* Constant limit. */
926 5, /* Max cond insns. */
927 ARM_PREFETCH_NOT_BENEFICIAL,
928 true, /* Prefer constant pool. */
929 arm_default_branch_cost,
930 false /* Prefer LDRD/STRD. */
931 };
932
933 const struct tune_params arm_v6t2_tune =
934 {
935 arm_9e_rtx_costs,
936 NULL,
937 1, /* Constant limit. */
938 5, /* Max cond insns. */
939 ARM_PREFETCH_NOT_BENEFICIAL,
940 false, /* Prefer constant pool. */
941 arm_default_branch_cost,
942 false /* Prefer LDRD/STRD. */
943 };
944
945 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
946 const struct tune_params arm_cortex_tune =
947 {
948 arm_9e_rtx_costs,
949 NULL,
950 1, /* Constant limit. */
951 5, /* Max cond insns. */
952 ARM_PREFETCH_NOT_BENEFICIAL,
953 false, /* Prefer constant pool. */
954 arm_default_branch_cost,
955 false /* Prefer LDRD/STRD. */
956 };
957
958 const struct tune_params arm_cortex_a15_tune =
959 {
960 arm_9e_rtx_costs,
961 NULL,
962 1, /* Constant limit. */
963 5, /* Max cond insns. */
964 ARM_PREFETCH_NOT_BENEFICIAL,
965 false, /* Prefer constant pool. */
966 arm_default_branch_cost,
967 true /* Prefer LDRD/STRD. */
968 };
969
970 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
971 less appealing. Set max_insns_skipped to a low value. */
972
973 const struct tune_params arm_cortex_a5_tune =
974 {
975 arm_9e_rtx_costs,
976 NULL,
977 1, /* Constant limit. */
978 1, /* Max cond insns. */
979 ARM_PREFETCH_NOT_BENEFICIAL,
980 false, /* Prefer constant pool. */
981 arm_cortex_a5_branch_cost,
982 false /* Prefer LDRD/STRD. */
983 };
984
985 const struct tune_params arm_cortex_a9_tune =
986 {
987 arm_9e_rtx_costs,
988 cortex_a9_sched_adjust_cost,
989 1, /* Constant limit. */
990 5, /* Max cond insns. */
991 ARM_PREFETCH_BENEFICIAL(4,32,32),
992 false, /* Prefer constant pool. */
993 arm_default_branch_cost,
994 false /* Prefer LDRD/STRD. */
995 };
996
997 const struct tune_params arm_fa726te_tune =
998 {
999 arm_9e_rtx_costs,
1000 fa726te_sched_adjust_cost,
1001 1, /* Constant limit. */
1002 5, /* Max cond insns. */
1003 ARM_PREFETCH_NOT_BENEFICIAL,
1004 true, /* Prefer constant pool. */
1005 arm_default_branch_cost,
1006 false /* Prefer LDRD/STRD. */
1007 };
1008
1009
1010 /* Not all of these give usefully different compilation alternatives,
1011 but there is no simple way of generalizing them. */
1012 static const struct processors all_cores[] =
1013 {
1014 /* ARM Cores */
1015 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1016 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1017 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1018 #include "arm-cores.def"
1019 #undef ARM_CORE
1020 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1021 };
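/* As an illustration of the expansion above, a hypothetical entry
   ARM_CORE("example", example, 7A, FL_LDSCHED, cortex) would yield

     {"example", example, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune},

   i.e. the ARCH argument is stringized and also selects the base
   architecture and FL_FOR_ARCH* flags, while the COSTS argument picks
   one of the arm_*_tune structures defined earlier.  */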
1022
1023 static const struct processors all_architectures[] =
1024 {
1025 /* ARM Architectures */
1026 /* We don't specify tuning costs here as it will be figured out
1027 from the core. */
1028
1029 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1030 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1031 #include "arm-arches.def"
1032 #undef ARM_ARCH
1033 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1034 };
1035
1036
1037 /* These are populated as command-line arguments are processed, or NULL
1038 if not specified. */
1039 static const struct processors *arm_selected_arch;
1040 static const struct processors *arm_selected_cpu;
1041 static const struct processors *arm_selected_tune;
1042
1043 /* The name of the preprocessor macro to define for this architecture. */
1044
1045 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1046
1047 /* Available values for -mfpu=. */
1048
1049 static const struct arm_fpu_desc all_fpus[] =
1050 {
1051 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1052 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1053 #include "arm-fpus.def"
1054 #undef ARM_FPU
1055 };
1056
1057
1058 /* Supported TLS relocations. */
1059
1060 enum tls_reloc {
1061 TLS_GD32,
1062 TLS_LDM32,
1063 TLS_LDO32,
1064 TLS_IE32,
1065 TLS_LE32,
1066 TLS_DESCSEQ /* GNU scheme */
1067 };
1068
1069 /* The maximum number of insns to be used when loading a constant. */
1070 inline static int
1071 arm_constant_limit (bool size_p)
1072 {
1073 return size_p ? 1 : current_tune->constant_limit;
1074 }
1075
1076 /* Emit an insn that's a simple single-set. Both the operands must be known
1077 to be valid. */
1078 inline static rtx
1079 emit_set_insn (rtx x, rtx y)
1080 {
1081 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1082 }
1083
1084 /* Return the number of bits set in VALUE. */
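/* For example, bit_count (0x29) clears one set bit per iteration
   (0x29 -> 0x28 -> 0x20 -> 0) and returns 3.  */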
1085 static unsigned
1086 bit_count (unsigned long value)
1087 {
1088 unsigned long count = 0;
1089
1090 while (value)
1091 {
1092 count++;
1093 value &= value - 1; /* Clear the least-significant set bit. */
1094 }
1095
1096 return count;
1097 }
1098
1099 typedef struct
1100 {
1101 enum machine_mode mode;
1102 const char *name;
1103 } arm_fixed_mode_set;
1104
1105 /* A small helper for setting fixed-point library libfuncs. */
1106
1107 static void
1108 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1109 const char *funcname, const char *modename,
1110 int num_suffix)
1111 {
1112 char buffer[50];
1113
1114 if (num_suffix == 0)
1115 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1116 else
1117 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1118
1119 set_optab_libfunc (optable, mode, buffer);
1120 }
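/* For example, registering add_optab in QQmode with FUNCNAME "add",
   MODENAME "qq" and NUM_SUFFIX 3 produces the libcall name "__gnu_addqq3",
   the __gnu_-prefixed spelling of the generic fixed-point routines (see
   the comment before the fixed-point block in arm_init_libfuncs below).  */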
1121
1122 static void
1123 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1124 enum machine_mode from, const char *funcname,
1125 const char *toname, const char *fromname)
1126 {
1127 char buffer[50];
1128 const char *maybe_suffix_2 = "";
1129
1130 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1131 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1132 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1133 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1134 maybe_suffix_2 = "2";
1135
1136 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1137 maybe_suffix_2);
1138
1139 set_conv_libfunc (optable, to, from, buffer);
1140 }
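/* Likewise, fract_optab from SQmode ("sq") to DQmode ("dq") becomes
   "__gnu_fractsqdq2": both modes are signed fract modes, so the "2"
   suffix is appended, mirroring the naming scheme of fixed-bit.h.  */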
1141
1142 /* Set up library functions unique to ARM. */
1143
1144 static void
1145 arm_init_libfuncs (void)
1146 {
1147 /* For Linux, we have access to kernel support for atomic operations. */
1148 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1149 init_sync_libfuncs (2 * UNITS_PER_WORD);
1150
1151 /* There are no special library functions unless we are using the
1152 ARM BPABI. */
1153 if (!TARGET_BPABI)
1154 return;
1155
1156 /* The functions below are described in Section 4 of the "Run-Time
1157 ABI for the ARM architecture", Version 1.0. */
1158
1159 /* Double-precision floating-point arithmetic. Table 2. */
1160 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1161 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1162 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1163 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1164 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1165
1166 /* Double-precision comparisons. Table 3. */
1167 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1168 set_optab_libfunc (ne_optab, DFmode, NULL);
1169 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1170 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1171 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1172 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1173 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1174
1175 /* Single-precision floating-point arithmetic. Table 4. */
1176 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1177 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1178 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1179 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1180 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1181
1182 /* Single-precision comparisons. Table 5. */
1183 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1184 set_optab_libfunc (ne_optab, SFmode, NULL);
1185 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1186 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1187 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1188 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1189 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1190
1191 /* Floating-point to integer conversions. Table 6. */
1192 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1193 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1194 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1195 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1196 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1197 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1198 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1199 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1200
1201 /* Conversions between floating types. Table 7. */
1202 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1203 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1204
1205 /* Integer to floating-point conversions. Table 8. */
1206 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1207 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1208 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1209 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1210 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1211 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1212 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1213 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1214
1215 /* Long long. Table 9. */
1216 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1217 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1218 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1219 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1220 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1221 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1222 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1223 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1224
1225 /* Integer (32/32->32) division. \S 4.3.1. */
1226 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1227 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1228
1229 /* The divmod functions are designed so that they can be used for
1230 plain division, even though they return both the quotient and the
1231 remainder. The quotient is returned in the usual location (i.e.,
1232 r0 for SImode, {r0, r1} for DImode), just as would be expected
1233 for an ordinary division routine. Because the AAPCS calling
1234 conventions specify that all of { r0, r1, r2, r3 } are
1235 call-clobbered registers, there is no need to tell the compiler
1236 explicitly that those registers are clobbered by these
1237 routines. */
1238 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1239 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1240
1241 /* For SImode division the ABI provides div-without-mod routines,
1242 which are faster. */
1243 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1244 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1245
1246 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1247 divmod libcalls instead. */
1248 set_optab_libfunc (smod_optab, DImode, NULL);
1249 set_optab_libfunc (umod_optab, DImode, NULL);
1250 set_optab_libfunc (smod_optab, SImode, NULL);
1251 set_optab_libfunc (umod_optab, SImode, NULL);
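  /* A concrete instance of the convention described above: an SImode
     modulo goes through __aeabi_idivmod, which for arguments (7, 3)
     returns the quotient 2 in r0 and the remainder 1 in r1, so the
     remainder is simply taken from r1 after the call.  */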
1252
1253 /* Half-precision float operations. The compiler handles all operations
1254 with NULL libfuncs by converting to SFmode. */
1255 switch (arm_fp16_format)
1256 {
1257 case ARM_FP16_FORMAT_IEEE:
1258 case ARM_FP16_FORMAT_ALTERNATIVE:
1259
1260 /* Conversions. */
1261 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1262 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1263 ? "__gnu_f2h_ieee"
1264 : "__gnu_f2h_alternative"));
1265 set_conv_libfunc (sext_optab, SFmode, HFmode,
1266 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1267 ? "__gnu_h2f_ieee"
1268 : "__gnu_h2f_alternative"));
1269
1270 /* Arithmetic. */
1271 set_optab_libfunc (add_optab, HFmode, NULL);
1272 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1273 set_optab_libfunc (smul_optab, HFmode, NULL);
1274 set_optab_libfunc (neg_optab, HFmode, NULL);
1275 set_optab_libfunc (sub_optab, HFmode, NULL);
1276
1277 /* Comparisons. */
1278 set_optab_libfunc (eq_optab, HFmode, NULL);
1279 set_optab_libfunc (ne_optab, HFmode, NULL);
1280 set_optab_libfunc (lt_optab, HFmode, NULL);
1281 set_optab_libfunc (le_optab, HFmode, NULL);
1282 set_optab_libfunc (ge_optab, HFmode, NULL);
1283 set_optab_libfunc (gt_optab, HFmode, NULL);
1284 set_optab_libfunc (unord_optab, HFmode, NULL);
1285 break;
1286
1287 default:
1288 break;
1289 }
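  /* With the entries above left as NULL, an HFmode operation is, roughly
     speaking, performed as SFmode arithmetic bracketed by the conversion
     libfuncs registered above; e.g. for the IEEE format an addition
     behaves like __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b)).  */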
1290
1291 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1292 {
1293 const arm_fixed_mode_set fixed_arith_modes[] =
1294 {
1295 { QQmode, "qq" },
1296 { UQQmode, "uqq" },
1297 { HQmode, "hq" },
1298 { UHQmode, "uhq" },
1299 { SQmode, "sq" },
1300 { USQmode, "usq" },
1301 { DQmode, "dq" },
1302 { UDQmode, "udq" },
1303 { TQmode, "tq" },
1304 { UTQmode, "utq" },
1305 { HAmode, "ha" },
1306 { UHAmode, "uha" },
1307 { SAmode, "sa" },
1308 { USAmode, "usa" },
1309 { DAmode, "da" },
1310 { UDAmode, "uda" },
1311 { TAmode, "ta" },
1312 { UTAmode, "uta" }
1313 };
1314 const arm_fixed_mode_set fixed_conv_modes[] =
1315 {
1316 { QQmode, "qq" },
1317 { UQQmode, "uqq" },
1318 { HQmode, "hq" },
1319 { UHQmode, "uhq" },
1320 { SQmode, "sq" },
1321 { USQmode, "usq" },
1322 { DQmode, "dq" },
1323 { UDQmode, "udq" },
1324 { TQmode, "tq" },
1325 { UTQmode, "utq" },
1326 { HAmode, "ha" },
1327 { UHAmode, "uha" },
1328 { SAmode, "sa" },
1329 { USAmode, "usa" },
1330 { DAmode, "da" },
1331 { UDAmode, "uda" },
1332 { TAmode, "ta" },
1333 { UTAmode, "uta" },
1334 { QImode, "qi" },
1335 { HImode, "hi" },
1336 { SImode, "si" },
1337 { DImode, "di" },
1338 { TImode, "ti" },
1339 { SFmode, "sf" },
1340 { DFmode, "df" }
1341 };
1342 unsigned int i, j;
1343
1344 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1345 {
1346 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1347 "add", fixed_arith_modes[i].name, 3);
1348 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1349 "ssadd", fixed_arith_modes[i].name, 3);
1350 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1351 "usadd", fixed_arith_modes[i].name, 3);
1352 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1353 "sub", fixed_arith_modes[i].name, 3);
1354 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1355 "sssub", fixed_arith_modes[i].name, 3);
1356 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1357 "ussub", fixed_arith_modes[i].name, 3);
1358 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1359 "mul", fixed_arith_modes[i].name, 3);
1360 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1361 "ssmul", fixed_arith_modes[i].name, 3);
1362 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1363 "usmul", fixed_arith_modes[i].name, 3);
1364 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1365 "div", fixed_arith_modes[i].name, 3);
1366 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1367 "udiv", fixed_arith_modes[i].name, 3);
1368 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1369 "ssdiv", fixed_arith_modes[i].name, 3);
1370 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1371 "usdiv", fixed_arith_modes[i].name, 3);
1372 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1373 "neg", fixed_arith_modes[i].name, 2);
1374 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1375 "ssneg", fixed_arith_modes[i].name, 2);
1376 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1377 "usneg", fixed_arith_modes[i].name, 2);
1378 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1379 "ashl", fixed_arith_modes[i].name, 3);
1380 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1381 "ashr", fixed_arith_modes[i].name, 3);
1382 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1383 "lshr", fixed_arith_modes[i].name, 3);
1384 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1385 "ssashl", fixed_arith_modes[i].name, 3);
1386 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1387 "usashl", fixed_arith_modes[i].name, 3);
1388 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1389 "cmp", fixed_arith_modes[i].name, 2);
1390 }
1391
1392 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1393 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1394 {
1395 if (i == j
1396 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1397 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1398 continue;
1399
1400 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1401 fixed_conv_modes[j].mode, "fract",
1402 fixed_conv_modes[i].name,
1403 fixed_conv_modes[j].name);
1404 arm_set_fixed_conv_libfunc (satfract_optab,
1405 fixed_conv_modes[i].mode,
1406 fixed_conv_modes[j].mode, "satfract",
1407 fixed_conv_modes[i].name,
1408 fixed_conv_modes[j].name);
1409 arm_set_fixed_conv_libfunc (fractuns_optab,
1410 fixed_conv_modes[i].mode,
1411 fixed_conv_modes[j].mode, "fractuns",
1412 fixed_conv_modes[i].name,
1413 fixed_conv_modes[j].name);
1414 arm_set_fixed_conv_libfunc (satfractuns_optab,
1415 fixed_conv_modes[i].mode,
1416 fixed_conv_modes[j].mode, "satfractuns",
1417 fixed_conv_modes[i].name,
1418 fixed_conv_modes[j].name);
1419 }
1420 }
1421
1422 if (TARGET_AAPCS_BASED)
1423 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1424 }
1425
1426 /* On AAPCS systems, this is the "struct __va_list". */
1427 static GTY(()) tree va_list_type;
1428
1429 /* Return the type to use as __builtin_va_list. */
1430 static tree
1431 arm_build_builtin_va_list (void)
1432 {
1433 tree va_list_name;
1434 tree ap_field;
1435
1436 if (!TARGET_AAPCS_BASED)
1437 return std_build_builtin_va_list ();
1438
1439 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1440 defined as:
1441
1442 struct __va_list
1443 {
1444 void *__ap;
1445 };
1446
1447 The C Library ABI further reinforces this definition in \S
1448 4.1.
1449
1450 We must follow this definition exactly. The structure tag
1451 name is visible in C++ mangled names, and thus forms a part
1452 of the ABI. The field name may be used by people who
1453 #include <stdarg.h>. */
1454 /* Create the type. */
1455 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1456 /* Give it the required name. */
1457 va_list_name = build_decl (BUILTINS_LOCATION,
1458 TYPE_DECL,
1459 get_identifier ("__va_list"),
1460 va_list_type);
1461 DECL_ARTIFICIAL (va_list_name) = 1;
1462 TYPE_NAME (va_list_type) = va_list_name;
1463 TYPE_STUB_DECL (va_list_type) = va_list_name;
1464 /* Create the __ap field. */
1465 ap_field = build_decl (BUILTINS_LOCATION,
1466 FIELD_DECL,
1467 get_identifier ("__ap"),
1468 ptr_type_node);
1469 DECL_ARTIFICIAL (ap_field) = 1;
1470 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1471 TYPE_FIELDS (va_list_type) = ap_field;
1472 /* Compute its layout. */
1473 layout_type (va_list_type);
1474
1475 return va_list_type;
1476 }
1477
1478 /* Return an expression of type "void *" pointing to the next
1479 available argument in a variable-argument list. VALIST is the
1480 user-level va_list object, of type __builtin_va_list. */
1481 static tree
1482 arm_extract_valist_ptr (tree valist)
1483 {
1484 if (TREE_TYPE (valist) == error_mark_node)
1485 return error_mark_node;
1486
1487 /* On an AAPCS target, the pointer is stored within "struct
1488 va_list". */
1489 if (TARGET_AAPCS_BASED)
1490 {
1491 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1492 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1493 valist, ap_field, NULL_TREE);
1494 }
1495
1496 return valist;
1497 }
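
/* For example (illustrative only): given a user-level "va_list ap;",
   on an AAPCS target the function above yields the COMPONENT_REF for
   "ap.__ap", whereas on other targets it returns "ap" unchanged, since
   there va_list is typically already a plain pointer type. */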
1498
1499 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1500 static void
1501 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1502 {
1503 valist = arm_extract_valist_ptr (valist);
1504 std_expand_builtin_va_start (valist, nextarg);
1505 }
1506
1507 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1508 static tree
1509 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1510 gimple_seq *post_p)
1511 {
1512 valist = arm_extract_valist_ptr (valist);
1513 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1514 }
1515
1516 /* Fix up any incompatible options that the user has specified. */
1517 static void
1518 arm_option_override (void)
1519 {
1520 if (global_options_set.x_arm_arch_option)
1521 arm_selected_arch = &all_architectures[arm_arch_option];
1522
1523 if (global_options_set.x_arm_cpu_option)
1524 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1525
1526 if (global_options_set.x_arm_tune_option)
1527 arm_selected_tune = &all_cores[(int) arm_tune_option];
1528
1529 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1530 SUBTARGET_OVERRIDE_OPTIONS;
1531 #endif
1532
1533 if (arm_selected_arch)
1534 {
1535 if (arm_selected_cpu)
1536 {
1537 /* Check for conflict between mcpu and march. */
1538 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1539 {
1540 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1541 arm_selected_cpu->name, arm_selected_arch->name);
1542 /* -march wins for code generation.
1543 -mcpu wins for default tuning. */
1544 if (!arm_selected_tune)
1545 arm_selected_tune = arm_selected_cpu;
1546
1547 arm_selected_cpu = arm_selected_arch;
1548 }
1549 else
1550 /* -mcpu wins. */
1551 arm_selected_arch = NULL;
1552 }
1553 else
1554 /* Pick a CPU based on the architecture. */
1555 arm_selected_cpu = arm_selected_arch;
1556 }
1557
1558 /* If the user did not specify a processor, choose one for them. */
1559 if (!arm_selected_cpu)
1560 {
1561 const struct processors * sel;
1562 unsigned int sought;
1563
1564 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1565 if (!arm_selected_cpu->name)
1566 {
1567 #ifdef SUBTARGET_CPU_DEFAULT
1568 /* Use the subtarget default CPU if none was specified by
1569 configure. */
1570 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1571 #endif
1572 /* Default to ARM6. */
1573 if (!arm_selected_cpu->name)
1574 arm_selected_cpu = &all_cores[arm6];
1575 }
1576
1577 sel = arm_selected_cpu;
1578 insn_flags = sel->flags;
1579
1580 /* Now check to see if the user has specified any command line
1581 switches that require certain abilities from the CPU. */
1582 sought = 0;
1583
1584 if (TARGET_INTERWORK || TARGET_THUMB)
1585 {
1586 sought |= (FL_THUMB | FL_MODE32);
1587
1588 /* There are no ARM processors that support both APCS-26 and
1589 interworking. Therefore we force FL_MODE26 to be removed
1590 from insn_flags here (if it was set), so that the search
1591 below will always be able to find a compatible processor. */
1592 insn_flags &= ~FL_MODE26;
1593 }
1594
1595 if (sought != 0 && ((sought & insn_flags) != sought))
1596 {
1597 /* Try to locate a CPU type that supports all of the abilities
1598 of the default CPU, plus the extra abilities requested by
1599 the user. */
1600 for (sel = all_cores; sel->name != NULL; sel++)
1601 if ((sel->flags & sought) == (sought | insn_flags))
1602 break;
1603
1604 if (sel->name == NULL)
1605 {
1606 unsigned current_bit_count = 0;
1607 const struct processors * best_fit = NULL;
1608
1609 /* Ideally we would like to issue an error message here
1610 saying that it was not possible to find a CPU compatible
1611 with the default CPU, but which also supports the command
1612 line options specified by the programmer, and that they
1613 ought to use the -mcpu=<name> command line option to
1614 override the default CPU type.
1615
1616 If we cannot find a CPU that has both the
1617 characteristics of the default CPU and the given
1618 command line options, we scan the array again looking
1619 for a best match. */
1620 for (sel = all_cores; sel->name != NULL; sel++)
1621 if ((sel->flags & sought) == sought)
1622 {
1623 unsigned count;
1624
1625 count = bit_count (sel->flags & insn_flags);
1626
1627 if (count >= current_bit_count)
1628 {
1629 best_fit = sel;
1630 current_bit_count = count;
1631 }
1632 }
1633
1634 gcc_assert (best_fit);
1635 sel = best_fit;
1636 }
1637
1638 arm_selected_cpu = sel;
1639 }
1640 }
1641
1642 gcc_assert (arm_selected_cpu);
1643 /* The selected CPU may be an architecture, so look up tuning by core ID. */
1644 if (!arm_selected_tune)
1645 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1646
1647 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1648 insn_flags = arm_selected_cpu->flags;
1649 arm_base_arch = arm_selected_cpu->base_arch;
1650
1651 arm_tune = arm_selected_tune->core;
1652 tune_flags = arm_selected_tune->flags;
1653 current_tune = arm_selected_tune->tune;
1654
1655 /* Make sure that the processor choice does not conflict with any of the
1656 other command line choices. */
1657 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1658 error ("target CPU does not support ARM mode");
1659
1660 /* BPABI targets use linker tricks to allow interworking on cores
1661 without thumb support. */
1662 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1663 {
1664 warning (0, "target CPU does not support interworking");
1665 target_flags &= ~MASK_INTERWORK;
1666 }
1667
1668 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1669 {
1670 warning (0, "target CPU does not support THUMB instructions");
1671 target_flags &= ~MASK_THUMB;
1672 }
1673
1674 if (TARGET_APCS_FRAME && TARGET_THUMB)
1675 {
1676 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1677 target_flags &= ~MASK_APCS_FRAME;
1678 }
1679
1680 /* Callee super interworking implies thumb interworking. Adding
1681 this to the flags here simplifies the logic elsewhere. */
1682 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1683 target_flags |= MASK_INTERWORK;
1684
1685 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1686 from here, where no function is currently being compiled. */
1687 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1688 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1689
1690 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1691 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1692
1693 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1694 {
1695 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1696 target_flags |= MASK_APCS_FRAME;
1697 }
1698
1699 if (TARGET_POKE_FUNCTION_NAME)
1700 target_flags |= MASK_APCS_FRAME;
1701
1702 if (TARGET_APCS_REENT && flag_pic)
1703 error ("-fpic and -mapcs-reent are incompatible");
1704
1705 if (TARGET_APCS_REENT)
1706 warning (0, "APCS reentrant code not supported. Ignored");
1707
1708 /* If this target is normally configured to use APCS frames, warn if they
1709 are turned off and debugging is turned on. */
1710 if (TARGET_ARM
1711 && write_symbols != NO_DEBUG
1712 && !TARGET_APCS_FRAME
1713 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1714 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1715
1716 if (TARGET_APCS_FLOAT)
1717 warning (0, "passing floating point arguments in fp regs not yet supported");
1718
1719 if (TARGET_LITTLE_WORDS)
1720 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1721 "will be removed in a future release");
1722
1723 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1724 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1725 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1726 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1727 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1728 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1729 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1730 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1731 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1732 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1733 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1734 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1735 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1736
1737 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1738 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1739 thumb_code = TARGET_ARM == 0;
1740 thumb1_code = TARGET_THUMB1 != 0;
1741 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1742 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1743 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1744 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1745 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1746 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1747 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1748
1749 /* If we are not using the default (ARM mode) section anchor offset
1750 ranges, then set the correct ranges now. */
1751 if (TARGET_THUMB1)
1752 {
1753 /* Thumb-1 LDR instructions cannot have negative offsets.
1754 Permissible positive offset ranges are 5-bit (for byte loads),
1755 6-bit (for halfword loads), or 7-bit (for word loads).
1756 Empirical results suggest a 7-bit anchor range gives the best
1757 overall code size. */
1758 targetm.min_anchor_offset = 0;
1759 targetm.max_anchor_offset = 127;
1760 }
1761 else if (TARGET_THUMB2)
1762 {
1763 /* The minimum is set such that the total size of the block
1764 for a particular anchor is 248 + 1 + 4095 bytes, which is
1765 divisible by eight, ensuring natural spacing of anchors. */
1766 targetm.min_anchor_offset = -248;
1767 targetm.max_anchor_offset = 4095;
1768 }
1769
1770 /* V5 code we generate is completely interworking capable, so we turn off
1771 TARGET_INTERWORK here to avoid many tests later on. */
1772
1773 /* XXX However, we must pass the right pre-processor defines to CPP
1774 or GLD can get confused. This is a hack. */
1775 if (TARGET_INTERWORK)
1776 arm_cpp_interwork = 1;
1777
1778 if (arm_arch5)
1779 target_flags &= ~MASK_INTERWORK;
1780
1781 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1782 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1783
1784 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1785 error ("iwmmxt abi requires an iwmmxt capable cpu");
1786
1787 if (!global_options_set.x_arm_fpu_index)
1788 {
1789 const char *target_fpu_name;
1790 bool ok;
1791
1792 #ifdef FPUTYPE_DEFAULT
1793 target_fpu_name = FPUTYPE_DEFAULT;
1794 #else
1795 target_fpu_name = "vfp";
1796 #endif
1797
1798 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1799 CL_TARGET);
1800 gcc_assert (ok);
1801 }
1802
1803 arm_fpu_desc = &all_fpus[arm_fpu_index];
1804
1805 switch (arm_fpu_desc->model)
1806 {
1807 case ARM_FP_MODEL_VFP:
1808 arm_fpu_attr = FPU_VFP;
1809 break;
1810
1811 default:
1812 gcc_unreachable();
1813 }
1814
1815 if (TARGET_AAPCS_BASED)
1816 {
1817 if (TARGET_CALLER_INTERWORKING)
1818 error ("AAPCS does not support -mcaller-super-interworking");
1819 else
1820 if (TARGET_CALLEE_INTERWORKING)
1821 error ("AAPCS does not support -mcallee-super-interworking");
1822 }
1823
1824 /* iWMMXt and NEON are incompatible. */
1825 if (TARGET_IWMMXT && TARGET_NEON)
1826 error ("iWMMXt and NEON are incompatible");
1827
1828 /* iWMMXt unsupported under Thumb mode. */
1829 if (TARGET_THUMB && TARGET_IWMMXT)
1830 error ("iWMMXt unsupported under Thumb mode");
1831
1832 /* __fp16 support currently assumes the core has ldrh. */
1833 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1834 sorry ("__fp16 and no ldrh");
1835
1836 /* If soft-float is specified then don't use FPU. */
1837 if (TARGET_SOFT_FLOAT)
1838 arm_fpu_attr = FPU_NONE;
1839
1840 if (TARGET_AAPCS_BASED)
1841 {
1842 if (arm_abi == ARM_ABI_IWMMXT)
1843 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1844 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1845 && TARGET_HARD_FLOAT
1846 && TARGET_VFP)
1847 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1848 else
1849 arm_pcs_default = ARM_PCS_AAPCS;
1850 }
1851 else
1852 {
1853 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1854 sorry ("-mfloat-abi=hard and VFP");
1855
1856 if (arm_abi == ARM_ABI_APCS)
1857 arm_pcs_default = ARM_PCS_APCS;
1858 else
1859 arm_pcs_default = ARM_PCS_ATPCS;
1860 }
1861
1862 /* For arm2/3 there is no need to do any scheduling if we are doing
1863 software floating-point. */
1864 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1865 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1866
1867 /* Use the cp15 method if it is available. */
1868 if (target_thread_pointer == TP_AUTO)
1869 {
1870 if (arm_arch6k && !TARGET_THUMB1)
1871 target_thread_pointer = TP_CP15;
1872 else
1873 target_thread_pointer = TP_SOFT;
1874 }
1875
1876 if (TARGET_HARD_TP && TARGET_THUMB1)
1877 error ("can not use -mtp=cp15 with 16-bit Thumb");
1878
1879 /* Override the default structure alignment for AAPCS ABI. */
1880 if (!global_options_set.x_arm_structure_size_boundary)
1881 {
1882 if (TARGET_AAPCS_BASED)
1883 arm_structure_size_boundary = 8;
1884 }
1885 else
1886 {
1887 if (arm_structure_size_boundary != 8
1888 && arm_structure_size_boundary != 32
1889 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1890 {
1891 if (ARM_DOUBLEWORD_ALIGN)
1892 warning (0,
1893 "structure size boundary can only be set to 8, 32 or 64");
1894 else
1895 warning (0, "structure size boundary can only be set to 8 or 32");
1896 arm_structure_size_boundary
1897 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1898 }
1899 }
1900
1901 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1902 {
1903 error ("RTP PIC is incompatible with Thumb");
1904 flag_pic = 0;
1905 }
1906
1907 /* If stack checking is disabled, we can use r10 as the PIC register,
1908 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1909 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1910 {
1911 if (TARGET_VXWORKS_RTP)
1912 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1913 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1914 }
1915
1916 if (flag_pic && TARGET_VXWORKS_RTP)
1917 arm_pic_register = 9;
1918
1919 if (arm_pic_register_string != NULL)
1920 {
1921 int pic_register = decode_reg_name (arm_pic_register_string);
1922
1923 if (!flag_pic)
1924 warning (0, "-mpic-register= is useless without -fpic");
1925
1926 /* Prevent the user from choosing an obviously stupid PIC register. */
1927 else if (pic_register < 0 || call_used_regs[pic_register]
1928 || pic_register == HARD_FRAME_POINTER_REGNUM
1929 || pic_register == STACK_POINTER_REGNUM
1930 || pic_register >= PC_REGNUM
1931 || (TARGET_VXWORKS_RTP
1932 && (unsigned int) pic_register != arm_pic_register))
1933 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1934 else
1935 arm_pic_register = pic_register;
1936 }
1937
1938 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1939 if (fix_cm3_ldrd == 2)
1940 {
1941 if (arm_selected_cpu->core == cortexm3)
1942 fix_cm3_ldrd = 1;
1943 else
1944 fix_cm3_ldrd = 0;
1945 }
1946
1947 /* Enable -munaligned-access by default for
1948 - all ARMv6 architecture-based processors
1949 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1950
1951 Disable -munaligned-access by default for
1952 - all pre-ARMv6 architecture-based processors
1953 - ARMv6-M architecture-based processors. */
1954
1955 if (unaligned_access == 2)
1956 {
1957 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1958 unaligned_access = 1;
1959 else
1960 unaligned_access = 0;
1961 }
1962 else if (unaligned_access == 1
1963 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1964 {
1965 warning (0, "target CPU does not support unaligned accesses");
1966 unaligned_access = 0;
1967 }
1968
1969 if (TARGET_THUMB1 && flag_schedule_insns)
1970 {
1971 /* Don't warn since it's on by default in -O2. */
1972 flag_schedule_insns = 0;
1973 }
1974
1975 if (optimize_size)
1976 {
1977 /* If optimizing for size, bump the number of instructions that we
1978 are prepared to conditionally execute (even on a StrongARM). */
1979 max_insns_skipped = 6;
1980 }
1981 else
1982 max_insns_skipped = current_tune->max_insns_skipped;
1983
1984 /* Hot/Cold partitioning is not currently supported, since we can't
1985 handle literal pool placement in that case. */
1986 if (flag_reorder_blocks_and_partition)
1987 {
1988 inform (input_location,
1989 "-freorder-blocks-and-partition not supported on this architecture");
1990 flag_reorder_blocks_and_partition = 0;
1991 flag_reorder_blocks = 1;
1992 }
1993
1994 if (flag_pic)
1995 /* Hoisting PIC address calculations more aggressively provides a small,
1996 but measurable, size reduction for PIC code. Therefore, we decrease
1997 the bar for unrestricted expression hoisting to the cost of PIC address
1998 calculation, which is 2 instructions. */
1999 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2000 global_options.x_param_values,
2001 global_options_set.x_param_values);
2002
2003 /* ARM EABI defaults to strict volatile bitfields. */
2004 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2005 && abi_version_at_least(2))
2006 flag_strict_volatile_bitfields = 1;
2007
2008 /* Enable software prefetching at -O3 for CPUs that have prefetch, and when
2009 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2010 if (flag_prefetch_loop_arrays < 0
2011 && HAVE_prefetch
2012 && optimize >= 3
2013 && current_tune->num_prefetch_slots > 0)
2014 flag_prefetch_loop_arrays = 1;
2015
2016 /* Set up parameters to be used in the prefetching algorithm. Do not override
2017 the defaults unless we are tuning for a core we have researched values for. */
2018 if (current_tune->num_prefetch_slots > 0)
2019 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2020 current_tune->num_prefetch_slots,
2021 global_options.x_param_values,
2022 global_options_set.x_param_values);
2023 if (current_tune->l1_cache_line_size >= 0)
2024 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2025 current_tune->l1_cache_line_size,
2026 global_options.x_param_values,
2027 global_options_set.x_param_values);
2028 if (current_tune->l1_cache_size >= 0)
2029 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2030 current_tune->l1_cache_size,
2031 global_options.x_param_values,
2032 global_options_set.x_param_values);
2033
2034 /* Use the alternative scheduling-pressure algorithm by default. */
2035 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2036 global_options.x_param_values,
2037 global_options_set.x_param_values);
2038
2039 /* Register global variables with the garbage collector. */
2040 arm_add_gc_roots ();
2041 }
2042
2043 static void
2044 arm_add_gc_roots (void)
2045 {
2046 gcc_obstack_init(&minipool_obstack);
2047 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2048 }
2049 \f
2050 /* A table of known ARM exception types.
2051 For use with the interrupt function attribute. */
2052
2053 typedef struct
2054 {
2055 const char *const arg;
2056 const unsigned long return_value;
2057 }
2058 isr_attribute_arg;
2059
2060 static const isr_attribute_arg isr_attribute_args [] =
2061 {
2062 { "IRQ", ARM_FT_ISR },
2063 { "irq", ARM_FT_ISR },
2064 { "FIQ", ARM_FT_FIQ },
2065 { "fiq", ARM_FT_FIQ },
2066 { "ABORT", ARM_FT_ISR },
2067 { "abort", ARM_FT_ISR },
2068 { "ABORT", ARM_FT_ISR },
2069 { "abort", ARM_FT_ISR },
2070 { "UNDEF", ARM_FT_EXCEPTION },
2071 { "undef", ARM_FT_EXCEPTION },
2072 { "SWI", ARM_FT_EXCEPTION },
2073 { "swi", ARM_FT_EXCEPTION },
2074 { NULL, ARM_FT_NORMAL }
2075 };
2076
2077 /* Returns the (interrupt) function type of the current
2078 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2079
2080 static unsigned long
2081 arm_isr_value (tree argument)
2082 {
2083 const isr_attribute_arg * ptr;
2084 const char * arg;
2085
2086 if (!arm_arch_notm)
2087 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2088
2089 /* No argument - default to IRQ. */
2090 if (argument == NULL_TREE)
2091 return ARM_FT_ISR;
2092
2093 /* Get the value of the argument. */
2094 if (TREE_VALUE (argument) == NULL_TREE
2095 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2096 return ARM_FT_UNKNOWN;
2097
2098 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2099
2100 /* Check it against the list of known arguments. */
2101 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2102 if (streq (arg, ptr->arg))
2103 return ptr->return_value;
2104
2105 /* An unrecognized interrupt type. */
2106 return ARM_FT_UNKNOWN;
2107 }
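
/* Illustrative (hypothetical) user code for the attribute handled above:

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   maps to ARM_FT_ISR; a bare __attribute__ ((interrupt)) with no argument
   also defaults to ARM_FT_ISR.  On cores without the non-M-profile
   instruction set (!arm_arch_notm) the argument is ignored and the
   function is treated as ARM_FT_NORMAL with ARM_FT_STACKALIGN set. */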
2108
2109 /* Computes the type of the current function. */
2110
2111 static unsigned long
2112 arm_compute_func_type (void)
2113 {
2114 unsigned long type = ARM_FT_UNKNOWN;
2115 tree a;
2116 tree attr;
2117
2118 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2119
2120 /* Decide if the current function is volatile. Such functions
2121 never return, and many memory cycles can be saved by not storing
2122 register values that will never be needed again. This optimization
2123 was added to speed up context switching in a kernel application. */
2124 if (optimize > 0
2125 && (TREE_NOTHROW (current_function_decl)
2126 || !(flag_unwind_tables
2127 || (flag_exceptions
2128 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2129 && TREE_THIS_VOLATILE (current_function_decl))
2130 type |= ARM_FT_VOLATILE;
2131
2132 if (cfun->static_chain_decl != NULL)
2133 type |= ARM_FT_NESTED;
2134
2135 attr = DECL_ATTRIBUTES (current_function_decl);
2136
2137 a = lookup_attribute ("naked", attr);
2138 if (a != NULL_TREE)
2139 type |= ARM_FT_NAKED;
2140
2141 a = lookup_attribute ("isr", attr);
2142 if (a == NULL_TREE)
2143 a = lookup_attribute ("interrupt", attr);
2144
2145 if (a == NULL_TREE)
2146 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2147 else
2148 type |= arm_isr_value (TREE_VALUE (a));
2149
2150 return type;
2151 }
2152
2153 /* Returns the type of the current function. */
2154
2155 unsigned long
2156 arm_current_func_type (void)
2157 {
2158 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2159 cfun->machine->func_type = arm_compute_func_type ();
2160
2161 return cfun->machine->func_type;
2162 }
2163
2164 bool
2165 arm_allocate_stack_slots_for_args (void)
2166 {
2167 /* Naked functions should not allocate stack slots for arguments. */
2168 return !IS_NAKED (arm_current_func_type ());
2169 }
2170
2171 \f
2172 /* Output assembler code for a block containing the constant parts
2173 of a trampoline, leaving space for the variable parts.
2174
2175 On the ARM, (if r8 is the static chain regnum, and remembering that
2176 referencing pc adds an offset of 8) the trampoline looks like:
2177 ldr r8, [pc, #0]
2178 ldr pc, [pc]
2179 .word static chain value
2180 .word function's address
2181 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2182
2183 static void
2184 arm_asm_trampoline_template (FILE *f)
2185 {
2186 if (TARGET_ARM)
2187 {
2188 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2189 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2190 }
2191 else if (TARGET_THUMB2)
2192 {
2193 /* The Thumb-2 trampoline is similar to the ARM implementation.
2194 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2195 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2196 STATIC_CHAIN_REGNUM, PC_REGNUM);
2197 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2198 }
2199 else
2200 {
2201 ASM_OUTPUT_ALIGN (f, 2);
2202 fprintf (f, "\t.code\t16\n");
2203 fprintf (f, ".Ltrampoline_start:\n");
2204 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2205 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2206 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2207 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2208 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2209 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2210 }
2211 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2212 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2213 }
2214
2215 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2216
2217 static void
2218 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2219 {
2220 rtx fnaddr, mem, a_tramp;
2221
2222 emit_block_move (m_tramp, assemble_trampoline_template (),
2223 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2224
2225 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2226 emit_move_insn (mem, chain_value);
2227
2228 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2229 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2230 emit_move_insn (mem, fnaddr);
2231
2232 a_tramp = XEXP (m_tramp, 0);
2233 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2234 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2235 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2236 }
2237
2238 /* Thumb trampolines should be entered in thumb mode, so set
2239 the bottom bit of the address. */
2240
2241 static rtx
2242 arm_trampoline_adjust_address (rtx addr)
2243 {
2244 if (TARGET_THUMB)
2245 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2246 NULL, 0, OPTAB_LIB_WIDEN);
2247 return addr;
2248 }
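
/* E.g. (illustrative address) a Thumb trampoline placed at 0x20001000 is
   returned as 0x20001001, so that branching to it switches the core into
   Thumb state. */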
2249 \f
2250 /* Return 1 if it is possible to return using a single instruction.
2251 If SIBLING is non-null, this is a test for a return before a sibling
2252 call. SIBLING is the call insn, so we can examine its register usage. */
2253
2254 int
2255 use_return_insn (int iscond, rtx sibling)
2256 {
2257 int regno;
2258 unsigned int func_type;
2259 unsigned long saved_int_regs;
2260 unsigned HOST_WIDE_INT stack_adjust;
2261 arm_stack_offsets *offsets;
2262
2263 /* Never use a return instruction before reload has run. */
2264 if (!reload_completed)
2265 return 0;
2266
2267 func_type = arm_current_func_type ();
2268
2269 /* Naked, volatile and stack alignment functions need special
2270 consideration. */
2271 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2272 return 0;
2273
2274 /* So do interrupt functions that use the frame pointer and Thumb
2275 interrupt functions. */
2276 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2277 return 0;
2278
2279 offsets = arm_get_frame_offsets ();
2280 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2281
2282 /* As do variadic functions. */
2283 if (crtl->args.pretend_args_size
2284 || cfun->machine->uses_anonymous_args
2285 /* Or if the function calls __builtin_eh_return () */
2286 || crtl->calls_eh_return
2287 /* Or if the function calls alloca */
2288 || cfun->calls_alloca
2289 /* Or if there is a stack adjustment. However, if the stack pointer
2290 is saved on the stack, we can use a pre-incrementing stack load. */
2291 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2292 && stack_adjust == 4)))
2293 return 0;
2294
2295 saved_int_regs = offsets->saved_regs_mask;
2296
2297 /* Unfortunately, the insn
2298
2299 ldmib sp, {..., sp, ...}
2300
2301 triggers a bug on most SA-110 based devices, such that the stack
2302 pointer won't be correctly restored if the instruction takes a
2303 page fault. We work around this problem by popping r3 along with
2304 the other registers, since that is never slower than executing
2305 another instruction.
2306
2307 We test for !arm_arch5 here, because code for any architecture
2308 less than this could potentially be run on one of the buggy
2309 chips. */
2310 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2311 {
2312 /* Validate that r3 is a call-clobbered register (always true in
2313 the default abi) ... */
2314 if (!call_used_regs[3])
2315 return 0;
2316
2317 /* ... that it isn't being used for a return value ... */
2318 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2319 return 0;
2320
2321 /* ... or for a tail-call argument ... */
2322 if (sibling)
2323 {
2324 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2325
2326 if (find_regno_fusage (sibling, USE, 3))
2327 return 0;
2328 }
2329
2330 /* ... and that there are no call-saved registers in r0-r2
2331 (always true in the default ABI). */
2332 if (saved_int_regs & 0x7)
2333 return 0;
2334 }
2335
2336 /* Can't be done if interworking with Thumb, and any registers have been
2337 stacked. */
2338 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2339 return 0;
2340
2341 /* On StrongARM, conditional returns are expensive if they aren't
2342 taken and multiple registers have been stacked. */
2343 if (iscond && arm_tune_strongarm)
2344 {
2345 /* Conditional return when just the LR is stored is a simple
2346 conditional-load instruction; that's not expensive. */
2347 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2348 return 0;
2349
2350 if (flag_pic
2351 && arm_pic_register != INVALID_REGNUM
2352 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2353 return 0;
2354 }
2355
2356 /* If there are saved registers but the LR isn't saved, then we need
2357 two instructions for the return. */
2358 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2359 return 0;
2360
2361 /* Can't be done if any of the VFP regs are pushed,
2362 since this also requires an insn. */
2363 if (TARGET_HARD_FLOAT && TARGET_VFP)
2364 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2365 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2366 return 0;
2367
2368 if (TARGET_REALLY_IWMMXT)
2369 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2370 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2371 return 0;
2372
2373 return 1;
2374 }
2375
2376 /* Return TRUE if int I is a valid immediate ARM constant. */
2377
2378 int
2379 const_ok_for_arm (HOST_WIDE_INT i)
2380 {
2381 int lowbit;
2382
2383 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2384 be all zero, or all one. */
2385 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2386 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2387 != ((~(unsigned HOST_WIDE_INT) 0)
2388 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2389 return FALSE;
2390
2391 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2392
2393 /* Fast return for 0 and small values. We must do this for zero, since
2394 the code below can't handle that one case. */
2395 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2396 return TRUE;
2397
2398 /* Get the number of trailing zeros. */
2399 lowbit = ffs((int) i) - 1;
2400
2401 /* Only even shifts are allowed in ARM mode so round down to the
2402 nearest even number. */
2403 if (TARGET_ARM)
2404 lowbit &= ~1;
2405
2406 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2407 return TRUE;
2408
2409 if (TARGET_ARM)
2410 {
2411 /* Allow rotated constants in ARM mode. */
2412 if (lowbit <= 4
2413 && ((i & ~0xc000003f) == 0
2414 || (i & ~0xf000000f) == 0
2415 || (i & ~0xfc000003) == 0))
2416 return TRUE;
2417 }
2418 else
2419 {
2420 HOST_WIDE_INT v;
2421
2422 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2423 v = i & 0xff;
2424 v |= v << 16;
2425 if (i == v || i == (v | (v << 8)))
2426 return TRUE;
2427
2428 /* Allow repeated pattern 0xXY00XY00. */
2429 v = i & 0xff00;
2430 v |= v << 16;
2431 if (i == v)
2432 return TRUE;
2433 }
2434
2435 return FALSE;
2436 }
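
/* A few illustrative inputs for the predicate above (ARM mode):

     0x000000ff  valid    (8-bit value, no rotation)
     0x0003fc00  valid    (0xff shifted left by 10, an even rotation)
     0xff000000  valid    (0xff rotated into the top byte)
     0xf000000f  valid    (0xff rotated across the word boundary)
     0x00000101  invalid  (spans 9 bits, no rotated 8-bit field fits)
     0x0000ffff  invalid  (needs more than one instruction to build)

   In the non-ARM (Thumb-2) path the replicated patterns 0x00XY00XY,
   0xXY00XY00 and 0xXYXYXYXY are additionally accepted. */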
2437
2438 /* Return true if I is a valid constant for the operation CODE. */
2439 int
2440 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2441 {
2442 if (const_ok_for_arm (i))
2443 return 1;
2444
2445 switch (code)
2446 {
2447 case SET:
2448 /* See if we can use movw. */
2449 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2450 return 1;
2451 else
2452 /* Otherwise, try mvn. */
2453 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2454
2455 case PLUS:
2456 /* See if we can use addw or subw. */
2457 if (TARGET_THUMB2
2458 && ((i & 0xfffff000) == 0
2459 || ((-i) & 0xfffff000) == 0))
2460 return 1;
2461 /* else fall through. */
2462
2463 case COMPARE:
2464 case EQ:
2465 case NE:
2466 case GT:
2467 case LE:
2468 case LT:
2469 case GE:
2470 case GEU:
2471 case LTU:
2472 case GTU:
2473 case LEU:
2474 case UNORDERED:
2475 case ORDERED:
2476 case UNEQ:
2477 case UNGE:
2478 case UNLT:
2479 case UNGT:
2480 case UNLE:
2481 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2482
2483 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2484 case XOR:
2485 return 0;
2486
2487 case IOR:
2488 if (TARGET_THUMB2)
2489 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2490 return 0;
2491
2492 case AND:
2493 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2494
2495 default:
2496 gcc_unreachable ();
2497 }
2498 }
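
/* Illustrative cases for const_ok_for_op (constants chosen as examples):

     SET      0xffffff00  ok: ~value is 0xff, so a single mvn suffices
     PLUS     0x00000fff  ok on Thumb-2: fits the 12-bit addw/subw range
     AND      0xffffff00  ok: ~value is a valid immediate, usable with bic
     COMPARE  -1          ok: the negated value 1 is valid, usable with cmn */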
2499
2500 /* Return true if I is a valid DImode constant for the operation CODE. */
2501 int
2502 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2503 {
2504 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2505 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2506 rtx hi = GEN_INT (hi_val);
2507 rtx lo = GEN_INT (lo_val);
2508
2509 if (TARGET_THUMB1)
2510 return 0;
2511
2512 switch (code)
2513 {
2514 case PLUS:
2515 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2516
2517 default:
2518 return 0;
2519 }
2520 }
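
/* For instance (illustrative 64-bit value): a DImode PLUS of the constant
   with high word 0x1 and low word 0x100 is accepted, since 0x1 satisfies
   arm_not_operand and 0x100 satisfies arm_add_operand; on Thumb-1 the
   function always returns 0. */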
2521
2522 /* Emit a sequence of insns to handle a large constant.
2523 CODE is the code of the operation required; it can be any of SET, PLUS,
2524 IOR, AND, XOR, MINUS;
2525 MODE is the mode in which the operation is being performed;
2526 VAL is the integer to operate on;
2527 SOURCE is the other operand (a register, or a null-pointer for SET);
2528 SUBTARGETS means it is safe to create scratch registers if that will
2529 either produce a simpler sequence, or if we will want to CSE the values.
2530 The return value is the number of insns emitted. */
2531
2532 /* ??? Tweak this for thumb2. */
2533 int
2534 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2535 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2536 {
2537 rtx cond;
2538
2539 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2540 cond = COND_EXEC_TEST (PATTERN (insn));
2541 else
2542 cond = NULL_RTX;
2543
2544 if (subtargets || code == SET
2545 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2546 && REGNO (target) != REGNO (source)))
2547 {
2548 /* After arm_reorg has been called, we can't fix up expensive
2549 constants by pushing them into memory, so we must synthesize
2550 them in-line, regardless of the cost. This is only likely to
2551 be more costly on chips that have load delay slots and when we
2552 are compiling without running the scheduler (so no splitting
2553 occurred before the final instruction emission).
2554
2555 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2556 */
2557 if (!after_arm_reorg
2558 && !cond
2559 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2560 1, 0)
2561 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2562 + (code != SET))))
2563 {
2564 if (code == SET)
2565 {
2566 /* Currently SET is the only monadic value for CODE; all
2567 the rest are dyadic. */
2568 if (TARGET_USE_MOVT)
2569 arm_emit_movpair (target, GEN_INT (val));
2570 else
2571 emit_set_insn (target, GEN_INT (val));
2572
2573 return 1;
2574 }
2575 else
2576 {
2577 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2578
2579 if (TARGET_USE_MOVT)
2580 arm_emit_movpair (temp, GEN_INT (val));
2581 else
2582 emit_set_insn (temp, GEN_INT (val));
2583
2584 /* For MINUS, the value is subtracted from, since we never
2585 have subtraction of a constant. */
2586 if (code == MINUS)
2587 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2588 else
2589 emit_set_insn (target,
2590 gen_rtx_fmt_ee (code, mode, source, temp));
2591 return 2;
2592 }
2593 }
2594 }
2595
2596 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2597 1);
2598 }
2599
2600 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2601 ARM/Thumb-2 immediates and add up to VAL.
2602 The function return value gives the number of insns required. */
2603 static int
2604 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2605 struct four_ints *return_sequence)
2606 {
2607 int best_consecutive_zeros = 0;
2608 int i;
2609 int best_start = 0;
2610 int insns1, insns2;
2611 struct four_ints tmp_sequence;
2612
2613 /* If we aren't targeting ARM, the best place to start is always at
2614 the bottom; otherwise look more closely. */
2615 if (TARGET_ARM)
2616 {
2617 for (i = 0; i < 32; i += 2)
2618 {
2619 int consecutive_zeros = 0;
2620
2621 if (!(val & (3 << i)))
2622 {
2623 while ((i < 32) && !(val & (3 << i)))
2624 {
2625 consecutive_zeros += 2;
2626 i += 2;
2627 }
2628 if (consecutive_zeros > best_consecutive_zeros)
2629 {
2630 best_consecutive_zeros = consecutive_zeros;
2631 best_start = i - consecutive_zeros;
2632 }
2633 i -= 2;
2634 }
2635 }
2636 }
2637
2638 /* So long as it won't require any more insns to do so, it's
2639 desirable to emit a small constant (in bits 0...9) in the last
2640 insn. This way there is more chance that it can be combined with
2641 a later addressing insn to form a pre-indexed load or store
2642 operation. Consider:
2643
2644 *((volatile int *)0xe0000100) = 1;
2645 *((volatile int *)0xe0000110) = 2;
2646
2647 We want this to wind up as:
2648
2649 mov rA, #0xe0000000
2650 mov rB, #1
2651 str rB, [rA, #0x100]
2652 mov rB, #2
2653 str rB, [rA, #0x110]
2654
2655 rather than having to synthesize both large constants from scratch.
2656
2657 Therefore, we calculate how many insns would be required to emit
2658 the constant starting from `best_start', and also starting from
2659 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2660 yield a shorter sequence, we may as well use zero. */
2661 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2662 if (best_start != 0
2663 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2664 {
2665 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2666 if (insns2 <= insns1)
2667 {
2668 *return_sequence = tmp_sequence;
2669 insns1 = insns2;
2670 }
2671 }
2672
2673 return insns1;
2674 }
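
/* Worked example (ARM mode, illustrative): for VAL == 0x0000ffff the
   routine above finds best_start == 16 and returns the two immediates
   { 0xff00, 0x00ff }, i.e. the constant costs two instructions, because
   0xffff itself is not a valid rotated 8-bit immediate. */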
2675
2676 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2677 static int
2678 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2679 struct four_ints *return_sequence, int i)
2680 {
2681 int remainder = val & 0xffffffff;
2682 int insns = 0;
2683
2684 /* Try to find a way of doing the job in either two or three
2685 instructions.
2686
2687 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2688 location. We start at position I. This may be the MSB, or
2689 optimal_immediate_sequence may have positioned it at the largest block
2690 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2691 wrapping around to the top of the word when we drop off the bottom.
2692 In the worst case this code should produce no more than four insns.
2693
2694 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2695 constants, shifted to any arbitrary location. We should always start
2696 at the MSB. */
2697 do
2698 {
2699 int end;
2700 unsigned int b1, b2, b3, b4;
2701 unsigned HOST_WIDE_INT result;
2702 int loc;
2703
2704 gcc_assert (insns < 4);
2705
2706 if (i <= 0)
2707 i += 32;
2708
2709 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2710 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2711 {
2712 loc = i;
2713 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2714 /* We can use addw/subw for the last 12 bits. */
2715 result = remainder;
2716 else
2717 {
2718 /* Use an 8-bit shifted/rotated immediate. */
2719 end = i - 8;
2720 if (end < 0)
2721 end += 32;
2722 result = remainder & ((0x0ff << end)
2723 | ((i < end) ? (0xff >> (32 - end))
2724 : 0));
2725 i -= 8;
2726 }
2727 }
2728 else
2729 {
2730 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2731 arbitrary shifts. */
2732 i -= TARGET_ARM ? 2 : 1;
2733 continue;
2734 }
2735
2736 /* Next, see if we can do a better job with a thumb2 replicated
2737 constant.
2738
2739 We do it this way around to catch the cases like 0x01F001E0 where
2740 two 8-bit immediates would work, but a replicated constant would
2741 make it worse.
2742
2743 TODO: 16-bit constants that don't clear all the bits, but still win.
2744 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2745 if (TARGET_THUMB2)
2746 {
2747 b1 = (remainder & 0xff000000) >> 24;
2748 b2 = (remainder & 0x00ff0000) >> 16;
2749 b3 = (remainder & 0x0000ff00) >> 8;
2750 b4 = remainder & 0xff;
2751
2752 if (loc > 24)
2753 {
2754 /* The 8-bit immediate already found clears b1 (and maybe b2),
2755 but must leave b3 and b4 alone. */
2756
2757 /* First try to find a 32-bit replicated constant that clears
2758 almost everything. We can assume that we can't do it in one,
2759 or else we wouldn't be here. */
2760 unsigned int tmp = b1 & b2 & b3 & b4;
2761 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2762 + (tmp << 24);
2763 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2764 + (tmp == b3) + (tmp == b4);
2765 if (tmp
2766 && (matching_bytes >= 3
2767 || (matching_bytes == 2
2768 && const_ok_for_op (remainder & ~tmp2, code))))
2769 {
2770 /* At least 3 of the bytes match, and the fourth has at
2771 least as many bits set, or two of the bytes match
2772 and it will only require one more insn to finish. */
2773 result = tmp2;
2774 i = tmp != b1 ? 32
2775 : tmp != b2 ? 24
2776 : tmp != b3 ? 16
2777 : 8;
2778 }
2779
2780 /* Second, try to find a 16-bit replicated constant that can
2781 leave three of the bytes clear. If b2 or b4 is already
2782 zero, then we can. If the 8-bit from above would not
2783 clear b2 anyway, then we still win. */
2784 else if (b1 == b3 && (!b2 || !b4
2785 || (remainder & 0x00ff0000 & ~result)))
2786 {
2787 result = remainder & 0xff00ff00;
2788 i = 24;
2789 }
2790 }
2791 else if (loc > 16)
2792 {
2793 /* The 8-bit immediate already found clears b2 (and maybe b3)
2794 and we don't get here unless b1 is already clear, but it will
2795 leave b4 unchanged. */
2796
2797 /* If we can clear b2 and b4 at once, then we win, since the
2798 8-bits couldn't possibly reach that far. */
2799 if (b2 == b4)
2800 {
2801 result = remainder & 0x00ff00ff;
2802 i = 16;
2803 }
2804 }
2805 }
2806
2807 return_sequence->i[insns++] = result;
2808 remainder &= ~result;
2809
2810 if (code == SET || code == MINUS)
2811 code = PLUS;
2812 }
2813 while (remainder);
2814
2815 return insns;
2816 }
2817
2818 /* Emit an instruction with the indicated PATTERN. If COND is
2819 non-NULL, conditionalize the execution of the instruction on COND
2820 being true. */
2821
2822 static void
2823 emit_constant_insn (rtx cond, rtx pattern)
2824 {
2825 if (cond)
2826 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2827 emit_insn (pattern);
2828 }
2829
2830 /* As above, but with an extra parameter GENERATE which, if clear,
2831 suppresses RTL generation. */
2832
2833 static int
2834 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2835 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2836 int generate)
2837 {
2838 int can_invert = 0;
2839 int can_negate = 0;
2840 int final_invert = 0;
2841 int i;
2842 int set_sign_bit_copies = 0;
2843 int clear_sign_bit_copies = 0;
2844 int clear_zero_bit_copies = 0;
2845 int set_zero_bit_copies = 0;
2846 int insns = 0, neg_insns, inv_insns;
2847 unsigned HOST_WIDE_INT temp1, temp2;
2848 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2849 struct four_ints *immediates;
2850 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2851
2852 /* Find out which operations are safe for a given CODE. Also do a quick
2853 check for degenerate cases; these can occur when DImode operations
2854 are split. */
2855 switch (code)
2856 {
2857 case SET:
2858 can_invert = 1;
2859 break;
2860
2861 case PLUS:
2862 can_negate = 1;
2863 break;
2864
2865 case IOR:
2866 if (remainder == 0xffffffff)
2867 {
2868 if (generate)
2869 emit_constant_insn (cond,
2870 gen_rtx_SET (VOIDmode, target,
2871 GEN_INT (ARM_SIGN_EXTEND (val))));
2872 return 1;
2873 }
2874
2875 if (remainder == 0)
2876 {
2877 if (reload_completed && rtx_equal_p (target, source))
2878 return 0;
2879
2880 if (generate)
2881 emit_constant_insn (cond,
2882 gen_rtx_SET (VOIDmode, target, source));
2883 return 1;
2884 }
2885 break;
2886
2887 case AND:
2888 if (remainder == 0)
2889 {
2890 if (generate)
2891 emit_constant_insn (cond,
2892 gen_rtx_SET (VOIDmode, target, const0_rtx));
2893 return 1;
2894 }
2895 if (remainder == 0xffffffff)
2896 {
2897 if (reload_completed && rtx_equal_p (target, source))
2898 return 0;
2899 if (generate)
2900 emit_constant_insn (cond,
2901 gen_rtx_SET (VOIDmode, target, source));
2902 return 1;
2903 }
2904 can_invert = 1;
2905 break;
2906
2907 case XOR:
2908 if (remainder == 0)
2909 {
2910 if (reload_completed && rtx_equal_p (target, source))
2911 return 0;
2912 if (generate)
2913 emit_constant_insn (cond,
2914 gen_rtx_SET (VOIDmode, target, source));
2915 return 1;
2916 }
2917
2918 if (remainder == 0xffffffff)
2919 {
2920 if (generate)
2921 emit_constant_insn (cond,
2922 gen_rtx_SET (VOIDmode, target,
2923 gen_rtx_NOT (mode, source)));
2924 return 1;
2925 }
2926 final_invert = 1;
2927 break;
2928
2929 case MINUS:
2930 /* We treat MINUS as (val - source), since (source - val) is always
2931 passed as (source + (-val)). */
2932 if (remainder == 0)
2933 {
2934 if (generate)
2935 emit_constant_insn (cond,
2936 gen_rtx_SET (VOIDmode, target,
2937 gen_rtx_NEG (mode, source)));
2938 return 1;
2939 }
2940 if (const_ok_for_arm (val))
2941 {
2942 if (generate)
2943 emit_constant_insn (cond,
2944 gen_rtx_SET (VOIDmode, target,
2945 gen_rtx_MINUS (mode, GEN_INT (val),
2946 source)));
2947 return 1;
2948 }
2949
2950 break;
2951
2952 default:
2953 gcc_unreachable ();
2954 }
2955
2956 /* If we can do it in one insn get out quickly. */
2957 if (const_ok_for_op (val, code))
2958 {
2959 if (generate)
2960 emit_constant_insn (cond,
2961 gen_rtx_SET (VOIDmode, target,
2962 (source
2963 ? gen_rtx_fmt_ee (code, mode, source,
2964 GEN_INT (val))
2965 : GEN_INT (val))));
2966 return 1;
2967 }
2968
2969 /* Calculate a few attributes that may be useful for specific
2970 optimizations. */
2971 /* Count number of leading zeros. */
2972 for (i = 31; i >= 0; i--)
2973 {
2974 if ((remainder & (1 << i)) == 0)
2975 clear_sign_bit_copies++;
2976 else
2977 break;
2978 }
2979
2980 /* Count number of leading 1's. */
2981 for (i = 31; i >= 0; i--)
2982 {
2983 if ((remainder & (1 << i)) != 0)
2984 set_sign_bit_copies++;
2985 else
2986 break;
2987 }
2988
2989 /* Count the number of trailing zeros. */
2990 for (i = 0; i <= 31; i++)
2991 {
2992 if ((remainder & (1 << i)) == 0)
2993 clear_zero_bit_copies++;
2994 else
2995 break;
2996 }
2997
2998 /* Count number of trailing 1's. */
2999 for (i = 0; i <= 31; i++)
3000 {
3001 if ((remainder & (1 << i)) != 0)
3002 set_zero_bit_copies++;
3003 else
3004 break;
3005 }
3006
3007 switch (code)
3008 {
3009 case SET:
3010 /* See if we can do this by sign_extending a constant that is known
3011 to be negative. This is a good way of doing it, since the shift
3012 may well merge into a subsequent insn. */
3013 if (set_sign_bit_copies > 1)
3014 {
3015 if (const_ok_for_arm
3016 (temp1 = ARM_SIGN_EXTEND (remainder
3017 << (set_sign_bit_copies - 1))))
3018 {
3019 if (generate)
3020 {
3021 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3022 emit_constant_insn (cond,
3023 gen_rtx_SET (VOIDmode, new_src,
3024 GEN_INT (temp1)));
3025 emit_constant_insn (cond,
3026 gen_ashrsi3 (target, new_src,
3027 GEN_INT (set_sign_bit_copies - 1)));
3028 }
3029 return 2;
3030 }
3031 /* For an inverted constant, we will need to set the low bits,
3032 these will be shifted out of harm's way. */
3033 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3034 if (const_ok_for_arm (~temp1))
3035 {
3036 if (generate)
3037 {
3038 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3039 emit_constant_insn (cond,
3040 gen_rtx_SET (VOIDmode, new_src,
3041 GEN_INT (temp1)));
3042 emit_constant_insn (cond,
3043 gen_ashrsi3 (target, new_src,
3044 GEN_INT (set_sign_bit_copies - 1)));
3045 }
3046 return 2;
3047 }
3048 }
3049
3050 /* See if we can calculate the value as the difference between two
3051 valid immediates. */
3052 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3053 {
3054 int topshift = clear_sign_bit_copies & ~1;
3055
3056 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3057 & (0xff000000 >> topshift));
3058
3059 /* If temp1 is zero, then that means the 9 most significant
3060 bits of remainder were 1 and we've caused it to overflow.
3061 When topshift is 0 we don't need to do anything since we
3062 can borrow from 'bit 32'. */
3063 if (temp1 == 0 && topshift != 0)
3064 temp1 = 0x80000000 >> (topshift - 1);
3065
3066 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3067
3068 if (const_ok_for_arm (temp2))
3069 {
3070 if (generate)
3071 {
3072 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3073 emit_constant_insn (cond,
3074 gen_rtx_SET (VOIDmode, new_src,
3075 GEN_INT (temp1)));
3076 emit_constant_insn (cond,
3077 gen_addsi3 (target, new_src,
3078 GEN_INT (-temp2)));
3079 }
3080
3081 return 2;
3082 }
3083 }
3084
3085 /* See if we can generate this by setting the bottom (or the top)
3086 16 bits, and then shifting these into the other half of the
3087 word. We only look for the simplest cases; to do more would cost
3088 too much. Be careful, however, not to generate this when the
3089 alternative would take fewer insns. */
3090 if (val & 0xffff0000)
3091 {
3092 temp1 = remainder & 0xffff0000;
3093 temp2 = remainder & 0x0000ffff;
3094
3095 /* Overlaps outside this range are best done using other methods. */
3096 for (i = 9; i < 24; i++)
3097 {
3098 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3099 && !const_ok_for_arm (temp2))
3100 {
3101 rtx new_src = (subtargets
3102 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3103 : target);
3104 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3105 source, subtargets, generate);
3106 source = new_src;
3107 if (generate)
3108 emit_constant_insn
3109 (cond,
3110 gen_rtx_SET
3111 (VOIDmode, target,
3112 gen_rtx_IOR (mode,
3113 gen_rtx_ASHIFT (mode, source,
3114 GEN_INT (i)),
3115 source)));
3116 return insns + 1;
3117 }
3118 }
3119
3120 /* Don't duplicate cases already considered. */
3121 for (i = 17; i < 24; i++)
3122 {
3123 if (((temp1 | (temp1 >> i)) == remainder)
3124 && !const_ok_for_arm (temp1))
3125 {
3126 rtx new_src = (subtargets
3127 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3128 : target);
3129 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3130 source, subtargets, generate);
3131 source = new_src;
3132 if (generate)
3133 emit_constant_insn
3134 (cond,
3135 gen_rtx_SET (VOIDmode, target,
3136 gen_rtx_IOR
3137 (mode,
3138 gen_rtx_LSHIFTRT (mode, source,
3139 GEN_INT (i)),
3140 source)));
3141 return insns + 1;
3142 }
3143 }
3144 }
3145 break;
3146
3147 case IOR:
3148 case XOR:
3149 /* If we have IOR or XOR, and the constant can be loaded in a
3150 single instruction, and we can find a temporary to put it in,
3151 then this can be done in two instructions instead of 3-4. */
3152 if (subtargets
3153 /* TARGET can't be NULL if SUBTARGETS is 0 */
3154 || (reload_completed && !reg_mentioned_p (target, source)))
3155 {
3156 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3157 {
3158 if (generate)
3159 {
3160 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3161
3162 emit_constant_insn (cond,
3163 gen_rtx_SET (VOIDmode, sub,
3164 GEN_INT (val)));
3165 emit_constant_insn (cond,
3166 gen_rtx_SET (VOIDmode, target,
3167 gen_rtx_fmt_ee (code, mode,
3168 source, sub)));
3169 }
3170 return 2;
3171 }
3172 }
3173
3174 if (code == XOR)
3175 break;
3176
3177 /* Convert
3178 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3179 and the remaining bits 0, e.g. 0xfff00000) to
3180 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3181
3182 This can be done in 2 instructions by using shifts with mov or mvn.
3183 E.g. for
3184 x = x | 0xfff00000;
3185 we generate:
3186 mvn r0, r0, asl #12
3187 mvn r0, r0, lsr #12 */
3188 if (set_sign_bit_copies > 8
3189 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3190 {
3191 if (generate)
3192 {
3193 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3194 rtx shift = GEN_INT (set_sign_bit_copies);
3195
3196 emit_constant_insn
3197 (cond,
3198 gen_rtx_SET (VOIDmode, sub,
3199 gen_rtx_NOT (mode,
3200 gen_rtx_ASHIFT (mode,
3201 source,
3202 shift))));
3203 emit_constant_insn
3204 (cond,
3205 gen_rtx_SET (VOIDmode, target,
3206 gen_rtx_NOT (mode,
3207 gen_rtx_LSHIFTRT (mode, sub,
3208 shift))));
3209 }
3210 return 2;
3211 }
3212
3213 /* Convert
3214 x = y | constant (which has set_zero_bit_copies number of trailing ones)
3215 to
3216 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3217
3218 E.g. for r0 = r0 | 0xfff we generate:
3219 mvn r0, r0, lsr #12
3220 mvn r0, r0, asl #12
3221
3222 */
3223 if (set_zero_bit_copies > 8
3224 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3225 {
3226 if (generate)
3227 {
3228 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3229 rtx shift = GEN_INT (set_zero_bit_copies);
3230
3231 emit_constant_insn
3232 (cond,
3233 gen_rtx_SET (VOIDmode, sub,
3234 gen_rtx_NOT (mode,
3235 gen_rtx_LSHIFTRT (mode,
3236 source,
3237 shift))));
3238 emit_constant_insn
3239 (cond,
3240 gen_rtx_SET (VOIDmode, target,
3241 gen_rtx_NOT (mode,
3242 gen_rtx_ASHIFT (mode, sub,
3243 shift))));
3244 }
3245 return 2;
3246 }
3247
3248 /* This will never be reached for Thumb2 because orn is a valid
3249 instruction. This is for Thumb1 and the ARM 32 bit cases.
3250
3251 x = y | constant (such that ~constant is a valid constant)
3252 Transform this to
3253 x = ~(~y & ~constant).
3254 */
3255 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3256 {
3257 if (generate)
3258 {
3259 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3260 emit_constant_insn (cond,
3261 gen_rtx_SET (VOIDmode, sub,
3262 gen_rtx_NOT (mode, source)));
3263 source = sub;
3264 if (subtargets)
3265 sub = gen_reg_rtx (mode);
3266 emit_constant_insn (cond,
3267 gen_rtx_SET (VOIDmode, sub,
3268 gen_rtx_AND (mode, source,
3269 GEN_INT (temp1))));
3270 emit_constant_insn (cond,
3271 gen_rtx_SET (VOIDmode, target,
3272 gen_rtx_NOT (mode, sub)));
3273 }
3274 return 3;
3275 }
3276 break;
3277
3278 case AND:
3279 /* See if two shifts will do 2 or more insns' worth of work. */
3280 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3281 {
3282 HOST_WIDE_INT shift_mask = ((0xffffffff
3283 << (32 - clear_sign_bit_copies))
3284 & 0xffffffff);
3285
3286 if ((remainder | shift_mask) != 0xffffffff)
3287 {
3288 if (generate)
3289 {
3290 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3291 insns = arm_gen_constant (AND, mode, cond,
3292 remainder | shift_mask,
3293 new_src, source, subtargets, 1);
3294 source = new_src;
3295 }
3296 else
3297 {
3298 rtx targ = subtargets ? NULL_RTX : target;
3299 insns = arm_gen_constant (AND, mode, cond,
3300 remainder | shift_mask,
3301 targ, source, subtargets, 0);
3302 }
3303 }
3304
3305 if (generate)
3306 {
3307 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3308 rtx shift = GEN_INT (clear_sign_bit_copies);
3309
3310 emit_insn (gen_ashlsi3 (new_src, source, shift));
3311 emit_insn (gen_lshrsi3 (target, new_src, shift));
3312 }
3313
3314 return insns + 2;
3315 }
3316
3317 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3318 {
3319 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3320
3321 if ((remainder | shift_mask) != 0xffffffff)
3322 {
3323 if (generate)
3324 {
3325 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3326
3327 insns = arm_gen_constant (AND, mode, cond,
3328 remainder | shift_mask,
3329 new_src, source, subtargets, 1);
3330 source = new_src;
3331 }
3332 else
3333 {
3334 rtx targ = subtargets ? NULL_RTX : target;
3335
3336 insns = arm_gen_constant (AND, mode, cond,
3337 remainder | shift_mask,
3338 targ, source, subtargets, 0);
3339 }
3340 }
3341
3342 if (generate)
3343 {
3344 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3345 rtx shift = GEN_INT (clear_zero_bit_copies);
3346
3347 emit_insn (gen_lshrsi3 (new_src, source, shift));
3348 emit_insn (gen_ashlsi3 (target, new_src, shift));
3349 }
3350
3351 return insns + 2;
3352 }
3353
3354 break;
3355
3356 default:
3357 break;
3358 }
3359
3360 /* Calculate what the instruction sequences would be if we generated it
3361 normally, negated, or inverted. */
3362 if (code == AND)
3363 /* AND cannot be split into multiple insns, so invert and use BIC. */
3364 insns = 99;
3365 else
3366 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3367
3368 if (can_negate)
3369 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3370 &neg_immediates);
3371 else
3372 neg_insns = 99;
3373
3374 if (can_invert || final_invert)
3375 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3376 &inv_immediates);
3377 else
3378 inv_insns = 99;
3379
3380 immediates = &pos_immediates;
3381
3382 /* Is the negated immediate sequence more efficient? */
3383 if (neg_insns < insns && neg_insns <= inv_insns)
3384 {
3385 insns = neg_insns;
3386 immediates = &neg_immediates;
3387 }
3388 else
3389 can_negate = 0;
3390
3391 /* Is the inverted immediate sequence more efficient?
3392 We must allow for an extra NOT instruction for XOR operations, although
3393 there is some chance that the final 'mvn' will get optimized later. */
3394 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3395 {
3396 insns = inv_insns;
3397 immediates = &inv_immediates;
3398 }
3399 else
3400 {
3401 can_invert = 0;
3402 final_invert = 0;
3403 }
3404
3405 /* Now output the chosen sequence as instructions. */
3406 if (generate)
3407 {
3408 for (i = 0; i < insns; i++)
3409 {
3410 rtx new_src, temp1_rtx;
3411
3412 temp1 = immediates->i[i];
3413
3414 if (code == SET || code == MINUS)
3415 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3416 else if ((final_invert || i < (insns - 1)) && subtargets)
3417 new_src = gen_reg_rtx (mode);
3418 else
3419 new_src = target;
3420
3421 if (can_invert)
3422 temp1 = ~temp1;
3423 else if (can_negate)
3424 temp1 = -temp1;
3425
3426 temp1 = trunc_int_for_mode (temp1, mode);
3427 temp1_rtx = GEN_INT (temp1);
3428
3429 if (code == SET)
3430 ;
3431 else if (code == MINUS)
3432 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3433 else
3434 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3435
3436 emit_constant_insn (cond,
3437 gen_rtx_SET (VOIDmode, new_src,
3438 temp1_rtx));
3439 source = new_src;
3440
3441 if (code == SET)
3442 {
3443 can_negate = can_invert;
3444 can_invert = 0;
3445 code = PLUS;
3446 }
3447 else if (code == MINUS)
3448 code = PLUS;
3449 }
3450 }
3451
3452 if (final_invert)
3453 {
3454 if (generate)
3455 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3456 gen_rtx_NOT (mode, source)));
3457 insns++;
3458 }
3459
3460 return insns;
3461 }
3462
3463 /* Canonicalize a comparison so that we are more likely to recognize it.
3464 This can be done for a few constant compares, where we can make the
3465 immediate value easier to load. */
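/* For example, with SImode operands, x <= 0x3ff cannot use 0x3ff directly
   (ten consecutive set bits is not a valid ARM immediate), but 0x400 is
   valid, so the comparison is rewritten as x < 0x400 (LE becomes LT with
   op1 + 1).  */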
3466
3467 enum rtx_code
3468 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3469 {
3470 enum machine_mode mode;
3471 unsigned HOST_WIDE_INT i, maxval;
3472
3473 mode = GET_MODE (*op0);
3474 if (mode == VOIDmode)
3475 mode = GET_MODE (*op1);
3476
3477 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3478
3479 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3480 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3481 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3482 for GTU/LEU in Thumb mode. */
3483 if (mode == DImode)
3484 {
3485 rtx tem;
3486
3487 if (code == GT || code == LE
3488 || (!TARGET_ARM && (code == GTU || code == LEU)))
3489 {
3490 /* Missing comparison. First try to use an available
3491 comparison. */
3492 if (GET_CODE (*op1) == CONST_INT)
3493 {
3494 i = INTVAL (*op1);
3495 switch (code)
3496 {
3497 case GT:
3498 case LE:
3499 if (i != maxval
3500 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3501 {
3502 *op1 = GEN_INT (i + 1);
3503 return code == GT ? GE : LT;
3504 }
3505 break;
3506 case GTU:
3507 case LEU:
3508 if (i != ~((unsigned HOST_WIDE_INT) 0)
3509 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3510 {
3511 *op1 = GEN_INT (i + 1);
3512 return code == GTU ? GEU : LTU;
3513 }
3514 break;
3515 default:
3516 gcc_unreachable ();
3517 }
3518 }
3519
3520 /* If that did not work, reverse the condition. */
3521 tem = *op0;
3522 *op0 = *op1;
3523 *op1 = tem;
3524 return swap_condition (code);
3525 }
3526
3527 return code;
3528 }
3529
3530 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3531 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3532 to facilitate possible combining with a cmp into 'ands'. */
3533 if (mode == SImode
3534 && GET_CODE (*op0) == ZERO_EXTEND
3535 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3536 && GET_MODE (XEXP (*op0, 0)) == QImode
3537 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3538 && subreg_lowpart_p (XEXP (*op0, 0))
3539 && *op1 == const0_rtx)
3540 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3541 GEN_INT (255));
3542
3543 /* Comparisons smaller than DImode. Only adjust comparisons against
3544 an out-of-range constant. */
3545 if (GET_CODE (*op1) != CONST_INT
3546 || const_ok_for_arm (INTVAL (*op1))
3547 || const_ok_for_arm (- INTVAL (*op1)))
3548 return code;
3549
3550 i = INTVAL (*op1);
3551
3552 switch (code)
3553 {
3554 case EQ:
3555 case NE:
3556 return code;
3557
3558 case GT:
3559 case LE:
3560 if (i != maxval
3561 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3562 {
3563 *op1 = GEN_INT (i + 1);
3564 return code == GT ? GE : LT;
3565 }
3566 break;
3567
3568 case GE:
3569 case LT:
3570 if (i != ~maxval
3571 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3572 {
3573 *op1 = GEN_INT (i - 1);
3574 return code == GE ? GT : LE;
3575 }
3576 break;
3577
3578 case GTU:
3579 case LEU:
3580 if (i != ~((unsigned HOST_WIDE_INT) 0)
3581 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3582 {
3583 *op1 = GEN_INT (i + 1);
3584 return code == GTU ? GEU : LTU;
3585 }
3586 break;
3587
3588 case GEU:
3589 case LTU:
3590 if (i != 0
3591 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3592 {
3593 *op1 = GEN_INT (i - 1);
3594 return code == GEU ? GTU : LEU;
3595 }
3596 break;
3597
3598 default:
3599 gcc_unreachable ();
3600 }
3601
3602 return code;
3603 }
3604
3605
3606 /* Define how to find the value returned by a function. */
3607
3608 static rtx
3609 arm_function_value(const_tree type, const_tree func,
3610 bool outgoing ATTRIBUTE_UNUSED)
3611 {
3612 enum machine_mode mode;
3613 int unsignedp ATTRIBUTE_UNUSED;
3614 rtx r ATTRIBUTE_UNUSED;
3615
3616 mode = TYPE_MODE (type);
3617
3618 if (TARGET_AAPCS_BASED)
3619 return aapcs_allocate_return_reg (mode, type, func);
3620
3621 /* Promote integer types. */
3622 if (INTEGRAL_TYPE_P (type))
3623 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3624
3625 /* Promotes small structs returned in a register to full-word size
3626 for big-endian AAPCS. */
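/* For example, a 3-byte struct has its size rounded up to 4 bytes here
   and is returned as an SImode value.  */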
3627 if (arm_return_in_msb (type))
3628 {
3629 HOST_WIDE_INT size = int_size_in_bytes (type);
3630 if (size % UNITS_PER_WORD != 0)
3631 {
3632 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3633 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3634 }
3635 }
3636
3637 return arm_libcall_value_1 (mode);
3638 }
3639
3640 static int
3641 libcall_eq (const void *p1, const void *p2)
3642 {
3643 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3644 }
3645
3646 static hashval_t
3647 libcall_hash (const void *p1)
3648 {
3649 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3650 }
3651
3652 static void
3653 add_libcall (htab_t htab, rtx libcall)
3654 {
3655 *htab_find_slot (htab, libcall, INSERT) = libcall;
3656 }
3657
3658 static bool
3659 arm_libcall_uses_aapcs_base (const_rtx libcall)
3660 {
3661 static bool init_done = false;
3662 static htab_t libcall_htab;
3663
3664 if (!init_done)
3665 {
3666 init_done = true;
3667
3668 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3669 NULL);
3670 add_libcall (libcall_htab,
3671 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3672 add_libcall (libcall_htab,
3673 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3674 add_libcall (libcall_htab,
3675 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3676 add_libcall (libcall_htab,
3677 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3678
3679 add_libcall (libcall_htab,
3680 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3681 add_libcall (libcall_htab,
3682 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3683 add_libcall (libcall_htab,
3684 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3685 add_libcall (libcall_htab,
3686 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3687
3688 add_libcall (libcall_htab,
3689 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3690 add_libcall (libcall_htab,
3691 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3692 add_libcall (libcall_htab,
3693 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3694 add_libcall (libcall_htab,
3695 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3696 add_libcall (libcall_htab,
3697 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3698 add_libcall (libcall_htab,
3699 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3700 add_libcall (libcall_htab,
3701 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3702 add_libcall (libcall_htab,
3703 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3704
3705 /* Values from double-precision helper functions are returned in core
3706 registers if the selected core only supports single-precision
3707 arithmetic, even if we are using the hard-float ABI. The same is
3708 true for single-precision helpers, but we will never be using the
3709 hard-float ABI on a CPU which doesn't support single-precision
3710 operations in hardware. */
3711 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3712 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3713 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3714 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3715 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3716 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3717 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3718 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3719 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3720 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3721 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3722 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3723 SFmode));
3724 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3725 DFmode));
3726 }
3727
3728 return libcall && htab_find (libcall_htab, libcall) != NULL;
3729 }
3730
3731 static rtx
3732 arm_libcall_value_1 (enum machine_mode mode)
3733 {
3734 if (TARGET_AAPCS_BASED)
3735 return aapcs_libcall_value (mode);
3736 else if (TARGET_IWMMXT_ABI
3737 && arm_vector_mode_supported_p (mode))
3738 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3739 else
3740 return gen_rtx_REG (mode, ARG_REGISTER (1));
3741 }
3742
3743 /* Define how to find the value returned by a library function
3744 assuming the value has mode MODE. */
3745
3746 static rtx
3747 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3748 {
3749 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3750 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3751 {
3752 /* The following libcalls return their result in integer registers,
3753 even though they return a floating point value. */
3754 if (arm_libcall_uses_aapcs_base (libcall))
3755 return gen_rtx_REG (mode, ARG_REGISTER(1));
3756
3757 }
3758
3759 return arm_libcall_value_1 (mode);
3760 }
3761
3762 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3763
3764 static bool
3765 arm_function_value_regno_p (const unsigned int regno)
3766 {
3767 if (regno == ARG_REGISTER (1)
3768 || (TARGET_32BIT
3769 && TARGET_AAPCS_BASED
3770 && TARGET_VFP
3771 && TARGET_HARD_FLOAT
3772 && regno == FIRST_VFP_REGNUM)
3773 || (TARGET_IWMMXT_ABI
3774 && regno == FIRST_IWMMXT_REGNUM))
3775 return true;
3776
3777 return false;
3778 }
3779
3780 /* Determine the amount of memory needed to store the possible return
3781 registers of an untyped call. */
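/* The base figure of 16 bytes below presumably covers the four core
   registers r0-r3; the extra space allows for return values in VFP
   registers (when the hard-float ABI is in use) and in an iWMMXt
   register.  */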
3782 int
3783 arm_apply_result_size (void)
3784 {
3785 int size = 16;
3786
3787 if (TARGET_32BIT)
3788 {
3789 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3790 size += 32;
3791 if (TARGET_IWMMXT_ABI)
3792 size += 8;
3793 }
3794
3795 return size;
3796 }
3797
3798 /* Decide whether TYPE should be returned in memory (true)
3799 or in a register (false). FNTYPE is the type of the function making
3800 the call. */
3801 static bool
3802 arm_return_in_memory (const_tree type, const_tree fntype)
3803 {
3804 HOST_WIDE_INT size;
3805
3806 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3807
3808 if (TARGET_AAPCS_BASED)
3809 {
3810 /* Simple, non-aggregate types (i.e. not including vectors and
3811 complex) are always returned in a register (or registers).
3812 We don't care about which register here, so we can short-cut
3813 some of the detail. */
3814 if (!AGGREGATE_TYPE_P (type)
3815 && TREE_CODE (type) != VECTOR_TYPE
3816 && TREE_CODE (type) != COMPLEX_TYPE)
3817 return false;
3818
3819 /* Any return value that is no larger than one word can be
3820 returned in r0. */
3821 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3822 return false;
3823
3824 /* Check any available co-processors to see if they accept the
3825 type as a register candidate (VFP, for example, can return
3826 some aggregates in consecutive registers). These aren't
3827 available if the call is variadic. */
3828 if (aapcs_select_return_coproc (type, fntype) >= 0)
3829 return false;
3830
3831 /* Vector values should be returned using ARM registers, not
3832 memory (unless they're over 16 bytes, which will break since
3833 we only have four call-clobbered registers to play with). */
3834 if (TREE_CODE (type) == VECTOR_TYPE)
3835 return (size < 0 || size > (4 * UNITS_PER_WORD));
3836
3837 /* The rest go in memory. */
3838 return true;
3839 }
3840
3841 if (TREE_CODE (type) == VECTOR_TYPE)
3842 return (size < 0 || size > (4 * UNITS_PER_WORD));
3843
3844 if (!AGGREGATE_TYPE_P (type)
3845 && (TREE_CODE (type) != VECTOR_TYPE))
3846 /* All simple types are returned in registers. */
3847 return false;
3848
3849 if (arm_abi != ARM_ABI_APCS)
3850 {
3851 /* ATPCS and later return aggregate types in memory only if they are
3852 larger than a word (or are variable size). */
3853 return (size < 0 || size > UNITS_PER_WORD);
3854 }
3855
3856 /* For the arm-wince targets we choose to be compatible with Microsoft's
3857 ARM and Thumb compilers, which always return aggregates in memory. */
3858 #ifndef ARM_WINCE
3859 /* All structures/unions bigger than one word are returned in memory.
3860 Also catch the case where int_size_in_bytes returns -1. In this case
3861 the aggregate is either huge or of variable size, and in either case
3862 we will want to return it via memory and not in a register. */
3863 if (size < 0 || size > UNITS_PER_WORD)
3864 return true;
3865
3866 if (TREE_CODE (type) == RECORD_TYPE)
3867 {
3868 tree field;
3869
3870 /* For a struct the APCS says that we only return in a register
3871 if the type is 'integer like' and every addressable element
3872 has an offset of zero. For practical purposes this means
3873 that the structure can have at most one non bit-field element
3874 and that this element must be the first one in the structure. */
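/* For example, struct { int x; } can be returned in a register, while
   struct { short a; short b; } (the same size) goes in memory because
   its second member is addressable and is not a bit-field.  */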
3875
3876 /* Find the first field, ignoring non FIELD_DECL things which will
3877 have been created by C++. */
3878 for (field = TYPE_FIELDS (type);
3879 field && TREE_CODE (field) != FIELD_DECL;
3880 field = DECL_CHAIN (field))
3881 continue;
3882
3883 if (field == NULL)
3884 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3885
3886 /* Check that the first field is valid for returning in a register. */
3887
3888 /* ... Floats are not allowed */
3889 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3890 return true;
3891
3892 /* ... Aggregates that are not themselves valid for returning in
3893 a register are not allowed. */
3894 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3895 return true;
3896
3897 /* Now check the remaining fields, if any. Only bitfields are allowed,
3898 since they are not addressable. */
3899 for (field = DECL_CHAIN (field);
3900 field;
3901 field = DECL_CHAIN (field))
3902 {
3903 if (TREE_CODE (field) != FIELD_DECL)
3904 continue;
3905
3906 if (!DECL_BIT_FIELD_TYPE (field))
3907 return true;
3908 }
3909
3910 return false;
3911 }
3912
3913 if (TREE_CODE (type) == UNION_TYPE)
3914 {
3915 tree field;
3916
3917 /* Unions can be returned in registers if every element is
3918 integral, or can be returned in an integer register. */
3919 for (field = TYPE_FIELDS (type);
3920 field;
3921 field = DECL_CHAIN (field))
3922 {
3923 if (TREE_CODE (field) != FIELD_DECL)
3924 continue;
3925
3926 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3927 return true;
3928
3929 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3930 return true;
3931 }
3932
3933 return false;
3934 }
3935 #endif /* not ARM_WINCE */
3936
3937 /* Return all other types in memory. */
3938 return true;
3939 }
3940
3941 const struct pcs_attribute_arg
3942 {
3943 const char *arg;
3944 enum arm_pcs value;
3945 } pcs_attribute_args[] =
3946 {
3947 {"aapcs", ARM_PCS_AAPCS},
3948 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3949 #if 0
3950 /* We could recognize these, but changes would be needed elsewhere
3951 * to implement them. */
3952 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3953 {"atpcs", ARM_PCS_ATPCS},
3954 {"apcs", ARM_PCS_APCS},
3955 #endif
3956 {NULL, ARM_PCS_UNKNOWN}
3957 };
3958
3959 static enum arm_pcs
3960 arm_pcs_from_attribute (tree attr)
3961 {
3962 const struct pcs_attribute_arg *ptr;
3963 const char *arg;
3964
3965 /* Get the value of the argument. */
3966 if (TREE_VALUE (attr) == NULL_TREE
3967 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3968 return ARM_PCS_UNKNOWN;
3969
3970 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3971
3972 /* Check it against the list of known arguments. */
3973 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3974 if (streq (arg, ptr->arg))
3975 return ptr->value;
3976
3977 /* An unrecognized PCS variant. */
3978 return ARM_PCS_UNKNOWN;
3979 }
3980
3981 /* Get the PCS variant to use for this call. TYPE is the function's type
3982 specification, DECL is the specific declaration. DECL may be null if
3983 the call could be indirect or if this is a library call. */
3984 static enum arm_pcs
3985 arm_get_pcs_model (const_tree type, const_tree decl)
3986 {
3987 bool user_convention = false;
3988 enum arm_pcs user_pcs = arm_pcs_default;
3989 tree attr;
3990
3991 gcc_assert (type);
3992
3993 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3994 if (attr)
3995 {
3996 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3997 user_convention = true;
3998 }
3999
4000 if (TARGET_AAPCS_BASED)
4001 {
4002 /* Detect varargs functions. These always use the base rules
4003 (no argument is ever a candidate for a co-processor
4004 register). */
4005 bool base_rules = stdarg_p (type);
4006
4007 if (user_convention)
4008 {
4009 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4010 sorry ("non-AAPCS derived PCS variant");
4011 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4012 error ("variadic functions must use the base AAPCS variant");
4013 }
4014
4015 if (base_rules)
4016 return ARM_PCS_AAPCS;
4017 else if (user_convention)
4018 return user_pcs;
4019 else if (decl && flag_unit_at_a_time)
4020 {
4021 /* Local functions never leak outside this compilation unit,
4022 so we are free to use whatever conventions are
4023 appropriate. */
4024 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4025 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4026 if (i && i->local)
4027 return ARM_PCS_AAPCS_LOCAL;
4028 }
4029 }
4030 else if (user_convention && user_pcs != arm_pcs_default)
4031 sorry ("PCS variant");
4032
4033 /* For everything else we use the target's default. */
4034 return arm_pcs_default;
4035 }
4036
4037
4038 static void
4039 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
4040 const_tree fntype ATTRIBUTE_UNUSED,
4041 rtx libcall ATTRIBUTE_UNUSED,
4042 const_tree fndecl ATTRIBUTE_UNUSED)
4043 {
4044 /* Record the unallocated VFP registers. */
4045 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4046 pcum->aapcs_vfp_reg_alloc = 0;
4047 }
4048
4049 /* Walk down the type tree of TYPE counting consecutive base elements.
4050 If *MODEP is VOIDmode, then set it to the first valid floating point
4051 type. If a non-floating point type is found, or if a floating point
4052 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4053 otherwise return the count in the sub-tree. */
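/* For example, struct { double x; double y; } yields a count of 2 with
   *MODEP set to DFmode, whereas struct { float f; double d; } mixes
   element types and yields -1.  */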
4054 static int
4055 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4056 {
4057 enum machine_mode mode;
4058 HOST_WIDE_INT size;
4059
4060 switch (TREE_CODE (type))
4061 {
4062 case REAL_TYPE:
4063 mode = TYPE_MODE (type);
4064 if (mode != DFmode && mode != SFmode)
4065 return -1;
4066
4067 if (*modep == VOIDmode)
4068 *modep = mode;
4069
4070 if (*modep == mode)
4071 return 1;
4072
4073 break;
4074
4075 case COMPLEX_TYPE:
4076 mode = TYPE_MODE (TREE_TYPE (type));
4077 if (mode != DFmode && mode != SFmode)
4078 return -1;
4079
4080 if (*modep == VOIDmode)
4081 *modep = mode;
4082
4083 if (*modep == mode)
4084 return 2;
4085
4086 break;
4087
4088 case VECTOR_TYPE:
4089 /* Use V2SImode and V4SImode as representatives of all 64-bit
4090 and 128-bit vector types, whether or not those modes are
4091 supported with the present options. */
4092 size = int_size_in_bytes (type);
4093 switch (size)
4094 {
4095 case 8:
4096 mode = V2SImode;
4097 break;
4098 case 16:
4099 mode = V4SImode;
4100 break;
4101 default:
4102 return -1;
4103 }
4104
4105 if (*modep == VOIDmode)
4106 *modep = mode;
4107
4108 /* Vector modes are considered to be opaque: two vectors are
4109 equivalent for the purposes of being homogeneous aggregates
4110 if they are the same size. */
4111 if (*modep == mode)
4112 return 1;
4113
4114 break;
4115
4116 case ARRAY_TYPE:
4117 {
4118 int count;
4119 tree index = TYPE_DOMAIN (type);
4120
4121 /* Can't handle incomplete types. */
4122 if (!COMPLETE_TYPE_P(type))
4123 return -1;
4124
4125 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4126 if (count == -1
4127 || !index
4128 || !TYPE_MAX_VALUE (index)
4129 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4130 || !TYPE_MIN_VALUE (index)
4131 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4132 || count < 0)
4133 return -1;
4134
4135 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4136 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4137
4138 /* There must be no padding. */
4139 if (!host_integerp (TYPE_SIZE (type), 1)
4140 || (tree_low_cst (TYPE_SIZE (type), 1)
4141 != count * GET_MODE_BITSIZE (*modep)))
4142 return -1;
4143
4144 return count;
4145 }
4146
4147 case RECORD_TYPE:
4148 {
4149 int count = 0;
4150 int sub_count;
4151 tree field;
4152
4153 /* Can't handle incomplete types. */
4154 if (!COMPLETE_TYPE_P(type))
4155 return -1;
4156
4157 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4158 {
4159 if (TREE_CODE (field) != FIELD_DECL)
4160 continue;
4161
4162 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4163 if (sub_count < 0)
4164 return -1;
4165 count += sub_count;
4166 }
4167
4168 /* There must be no padding. */
4169 if (!host_integerp (TYPE_SIZE (type), 1)
4170 || (tree_low_cst (TYPE_SIZE (type), 1)
4171 != count * GET_MODE_BITSIZE (*modep)))
4172 return -1;
4173
4174 return count;
4175 }
4176
4177 case UNION_TYPE:
4178 case QUAL_UNION_TYPE:
4179 {
4180 /* These aren't very interesting except in a degenerate case. */
4181 int count = 0;
4182 int sub_count;
4183 tree field;
4184
4185 /* Can't handle incomplete types. */
4186 if (!COMPLETE_TYPE_P(type))
4187 return -1;
4188
4189 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4190 {
4191 if (TREE_CODE (field) != FIELD_DECL)
4192 continue;
4193
4194 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4195 if (sub_count < 0)
4196 return -1;
4197 count = count > sub_count ? count : sub_count;
4198 }
4199
4200 /* There must be no padding. */
4201 if (!host_integerp (TYPE_SIZE (type), 1)
4202 || (tree_low_cst (TYPE_SIZE (type), 1)
4203 != count * GET_MODE_BITSIZE (*modep)))
4204 return -1;
4205
4206 return count;
4207 }
4208
4209 default:
4210 break;
4211 }
4212
4213 return -1;
4214 }
4215
4216 /* Return true if PCS_VARIANT should use VFP registers. */
4217 static bool
4218 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4219 {
4220 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4221 {
4222 static bool seen_thumb1_vfp = false;
4223
4224 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4225 {
4226 sorry ("Thumb-1 hard-float VFP ABI");
4227 /* sorry() is not immediately fatal, so only display this once. */
4228 seen_thumb1_vfp = true;
4229 }
4230
4231 return true;
4232 }
4233
4234 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4235 return false;
4236
4237 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4238 && (TARGET_VFP_DOUBLE || !is_double));
4239 }
4240
4241 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4242 suitable for passing or returning in VFP registers for the PCS
4243 variant selected. If it is, then *BASE_MODE is updated to contain
4244 a machine mode describing each element of the argument's type and
4245 *COUNT to hold the number of such elements. */
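/* For example, a _Complex double argument (DCmode) sets *COUNT to 2 and
   *BASE_MODE to DFmode, making it a candidate for a pair of D registers
   when the VFP ABI is in use.  */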
4246 static bool
4247 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4248 enum machine_mode mode, const_tree type,
4249 enum machine_mode *base_mode, int *count)
4250 {
4251 enum machine_mode new_mode = VOIDmode;
4252
4253 /* If we have the type information, prefer that to working things
4254 out from the mode. */
4255 if (type)
4256 {
4257 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4258
4259 if (ag_count > 0 && ag_count <= 4)
4260 *count = ag_count;
4261 else
4262 return false;
4263 }
4264 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4265 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4266 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4267 {
4268 *count = 1;
4269 new_mode = mode;
4270 }
4271 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4272 {
4273 *count = 2;
4274 new_mode = (mode == DCmode ? DFmode : SFmode);
4275 }
4276 else
4277 return false;
4278
4279
4280 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4281 return false;
4282
4283 *base_mode = new_mode;
4284 return true;
4285 }
4286
4287 static bool
4288 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4289 enum machine_mode mode, const_tree type)
4290 {
4291 int count ATTRIBUTE_UNUSED;
4292 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4293
4294 if (!use_vfp_abi (pcs_variant, false))
4295 return false;
4296 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4297 &ag_mode, &count);
4298 }
4299
4300 static bool
4301 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4302 const_tree type)
4303 {
4304 if (!use_vfp_abi (pcum->pcs_variant, false))
4305 return false;
4306
4307 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4308 &pcum->aapcs_vfp_rmode,
4309 &pcum->aapcs_vfp_rcount);
4310 }
4311
4312 static bool
4313 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4314 const_tree type ATTRIBUTE_UNUSED)
4315 {
4316 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4317 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4318 int regno;
4319
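/* For a homogeneous aggregate of two doubles, for example, SHIFT is 2 (a
   D register spans two S registers) and MASK is 0xf, so the loop below
   looks for four consecutive free single-precision slots starting at an
   even register number.  */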
4320 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4321 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4322 {
4323 pcum->aapcs_vfp_reg_alloc = mask << regno;
4324 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4325 {
4326 int i;
4327 int rcount = pcum->aapcs_vfp_rcount;
4328 int rshift = shift;
4329 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4330 rtx par;
4331 if (!TARGET_NEON)
4332 {
4333 /* Avoid using unsupported vector modes. */
4334 if (rmode == V2SImode)
4335 rmode = DImode;
4336 else if (rmode == V4SImode)
4337 {
4338 rmode = DImode;
4339 rcount *= 2;
4340 rshift /= 2;
4341 }
4342 }
4343 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4344 for (i = 0; i < rcount; i++)
4345 {
4346 rtx tmp = gen_rtx_REG (rmode,
4347 FIRST_VFP_REGNUM + regno + i * rshift);
4348 tmp = gen_rtx_EXPR_LIST
4349 (VOIDmode, tmp,
4350 GEN_INT (i * GET_MODE_SIZE (rmode)));
4351 XVECEXP (par, 0, i) = tmp;
4352 }
4353
4354 pcum->aapcs_reg = par;
4355 }
4356 else
4357 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4358 return true;
4359 }
4360 return false;
4361 }
4362
4363 static rtx
4364 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4365 enum machine_mode mode,
4366 const_tree type)
4367 {
4368 if (!use_vfp_abi (pcs_variant, false))
4369 return NULL;
4370
4371 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4372 {
4373 int count;
4374 enum machine_mode ag_mode;
4375 int i;
4376 rtx par;
4377 int shift;
4378
4379 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4380 &ag_mode, &count);
4381
4382 if (!TARGET_NEON)
4383 {
4384 if (ag_mode == V2SImode)
4385 ag_mode = DImode;
4386 else if (ag_mode == V4SImode)
4387 {
4388 ag_mode = DImode;
4389 count *= 2;
4390 }
4391 }
4392 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4393 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4394 for (i = 0; i < count; i++)
4395 {
4396 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4397 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4398 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4399 XVECEXP (par, 0, i) = tmp;
4400 }
4401
4402 return par;
4403 }
4404
4405 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4406 }
4407
4408 static void
4409 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4410 enum machine_mode mode ATTRIBUTE_UNUSED,
4411 const_tree type ATTRIBUTE_UNUSED)
4412 {
4413 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4414 pcum->aapcs_vfp_reg_alloc = 0;
4415 return;
4416 }
4417
4418 #define AAPCS_CP(X) \
4419 { \
4420 aapcs_ ## X ## _cum_init, \
4421 aapcs_ ## X ## _is_call_candidate, \
4422 aapcs_ ## X ## _allocate, \
4423 aapcs_ ## X ## _is_return_candidate, \
4424 aapcs_ ## X ## _allocate_return_reg, \
4425 aapcs_ ## X ## _advance \
4426 }
4427
4428 /* Table of co-processors that can be used to pass arguments in
4429 registers. Ideally no argument should be a candidate for more than
4430 one co-processor table entry, but the table is processed in order
4431 and stops after the first match. If that entry then fails to put
4432 the argument into a co-processor register, the argument will go on
4433 the stack. */
4434 static struct
4435 {
4436 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4437 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4438
4439 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4440 BLKmode) is a candidate for this co-processor's registers; this
4441 function should ignore any position-dependent state in
4442 CUMULATIVE_ARGS and only use call-type dependent information. */
4443 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4444
4445 /* Return true if the argument does get a co-processor register; it
4446 should set aapcs_reg to an RTX of the register allocated as is
4447 required for a return from FUNCTION_ARG. */
4448 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4449
4450 /* Return true if a result of mode MODE (or type TYPE if MODE is
4451 BLKmode) can be returned in this co-processor's registers. */
4452 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4453
4454 /* Allocate and return an RTX element to hold the return type of a
4455 call; this routine must not fail and will only be called if
4456 is_return_candidate returned true with the same parameters. */
4457 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4458
4459 /* Finish processing this argument and prepare to start processing
4460 the next one. */
4461 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4462 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4463 {
4464 AAPCS_CP(vfp)
4465 };
4466
4467 #undef AAPCS_CP
4468
4469 static int
4470 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4471 const_tree type)
4472 {
4473 int i;
4474
4475 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4476 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4477 return i;
4478
4479 return -1;
4480 }
4481
4482 static int
4483 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4484 {
4485 /* We aren't passed a decl, so we can't check that a call is local.
4486 However, it isn't clear that that would be a win anyway, since it
4487 might limit some tail-calling opportunities. */
4488 enum arm_pcs pcs_variant;
4489
4490 if (fntype)
4491 {
4492 const_tree fndecl = NULL_TREE;
4493
4494 if (TREE_CODE (fntype) == FUNCTION_DECL)
4495 {
4496 fndecl = fntype;
4497 fntype = TREE_TYPE (fntype);
4498 }
4499
4500 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4501 }
4502 else
4503 pcs_variant = arm_pcs_default;
4504
4505 if (pcs_variant != ARM_PCS_AAPCS)
4506 {
4507 int i;
4508
4509 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4510 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4511 TYPE_MODE (type),
4512 type))
4513 return i;
4514 }
4515 return -1;
4516 }
4517
4518 static rtx
4519 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4520 const_tree fntype)
4521 {
4522 /* We aren't passed a decl, so we can't check that a call is local.
4523 However, it isn't clear that that would be a win anyway, since it
4524 might limit some tail-calling opportunities. */
4525 enum arm_pcs pcs_variant;
4526 int unsignedp ATTRIBUTE_UNUSED;
4527
4528 if (fntype)
4529 {
4530 const_tree fndecl = NULL_TREE;
4531
4532 if (TREE_CODE (fntype) == FUNCTION_DECL)
4533 {
4534 fndecl = fntype;
4535 fntype = TREE_TYPE (fntype);
4536 }
4537
4538 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4539 }
4540 else
4541 pcs_variant = arm_pcs_default;
4542
4543 /* Promote integer types. */
4544 if (type && INTEGRAL_TYPE_P (type))
4545 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4546
4547 if (pcs_variant != ARM_PCS_AAPCS)
4548 {
4549 int i;
4550
4551 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4552 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4553 type))
4554 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4555 mode, type);
4556 }
4557
4558 /* Promotes small structs returned in a register to full-word size
4559 for big-endian AAPCS. */
4560 if (type && arm_return_in_msb (type))
4561 {
4562 HOST_WIDE_INT size = int_size_in_bytes (type);
4563 if (size % UNITS_PER_WORD != 0)
4564 {
4565 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4566 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4567 }
4568 }
4569
4570 return gen_rtx_REG (mode, R0_REGNUM);
4571 }
4572
4573 static rtx
4574 aapcs_libcall_value (enum machine_mode mode)
4575 {
4576 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4577 && GET_MODE_SIZE (mode) <= 4)
4578 mode = SImode;
4579
4580 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4581 }
4582
4583 /* Lay out a function argument using the AAPCS rules. The rule
4584 numbers referred to here are those in the AAPCS. */
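/* For example, under the base (soft-float) AAPCS rules, for a call
   f (int, double, int): the first int goes in r0; the double is
   doubleword aligned, so rule C3 rounds the NCRN up from 1 to 2 and it
   occupies r2-r3; the final int then finds no core registers left and
   goes on the stack (C6/C7).  */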
4585 static void
4586 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4587 const_tree type, bool named)
4588 {
4589 int nregs, nregs2;
4590 int ncrn;
4591
4592 /* We only need to do this once per argument. */
4593 if (pcum->aapcs_arg_processed)
4594 return;
4595
4596 pcum->aapcs_arg_processed = true;
4597
4598 /* Special case: if named is false then we are handling an incoming
4599 anonymous argument which is on the stack. */
4600 if (!named)
4601 return;
4602
4603 /* Is this a potential co-processor register candidate? */
4604 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4605 {
4606 int slot = aapcs_select_call_coproc (pcum, mode, type);
4607 pcum->aapcs_cprc_slot = slot;
4608
4609 /* We don't have to apply any of the rules from part B of the
4610 preparation phase, these are handled elsewhere in the
4611 compiler. */
4612
4613 if (slot >= 0)
4614 {
4615 /* A Co-processor register candidate goes either in its own
4616 class of registers or on the stack. */
4617 if (!pcum->aapcs_cprc_failed[slot])
4618 {
4619 /* C1.cp - Try to allocate the argument to co-processor
4620 registers. */
4621 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4622 return;
4623
4624 /* C2.cp - Put the argument on the stack and note that we
4625 can't assign any more candidates in this slot. We also
4626 need to note that we have allocated stack space, so that
4627 we won't later try to split a non-cprc candidate between
4628 core registers and the stack. */
4629 pcum->aapcs_cprc_failed[slot] = true;
4630 pcum->can_split = false;
4631 }
4632
4633 /* We didn't get a register, so this argument goes on the
4634 stack. */
4635 gcc_assert (pcum->can_split == false);
4636 return;
4637 }
4638 }
4639
4640 /* C3 - For double-word aligned arguments, round the NCRN up to the
4641 next even number. */
4642 ncrn = pcum->aapcs_ncrn;
4643 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4644 ncrn++;
4645
4646 nregs = ARM_NUM_REGS2(mode, type);
4647
4648 /* Sigh, this test should really assert that nregs > 0, but a GCC
4649 extension allows empty structs and then gives them empty size; it
4650 then allows such a structure to be passed by value. For some of
4651 the code below we have to pretend that such an argument has
4652 non-zero size so that we 'locate' it correctly either in
4653 registers or on the stack. */
4654 gcc_assert (nregs >= 0);
4655
4656 nregs2 = nregs ? nregs : 1;
4657
4658 /* C4 - Argument fits entirely in core registers. */
4659 if (ncrn + nregs2 <= NUM_ARG_REGS)
4660 {
4661 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4662 pcum->aapcs_next_ncrn = ncrn + nregs;
4663 return;
4664 }
4665
4666 /* C5 - Some core registers left and there are no arguments already
4667 on the stack: split this argument between the remaining core
4668 registers and the stack. */
4669 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4670 {
4671 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4672 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4673 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4674 return;
4675 }
4676
4677 /* C6 - NCRN is set to 4. */
4678 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4679
4680 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4681 return;
4682 }
4683
4684 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4685 for a call to a function whose data type is FNTYPE.
4686 For a library call, FNTYPE is NULL. */
4687 void
4688 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4689 rtx libname,
4690 tree fndecl ATTRIBUTE_UNUSED)
4691 {
4692 /* Long call handling. */
4693 if (fntype)
4694 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4695 else
4696 pcum->pcs_variant = arm_pcs_default;
4697
4698 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4699 {
4700 if (arm_libcall_uses_aapcs_base (libname))
4701 pcum->pcs_variant = ARM_PCS_AAPCS;
4702
4703 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4704 pcum->aapcs_reg = NULL_RTX;
4705 pcum->aapcs_partial = 0;
4706 pcum->aapcs_arg_processed = false;
4707 pcum->aapcs_cprc_slot = -1;
4708 pcum->can_split = true;
4709
4710 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4711 {
4712 int i;
4713
4714 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4715 {
4716 pcum->aapcs_cprc_failed[i] = false;
4717 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4718 }
4719 }
4720 return;
4721 }
4722
4723 /* Legacy ABIs */
4724
4725 /* On the ARM, the offset starts at 0. */
4726 pcum->nregs = 0;
4727 pcum->iwmmxt_nregs = 0;
4728 pcum->can_split = true;
4729
4730 /* Varargs vectors are treated the same as long long.
4731 named_count avoids having to change the way arm handles 'named'. */
4732 pcum->named_count = 0;
4733 pcum->nargs = 0;
4734
4735 if (TARGET_REALLY_IWMMXT && fntype)
4736 {
4737 tree fn_arg;
4738
4739 for (fn_arg = TYPE_ARG_TYPES (fntype);
4740 fn_arg;
4741 fn_arg = TREE_CHAIN (fn_arg))
4742 pcum->named_count += 1;
4743
4744 if (! pcum->named_count)
4745 pcum->named_count = INT_MAX;
4746 }
4747 }
4748
4749
4750 /* Return true if mode/type need doubleword alignment. */
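/* For example, DImode and DFmode values have 64-bit alignment, which
   exceeds PARM_BOUNDARY (32 bits on ARM), so they need doubleword
   alignment; SImode values do not.  */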
4751 static bool
4752 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4753 {
4754 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4755 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4756 }
4757
4758
4759 /* Determine where to put an argument to a function.
4760 Value is zero to push the argument on the stack,
4761 or a hard register in which to store the argument.
4762
4763 MODE is the argument's machine mode.
4764 TYPE is the data type of the argument (as a tree).
4765 This is null for libcalls where that information may
4766 not be available.
4767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4768 the preceding args and about the function being called.
4769 NAMED is nonzero if this argument is a named parameter
4770 (otherwise it is an extra parameter matching an ellipsis).
4771
4772 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4773 other arguments are passed on the stack. If (NAMED == 0) (which happens
4774 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4775 defined), say it is passed on the stack (function_prologue will
4776 indeed make it be passed on the stack if necessary). */
4777
4778 static rtx
4779 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4780 const_tree type, bool named)
4781 {
4782 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4783 int nregs;
4784
4785 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4786 a call insn (op3 of a call_value insn). */
4787 if (mode == VOIDmode)
4788 return const0_rtx;
4789
4790 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4791 {
4792 aapcs_layout_arg (pcum, mode, type, named);
4793 return pcum->aapcs_reg;
4794 }
4795
4796 /* Varargs vectors are treated the same as long long.
4797 named_count avoids having to change the way arm handles 'named'. */
4798 if (TARGET_IWMMXT_ABI
4799 && arm_vector_mode_supported_p (mode)
4800 && pcum->named_count > pcum->nargs + 1)
4801 {
4802 if (pcum->iwmmxt_nregs <= 9)
4803 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4804 else
4805 {
4806 pcum->can_split = false;
4807 return NULL_RTX;
4808 }
4809 }
4810
4811 /* Put doubleword aligned quantities in even register pairs. */
4812 if (pcum->nregs & 1
4813 && ARM_DOUBLEWORD_ALIGN
4814 && arm_needs_doubleword_align (mode, type))
4815 pcum->nregs++;
4816
4817 /* Only allow splitting an arg between regs and memory if all preceding
4818 args were allocated to regs. For args passed by reference we only count
4819 the reference pointer. */
4820 if (pcum->can_split)
4821 nregs = 1;
4822 else
4823 nregs = ARM_NUM_REGS2 (mode, type);
4824
4825 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4826 return NULL_RTX;
4827
4828 return gen_rtx_REG (mode, pcum->nregs);
4829 }
4830
4831 static unsigned int
4832 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4833 {
4834 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4835 ? DOUBLEWORD_ALIGNMENT
4836 : PARM_BOUNDARY);
4837 }
4838
4839 static int
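/* Return the number of bytes of an argument of mode MODE and type TYPE
   that are passed in registers when the argument is split between
   registers and the stack; zero if it is not split.  For example, under
   the legacy ABIs an 8-byte argument that arrives when only r3 is still
   free has 4 bytes passed in r3 and the rest on the stack.  */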
4840 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4841 tree type, bool named)
4842 {
4843 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4844 int nregs = pcum->nregs;
4845
4846 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4847 {
4848 aapcs_layout_arg (pcum, mode, type, named);
4849 return pcum->aapcs_partial;
4850 }
4851
4852 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4853 return 0;
4854
4855 if (NUM_ARG_REGS > nregs
4856 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4857 && pcum->can_split)
4858 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4859
4860 return 0;
4861 }
4862
4863 /* Update the data in PCUM to advance over an argument
4864 of mode MODE and data type TYPE.
4865 (TYPE is null for libcalls where that information may not be available.) */
4866
4867 static void
4868 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4869 const_tree type, bool named)
4870 {
4871 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4872
4873 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4874 {
4875 aapcs_layout_arg (pcum, mode, type, named);
4876
4877 if (pcum->aapcs_cprc_slot >= 0)
4878 {
4879 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4880 type);
4881 pcum->aapcs_cprc_slot = -1;
4882 }
4883
4884 /* Generic stuff. */
4885 pcum->aapcs_arg_processed = false;
4886 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4887 pcum->aapcs_reg = NULL_RTX;
4888 pcum->aapcs_partial = 0;
4889 }
4890 else
4891 {
4892 pcum->nargs += 1;
4893 if (arm_vector_mode_supported_p (mode)
4894 && pcum->named_count > pcum->nargs
4895 && TARGET_IWMMXT_ABI)
4896 pcum->iwmmxt_nregs += 1;
4897 else
4898 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4899 }
4900 }
4901
4902 /* Variable sized types are passed by reference. This is a GCC
4903 extension to the ARM ABI. */
4904
4905 static bool
4906 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4907 enum machine_mode mode ATTRIBUTE_UNUSED,
4908 const_tree type, bool named ATTRIBUTE_UNUSED)
4909 {
4910 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4911 }
4912 \f
4913 /* Encode the current state of the #pragma [no_]long_calls. */
4914 typedef enum
4915 {
4916 OFF, /* No #pragma [no_]long_calls is in effect. */
4917 LONG, /* #pragma long_calls is in effect. */
4918 SHORT /* #pragma no_long_calls is in effect. */
4919 } arm_pragma_enum;
4920
4921 static arm_pragma_enum arm_pragma_long_calls = OFF;
4922
4923 void
4924 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4925 {
4926 arm_pragma_long_calls = LONG;
4927 }
4928
4929 void
4930 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4931 {
4932 arm_pragma_long_calls = SHORT;
4933 }
4934
4935 void
4936 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4937 {
4938 arm_pragma_long_calls = OFF;
4939 }
4940 \f
4941 /* Handle an attribute requiring a FUNCTION_DECL;
4942 arguments as in struct attribute_spec.handler. */
4943 static tree
4944 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4945 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4946 {
4947 if (TREE_CODE (*node) != FUNCTION_DECL)
4948 {
4949 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4950 name);
4951 *no_add_attrs = true;
4952 }
4953
4954 return NULL_TREE;
4955 }
4956
4957 /* Handle an "interrupt" or "isr" attribute;
4958 arguments as in struct attribute_spec.handler. */
4959 static tree
4960 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4961 bool *no_add_attrs)
4962 {
4963 if (DECL_P (*node))
4964 {
4965 if (TREE_CODE (*node) != FUNCTION_DECL)
4966 {
4967 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4968 name);
4969 *no_add_attrs = true;
4970 }
4971 /* FIXME: the argument if any is checked for type attributes;
4972 should it be checked for decl ones? */
4973 }
4974 else
4975 {
4976 if (TREE_CODE (*node) == FUNCTION_TYPE
4977 || TREE_CODE (*node) == METHOD_TYPE)
4978 {
4979 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4980 {
4981 warning (OPT_Wattributes, "%qE attribute ignored",
4982 name);
4983 *no_add_attrs = true;
4984 }
4985 }
4986 else if (TREE_CODE (*node) == POINTER_TYPE
4987 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4988 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4989 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4990 {
4991 *node = build_variant_type_copy (*node);
4992 TREE_TYPE (*node) = build_type_attribute_variant
4993 (TREE_TYPE (*node),
4994 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4995 *no_add_attrs = true;
4996 }
4997 else
4998 {
4999 /* Possibly pass this attribute on from the type to a decl. */
5000 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5001 | (int) ATTR_FLAG_FUNCTION_NEXT
5002 | (int) ATTR_FLAG_ARRAY_NEXT))
5003 {
5004 *no_add_attrs = true;
5005 return tree_cons (name, args, NULL_TREE);
5006 }
5007 else
5008 {
5009 warning (OPT_Wattributes, "%qE attribute ignored",
5010 name);
5011 }
5012 }
5013 }
5014
5015 return NULL_TREE;
5016 }
5017
5018 /* Handle a "pcs" attribute; arguments as in struct
5019 attribute_spec.handler. */
5020 static tree
5021 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5022 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5023 {
5024 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5025 {
5026 warning (OPT_Wattributes, "%qE attribute ignored", name);
5027 *no_add_attrs = true;
5028 }
5029 return NULL_TREE;
5030 }
5031
5032 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5033 /* Handle the "notshared" attribute. This attribute is another way of
5034 requesting hidden visibility. ARM's compiler supports
5035 "__declspec(notshared)"; we support the same thing via an
5036 attribute. */
5037
5038 static tree
5039 arm_handle_notshared_attribute (tree *node,
5040 tree name ATTRIBUTE_UNUSED,
5041 tree args ATTRIBUTE_UNUSED,
5042 int flags ATTRIBUTE_UNUSED,
5043 bool *no_add_attrs)
5044 {
5045 tree decl = TYPE_NAME (*node);
5046
5047 if (decl)
5048 {
5049 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5050 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5051 *no_add_attrs = false;
5052 }
5053 return NULL_TREE;
5054 }
5055 #endif
5056
5057 /* Return 0 if the attributes for two types are incompatible, 1 if they
5058 are compatible, and 2 if they are nearly compatible (which causes a
5059 warning to be generated). */
5060 static int
5061 arm_comp_type_attributes (const_tree type1, const_tree type2)
5062 {
5063 int l1, l2, s1, s2;
5064
5065 /* Check for mismatch of non-default calling convention. */
5066 if (TREE_CODE (type1) != FUNCTION_TYPE)
5067 return 1;
5068
5069 /* Check for mismatched call attributes. */
5070 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5071 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5072 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5073 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5074
5075 /* Only bother to check if an attribute is defined. */
5076 if (l1 | l2 | s1 | s2)
5077 {
5078 /* If one type has an attribute, the other must have the same attribute. */
5079 if ((l1 != l2) || (s1 != s2))
5080 return 0;
5081
5082 /* Disallow mixed attributes. */
5083 if ((l1 & s2) || (l2 & s1))
5084 return 0;
5085 }
5086
5087 /* Check for mismatched ISR attribute. */
5088 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5089 if (! l1)
5090 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5091 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5092 if (! l2)
5093 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5094 if (l1 != l2)
5095 return 0;
5096
5097 return 1;
5098 }
5099
5100 /* Assigns default attributes to newly defined type. This is used to
5101 set short_call/long_call attributes for function types of
5102 functions defined inside corresponding #pragma scopes. */
5103 static void
5104 arm_set_default_type_attributes (tree type)
5105 {
5106 /* Add __attribute__ ((long_call)) to all functions when inside
5107 #pragma long_calls, or __attribute__ ((short_call)) when inside
5108 #pragma no_long_calls. */
5109 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5110 {
5111 tree type_attr_list, attr_name;
5112 type_attr_list = TYPE_ATTRIBUTES (type);
5113
5114 if (arm_pragma_long_calls == LONG)
5115 attr_name = get_identifier ("long_call");
5116 else if (arm_pragma_long_calls == SHORT)
5117 attr_name = get_identifier ("short_call");
5118 else
5119 return;
5120
5121 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5122 TYPE_ATTRIBUTES (type) = type_attr_list;
5123 }
5124 }
5125 \f
5126 /* Return true if DECL is known to be linked into section SECTION. */
5127
5128 static bool
5129 arm_function_in_section_p (tree decl, section *section)
5130 {
5131 /* We can only be certain about functions defined in the same
5132 compilation unit. */
5133 if (!TREE_STATIC (decl))
5134 return false;
5135
5136 /* Make sure that SYMBOL always binds to the definition in this
5137 compilation unit. */
5138 if (!targetm.binds_local_p (decl))
5139 return false;
5140
5141 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5142 if (!DECL_SECTION_NAME (decl))
5143 {
5144 /* Make sure that we will not create a unique section for DECL. */
5145 if (flag_function_sections || DECL_ONE_ONLY (decl))
5146 return false;
5147 }
5148
5149 return function_section (decl) == section;
5150 }
5151
5152 /* Return nonzero if a 32-bit "long_call" should be generated for
5153 a call from the current function to DECL. We generate a long_call
5154 if the function:
5155
5156 a. has an __attribute__ ((long_call))
5157 or b. is within the scope of a #pragma long_calls
5158 or c. the -mlong-calls command line switch has been specified
5159
5160 However we do not generate a long call if the function:
5161
5162 d. has an __attribute__ ((short_call))
5163 or e. is inside the scope of a #pragma no_long_calls
5164 or f. is defined in the same section as the current function. */
5165
5166 bool
5167 arm_is_long_call_p (tree decl)
5168 {
5169 tree attrs;
5170
5171 if (!decl)
5172 return TARGET_LONG_CALLS;
5173
5174 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5175 if (lookup_attribute ("short_call", attrs))
5176 return false;
5177
5178 /* For "f", be conservative, and only cater for cases in which the
5179 whole of the current function is placed in the same section. */
5180 if (!flag_reorder_blocks_and_partition
5181 && TREE_CODE (decl) == FUNCTION_DECL
5182 && arm_function_in_section_p (decl, current_function_section ()))
5183 return false;
5184
5185 if (lookup_attribute ("long_call", attrs))
5186 return true;
5187
5188 return TARGET_LONG_CALLS;
5189 }
5190
5191 /* Return nonzero if it is ok to make a tail-call to DECL. */
5192 static bool
5193 arm_function_ok_for_sibcall (tree decl, tree exp)
5194 {
5195 unsigned long func_type;
5196
5197 if (cfun->machine->sibcall_blocked)
5198 return false;
5199
5200 /* Never tailcall something for which we have no decl, or if we
5201 are generating code for Thumb-1. */
5202 if (decl == NULL || TARGET_THUMB1)
5203 return false;
5204
5205 /* The PIC register is live on entry to VxWorks PLT entries, so we
5206 must make the call before restoring the PIC register. */
5207 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5208 return false;
5209
5210 /* Cannot tail-call to long calls, since these are out of range of
5211 a branch instruction. */
5212 if (arm_is_long_call_p (decl))
5213 return false;
5214
5215 /* If we are interworking and the function is not declared static
5216 then we can't tail-call it unless we know that it exists in this
5217 compilation unit (since it might be a Thumb routine). */
5218 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5219 return false;
5220
5221 func_type = arm_current_func_type ();
5222 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5223 if (IS_INTERRUPT (func_type))
5224 return false;
5225
5226 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5227 {
5228 /* Check that the return value locations are the same. For
5229 example that we aren't returning a value from the sibling in
5230 a VFP register but then need to transfer it to a core
5231 register. */
5232 rtx a, b;
5233
5234 a = arm_function_value (TREE_TYPE (exp), decl, false);
5235 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5236 cfun->decl, false);
5237 if (!rtx_equal_p (a, b))
5238 return false;
5239 }
5240
5241 /* Never tailcall if function may be called with a misaligned SP. */
5242 if (IS_STACKALIGN (func_type))
5243 return false;
5244
5245 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5246 references should become a NOP. Don't convert such calls into
5247 sibling calls. */
5248 if (TARGET_AAPCS_BASED
5249 && arm_abi == ARM_ABI_AAPCS
5250 && DECL_WEAK (decl))
5251 return false;
5252
5253 /* Everything else is ok. */
5254 return true;
5255 }
5256
5257 \f
5258 /* Addressing mode support functions. */
5259
5260 /* Return nonzero if X is a legitimate immediate operand when compiling
5261 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5262 int
5263 legitimate_pic_operand_p (rtx x)
5264 {
5265 if (GET_CODE (x) == SYMBOL_REF
5266 || (GET_CODE (x) == CONST
5267 && GET_CODE (XEXP (x, 0)) == PLUS
5268 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5269 return 0;
5270
5271 return 1;
5272 }
5273
5274 /* Record that the current function needs a PIC register. Initialize
5275 cfun->machine->pic_reg if we have not already done so. */
5276
5277 static void
5278 require_pic_register (void)
5279 {
5280 /* A lot of the logic here is made obscure by the fact that this
5281 routine gets called as part of the rtx cost estimation process.
5282 We don't want those calls to affect any assumptions about the real
5283 function; and further, we can't call entry_of_function() until we
5284 start the real expansion process. */
5285 if (!crtl->uses_pic_offset_table)
5286 {
5287 gcc_assert (can_create_pseudo_p ());
5288 if (arm_pic_register != INVALID_REGNUM)
5289 {
5290 if (!cfun->machine->pic_reg)
5291 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5292
5293 /* Play games to avoid marking the function as needing pic
5294 if we are being called as part of the cost-estimation
5295 process. */
5296 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5297 crtl->uses_pic_offset_table = 1;
5298 }
5299 else
5300 {
5301 rtx seq, insn;
5302
5303 if (!cfun->machine->pic_reg)
5304 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5305
5306 /* Play games to avoid marking the function as needing pic
5307 if we are being called as part of the cost-estimation
5308 process. */
5309 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5310 {
5311 crtl->uses_pic_offset_table = 1;
5312 start_sequence ();
5313
5314 arm_load_pic_register (0UL);
5315
5316 seq = get_insns ();
5317 end_sequence ();
5318
5319 for (insn = seq; insn; insn = NEXT_INSN (insn))
5320 if (INSN_P (insn))
5321 INSN_LOCATOR (insn) = prologue_locator;
5322
5323 /* We can be called during expansion of PHI nodes, where
5324 we can't yet emit instructions directly in the final
5325 insn stream. Queue the insns on the entry edge, they will
5326 be committed after everything else is expanded. */
5327 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5328 }
5329 }
5330 }
5331 }
5332
5333 rtx
5334 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5335 {
5336 if (GET_CODE (orig) == SYMBOL_REF
5337 || GET_CODE (orig) == LABEL_REF)
5338 {
5339 rtx insn;
5340
5341 if (reg == 0)
5342 {
5343 gcc_assert (can_create_pseudo_p ());
5344 reg = gen_reg_rtx (Pmode);
5345 }
5346
5347 /* VxWorks does not impose a fixed gap between segments; the run-time
5348 gap can be different from the object-file gap. We therefore can't
5349 use GOTOFF unless we are absolutely sure that the symbol is in the
5350 same segment as the GOT. Unfortunately, the flexibility of linker
5351 scripts means that we can't be sure of that in general, so assume
5352 that GOTOFF is never valid on VxWorks. */
5353 if ((GET_CODE (orig) == LABEL_REF
5354 || (GET_CODE (orig) == SYMBOL_REF &&
5355 SYMBOL_REF_LOCAL_P (orig)))
5356 && NEED_GOT_RELOC
5357 && !TARGET_VXWORKS_RTP)
5358 insn = arm_pic_static_addr (orig, reg);
5359 else
5360 {
5361 rtx pat;
5362 rtx mem;
5363
5364 /* If this function doesn't have a pic register, create one now. */
5365 require_pic_register ();
5366
5367 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5368
5369 /* Make the MEM as close to a constant as possible. */
5370 mem = SET_SRC (pat);
5371 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5372 MEM_READONLY_P (mem) = 1;
5373 MEM_NOTRAP_P (mem) = 1;
5374
5375 insn = emit_insn (pat);
5376 }
5377
5378 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5379 by loop. */
5380 set_unique_reg_note (insn, REG_EQUAL, orig);
5381
5382 return reg;
5383 }
5384 else if (GET_CODE (orig) == CONST)
5385 {
5386 rtx base, offset;
5387
5388 if (GET_CODE (XEXP (orig, 0)) == PLUS
5389 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5390 return orig;
5391
5392 /* Handle the case where we have: const (UNSPEC_TLS). */
5393 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5394 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5395 return orig;
5396
5397 /* Handle the case where we have:
5398 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5399 CONST_INT. */
5400 if (GET_CODE (XEXP (orig, 0)) == PLUS
5401 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5402 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5403 {
5404 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5405 return orig;
5406 }
5407
5408 if (reg == 0)
5409 {
5410 gcc_assert (can_create_pseudo_p ());
5411 reg = gen_reg_rtx (Pmode);
5412 }
5413
5414 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5415
5416 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5417 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5418 base == reg ? 0 : reg);
5419
5420 if (GET_CODE (offset) == CONST_INT)
5421 {
5422 /* The base register doesn't really matter, we only want to
5423 test the index for the appropriate mode. */
5424 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5425 {
5426 gcc_assert (can_create_pseudo_p ());
5427 offset = force_reg (Pmode, offset);
5428 }
5429
5430 if (GET_CODE (offset) == CONST_INT)
5431 return plus_constant (Pmode, base, INTVAL (offset));
5432 }
5433
5434 if (GET_MODE_SIZE (mode) > 4
5435 && (GET_MODE_CLASS (mode) == MODE_INT
5436 || TARGET_SOFT_FLOAT))
5437 {
5438 emit_insn (gen_addsi3 (reg, base, offset));
5439 return reg;
5440 }
5441
5442 return gen_rtx_PLUS (Pmode, base, offset);
5443 }
5444
5445 return orig;
5446 }
5447
5448
5449 /* Find a spare register to use during the prolog of a function. */
5450
5451 static int
5452 thumb_find_work_register (unsigned long pushed_regs_mask)
5453 {
5454 int reg;
5455
5456 /* Check the argument registers first as these are call-used. The
5457 register allocation order means that sometimes r3 might be used
5458 but earlier argument registers might not, so check them all. */
5459 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5460 if (!df_regs_ever_live_p (reg))
5461 return reg;
5462
5463 /* Before going on to check the call-saved registers we can try a couple
5464 more ways of deducing that r3 is available. The first is when we are
5465 pushing anonymous arguments onto the stack and we have fewer than 4
5466 registers' worth of fixed arguments(*). In this case r3 will be part of
5467 the variable argument list and so we can be sure that it will be
5468 pushed right at the start of the function. Hence it will be available
5469 for the rest of the prologue.
5470 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5471 if (cfun->machine->uses_anonymous_args
5472 && crtl->args.pretend_args_size > 0)
5473 return LAST_ARG_REGNUM;
5474
5475 /* The other case is when we have fixed arguments but fewer than 4 registers'
5476 worth. In this case r3 might be used in the body of the function, but
5477 it is not being used to convey an argument into the function. In theory
5478 we could just check crtl->args.size to see how many bytes are
5479 being passed in argument registers, but it seems that it is unreliable.
5480 Sometimes it will have the value 0 when in fact arguments are being
5481 passed. (See testcase execute/20021111-1.c for an example). So we also
5482 check the crtl->args.info.nregs field as well. The problem with this field is
5483 that it makes no allowances for arguments that are passed to the
5484 function but which are not used. Hence we could miss an opportunity
5485 when a function has an unused argument in r3. But it is better to be
5486 safe than to be sorry. */
5487 if (! cfun->machine->uses_anonymous_args
5488 && crtl->args.size >= 0
5489 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5490 && crtl->args.info.nregs < 4)
5491 return LAST_ARG_REGNUM;
5492
5493 /* Otherwise look for a call-saved register that is going to be pushed. */
5494 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5495 if (pushed_regs_mask & (1 << reg))
5496 return reg;
5497
5498 if (TARGET_THUMB2)
5499 {
5500 /* Thumb-2 can use high regs. */
5501 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5502 if (pushed_regs_mask & (1 << reg))
5503 return reg;
5504 }
5505 /* Something went wrong - thumb_compute_save_reg_mask()
5506 should have arranged for a suitable register to be pushed. */
5507 gcc_unreachable ();
5508 }
5509
5510 static GTY(()) int pic_labelno;
5511
5512 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5513 low register. */
5514
5515 void
5516 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5517 {
5518 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5519
5520 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5521 return;
5522
5523 gcc_assert (flag_pic);
5524
5525 pic_reg = cfun->machine->pic_reg;
5526 if (TARGET_VXWORKS_RTP)
5527 {
5528 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5529 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5530 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5531
5532 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5533
5534 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5535 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5536 }
5537 else
5538 {
5539 /* We use an UNSPEC rather than a LABEL_REF because this label
5540 never appears in the code stream. */
5541
5542 labelno = GEN_INT (pic_labelno++);
5543 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5544 l1 = gen_rtx_CONST (VOIDmode, l1);
5545
5546 /* On the ARM the PC register contains 'dot + 8' at the time of the
5547 addition, on the Thumb it is 'dot + 4'. */
5548 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5549 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5550 UNSPEC_GOTSYM_OFF);
5551 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5552
5553 if (TARGET_32BIT)
5554 {
5555 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5556 }
5557 else /* TARGET_THUMB1 */
5558 {
5559 if (arm_pic_register != INVALID_REGNUM
5560 && REGNO (pic_reg) > LAST_LO_REGNUM)
5561 {
5562 /* We will have pushed the pic register, so we should always be
5563 able to find a work register. */
5564 pic_tmp = gen_rtx_REG (SImode,
5565 thumb_find_work_register (saved_regs));
5566 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5567 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5568 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5569 }
5570 else
5571 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5572 }
5573 }
5574
5575 /* Need to emit this whether or not we obey regdecls,
5576 since setjmp/longjmp can cause life info to screw up. */
5577 emit_use (pic_reg);
5578 }
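
/* Illustrative sketch (an addition, not from the original sources): for the
   non-VxWorks, ARM-mode path above the emitted sequence conventionally ends
   up looking roughly like

       ldr     rPIC, .LCn
   .LPICn:
       add     rPIC, pc, rPIC

   where the literal at .LCn holds _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8); the
   +8 reflects the 'dot + 8' PC behaviour noted above.  The register and
   label names here are placeholders.  */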
5579
5580 /* Generate code to load the address of a static var when flag_pic is set. */
5581 static rtx
5582 arm_pic_static_addr (rtx orig, rtx reg)
5583 {
5584 rtx l1, labelno, offset_rtx, insn;
5585
5586 gcc_assert (flag_pic);
5587
5588 /* We use an UNSPEC rather than a LABEL_REF because this label
5589 never appears in the code stream. */
5590 labelno = GEN_INT (pic_labelno++);
5591 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5592 l1 = gen_rtx_CONST (VOIDmode, l1);
5593
5594 /* On the ARM the PC register contains 'dot + 8' at the time of the
5595 addition, on the Thumb it is 'dot + 4'. */
5596 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5597 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5598 UNSPEC_SYMBOL_OFFSET);
5599 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5600
5601 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5602 return insn;
5603 }
5604
5605 /* Return nonzero if X is valid as an ARM state addressing register. */
5606 static int
5607 arm_address_register_rtx_p (rtx x, int strict_p)
5608 {
5609 int regno;
5610
5611 if (GET_CODE (x) != REG)
5612 return 0;
5613
5614 regno = REGNO (x);
5615
5616 if (strict_p)
5617 return ARM_REGNO_OK_FOR_BASE_P (regno);
5618
5619 return (regno <= LAST_ARM_REGNUM
5620 || regno >= FIRST_PSEUDO_REGISTER
5621 || regno == FRAME_POINTER_REGNUM
5622 || regno == ARG_POINTER_REGNUM);
5623 }
5624
5625 /* Return TRUE if this rtx is the difference of a symbol and a label,
5626 and will reduce to a PC-relative relocation in the object file.
5627 Expressions like this can be left alone when generating PIC, rather
5628 than forced through the GOT. */
5629 static int
5630 pcrel_constant_p (rtx x)
5631 {
5632 if (GET_CODE (x) == MINUS)
5633 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5634
5635 return FALSE;
5636 }
5637
5638 /* Return true if X will surely end up in an index register after next
5639 splitting pass. */
5640 static bool
5641 will_be_in_index_register (const_rtx x)
5642 {
5643 /* arm.md: calculate_pic_address will split this into a register. */
5644 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5645 }
5646
5647 /* Return nonzero if X is a valid ARM state address operand. */
5648 int
5649 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5650 int strict_p)
5651 {
5652 bool use_ldrd;
5653 enum rtx_code code = GET_CODE (x);
5654
5655 if (arm_address_register_rtx_p (x, strict_p))
5656 return 1;
5657
5658 use_ldrd = (TARGET_LDRD
5659 && (mode == DImode
5660 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5661
5662 if (code == POST_INC || code == PRE_DEC
5663 || ((code == PRE_INC || code == POST_DEC)
5664 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5665 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5666
5667 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5668 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5669 && GET_CODE (XEXP (x, 1)) == PLUS
5670 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5671 {
5672 rtx addend = XEXP (XEXP (x, 1), 1);
5673
5674 /* Don't allow ldrd post increment by register because it's hard
5675 to fixup invalid register choices. */
5676 if (use_ldrd
5677 && GET_CODE (x) == POST_MODIFY
5678 && GET_CODE (addend) == REG)
5679 return 0;
5680
5681 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5682 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5683 }
5684
5685 /* After reload constants split into minipools will have addresses
5686 from a LABEL_REF. */
5687 else if (reload_completed
5688 && (code == LABEL_REF
5689 || (code == CONST
5690 && GET_CODE (XEXP (x, 0)) == PLUS
5691 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5692 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5693 return 1;
5694
5695 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5696 return 0;
5697
5698 else if (code == PLUS)
5699 {
5700 rtx xop0 = XEXP (x, 0);
5701 rtx xop1 = XEXP (x, 1);
5702
5703 return ((arm_address_register_rtx_p (xop0, strict_p)
5704 && ((GET_CODE(xop1) == CONST_INT
5705 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5706 || (!strict_p && will_be_in_index_register (xop1))))
5707 || (arm_address_register_rtx_p (xop1, strict_p)
5708 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5709 }
5710
5711 #if 0
5712 /* Reload currently can't handle MINUS, so disable this for now */
5713 else if (GET_CODE (x) == MINUS)
5714 {
5715 rtx xop0 = XEXP (x, 0);
5716 rtx xop1 = XEXP (x, 1);
5717
5718 return (arm_address_register_rtx_p (xop0, strict_p)
5719 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5720 }
5721 #endif
5722
5723 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5724 && code == SYMBOL_REF
5725 && CONSTANT_POOL_ADDRESS_P (x)
5726 && ! (flag_pic
5727 && symbol_mentioned_p (get_pool_constant (x))
5728 && ! pcrel_constant_p (get_pool_constant (x))))
5729 return 1;
5730
5731 return 0;
5732 }
5733
5734 /* Return nonzero if X is a valid Thumb-2 address operand. */
5735 static int
5736 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5737 {
5738 bool use_ldrd;
5739 enum rtx_code code = GET_CODE (x);
5740
5741 if (arm_address_register_rtx_p (x, strict_p))
5742 return 1;
5743
5744 use_ldrd = (TARGET_LDRD
5745 && (mode == DImode
5746 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5747
5748 if (code == POST_INC || code == PRE_DEC
5749 || ((code == PRE_INC || code == POST_DEC)
5750 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5751 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5752
5753 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5754 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5755 && GET_CODE (XEXP (x, 1)) == PLUS
5756 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5757 {
5758 /* Thumb-2 only has autoincrement by constant. */
5759 rtx addend = XEXP (XEXP (x, 1), 1);
5760 HOST_WIDE_INT offset;
5761
5762 if (GET_CODE (addend) != CONST_INT)
5763 return 0;
5764
5765 offset = INTVAL(addend);
5766 if (GET_MODE_SIZE (mode) <= 4)
5767 return (offset > -256 && offset < 256);
5768
5769 return (use_ldrd && offset > -1024 && offset < 1024
5770 && (offset & 3) == 0);
5771 }
5772
5773 /* After reload constants split into minipools will have addresses
5774 from a LABEL_REF. */
5775 else if (reload_completed
5776 && (code == LABEL_REF
5777 || (code == CONST
5778 && GET_CODE (XEXP (x, 0)) == PLUS
5779 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5780 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5781 return 1;
5782
5783 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5784 return 0;
5785
5786 else if (code == PLUS)
5787 {
5788 rtx xop0 = XEXP (x, 0);
5789 rtx xop1 = XEXP (x, 1);
5790
5791 return ((arm_address_register_rtx_p (xop0, strict_p)
5792 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5793 || (!strict_p && will_be_in_index_register (xop1))))
5794 || (arm_address_register_rtx_p (xop1, strict_p)
5795 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5796 }
5797
5798 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5799 && code == SYMBOL_REF
5800 && CONSTANT_POOL_ADDRESS_P (x)
5801 && ! (flag_pic
5802 && symbol_mentioned_p (get_pool_constant (x))
5803 && ! pcrel_constant_p (get_pool_constant (x))))
5804 return 1;
5805
5806 return 0;
5807 }
5808
5809 /* Return nonzero if INDEX is valid for an address index operand in
5810 ARM state. */
5811 static int
5812 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5813 int strict_p)
5814 {
5815 HOST_WIDE_INT range;
5816 enum rtx_code code = GET_CODE (index);
5817
5818 /* Standard coprocessor addressing modes. */
5819 if (TARGET_HARD_FLOAT
5820 && TARGET_VFP
5821 && (mode == SFmode || mode == DFmode))
5822 return (code == CONST_INT && INTVAL (index) < 1024
5823 && INTVAL (index) > -1024
5824 && (INTVAL (index) & 3) == 0);
5825
5826 /* For quad modes, we restrict the constant offset to be slightly less
5827 than what the instruction format permits. We do this because for
5828 quad mode moves, we will actually decompose them into two separate
5829 double-mode reads or writes. INDEX must therefore be a valid
5830 (double-mode) offset and so should INDEX+8. */
5831 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5832 return (code == CONST_INT
5833 && INTVAL (index) < 1016
5834 && INTVAL (index) > -1024
5835 && (INTVAL (index) & 3) == 0);
5836
5837 /* We have no such constraint on double mode offsets, so we permit the
5838 full range of the instruction format. */
5839 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5840 return (code == CONST_INT
5841 && INTVAL (index) < 1024
5842 && INTVAL (index) > -1024
5843 && (INTVAL (index) & 3) == 0);
5844
5845 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5846 return (code == CONST_INT
5847 && INTVAL (index) < 1024
5848 && INTVAL (index) > -1024
5849 && (INTVAL (index) & 3) == 0);
5850
5851 if (arm_address_register_rtx_p (index, strict_p)
5852 && (GET_MODE_SIZE (mode) <= 4))
5853 return 1;
5854
5855 if (mode == DImode || mode == DFmode)
5856 {
5857 if (code == CONST_INT)
5858 {
5859 HOST_WIDE_INT val = INTVAL (index);
5860
5861 if (TARGET_LDRD)
5862 return val > -256 && val < 256;
5863 else
5864 return val > -4096 && val < 4092;
5865 }
5866
5867 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5868 }
5869
5870 if (GET_MODE_SIZE (mode) <= 4
5871 && ! (arm_arch4
5872 && (mode == HImode
5873 || mode == HFmode
5874 || (mode == QImode && outer == SIGN_EXTEND))))
5875 {
5876 if (code == MULT)
5877 {
5878 rtx xiop0 = XEXP (index, 0);
5879 rtx xiop1 = XEXP (index, 1);
5880
5881 return ((arm_address_register_rtx_p (xiop0, strict_p)
5882 && power_of_two_operand (xiop1, SImode))
5883 || (arm_address_register_rtx_p (xiop1, strict_p)
5884 && power_of_two_operand (xiop0, SImode)));
5885 }
5886 else if (code == LSHIFTRT || code == ASHIFTRT
5887 || code == ASHIFT || code == ROTATERT)
5888 {
5889 rtx op = XEXP (index, 1);
5890
5891 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5892 && GET_CODE (op) == CONST_INT
5893 && INTVAL (op) > 0
5894 && INTVAL (op) <= 31);
5895 }
5896 }
5897
5898 /* For ARM v4 we may be doing a sign-extend operation during the
5899 load. */
5900 if (arm_arch4)
5901 {
5902 if (mode == HImode
5903 || mode == HFmode
5904 || (outer == SIGN_EXTEND && mode == QImode))
5905 range = 256;
5906 else
5907 range = 4096;
5908 }
5909 else
5910 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5911
5912 return (code == CONST_INT
5913 && INTVAL (index) < range
5914 && INTVAL (index) > -range);
5915 }
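
/* Illustrative note (an addition, not from the original sources): the MULT
   and shift cases accepted above correspond to scaled register offsets in
   ARM addressing modes, e.g. "ldr r0, [r1, r2, lsl #2]" for a reg*4 index;
   the shift amount must be a constant in the range checked above.  */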
5916
5917 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5918 index operand, i.e. 1, 2, 4 or 8. */
5919 static bool
5920 thumb2_index_mul_operand (rtx op)
5921 {
5922 HOST_WIDE_INT val;
5923
5924 if (GET_CODE(op) != CONST_INT)
5925 return false;
5926
5927 val = INTVAL(op);
5928 return (val == 1 || val == 2 || val == 4 || val == 8);
5929 }
5930
5931 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5932 static int
5933 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5934 {
5935 enum rtx_code code = GET_CODE (index);
5936
5937 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5938 /* Standard coprocessor addressing modes. */
5939 if (TARGET_HARD_FLOAT
5940 && TARGET_VFP
5941 && (mode == SFmode || mode == DFmode))
5942 return (code == CONST_INT && INTVAL (index) < 1024
5943 /* Thumb-2 allows only an index range > -256 for its core register
5944 load/stores. Since we allow SF/DF in core registers, we have
5945 to use the intersection between -256~4096 (core) and -1024~1024
5946 (coprocessor). */
5947 && INTVAL (index) > -256
5948 && (INTVAL (index) & 3) == 0);
5949
5950 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5951 {
5952 /* For DImode assume values will usually live in core regs
5953 and only allow LDRD addressing modes. */
5954 if (!TARGET_LDRD || mode != DImode)
5955 return (code == CONST_INT
5956 && INTVAL (index) < 1024
5957 && INTVAL (index) > -1024
5958 && (INTVAL (index) & 3) == 0);
5959 }
5960
5961 /* For quad modes, we restrict the constant offset to be slightly less
5962 than what the instruction format permits. We do this because for
5963 quad mode moves, we will actually decompose them into two separate
5964 double-mode reads or writes. INDEX must therefore be a valid
5965 (double-mode) offset and so should INDEX+8. */
5966 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5967 return (code == CONST_INT
5968 && INTVAL (index) < 1016
5969 && INTVAL (index) > -1024
5970 && (INTVAL (index) & 3) == 0);
5971
5972 /* We have no such constraint on double mode offsets, so we permit the
5973 full range of the instruction format. */
5974 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5975 return (code == CONST_INT
5976 && INTVAL (index) < 1024
5977 && INTVAL (index) > -1024
5978 && (INTVAL (index) & 3) == 0);
5979
5980 if (arm_address_register_rtx_p (index, strict_p)
5981 && (GET_MODE_SIZE (mode) <= 4))
5982 return 1;
5983
5984 if (mode == DImode || mode == DFmode)
5985 {
5986 if (code == CONST_INT)
5987 {
5988 HOST_WIDE_INT val = INTVAL (index);
5989 /* ??? Can we assume ldrd for thumb2? */
5990 /* Thumb-2 ldrd only has reg+const addressing modes. */
5991 /* ldrd supports offsets of +-1020.
5992 However the ldr fallback does not. */
5993 return val > -256 && val < 256 && (val & 3) == 0;
5994 }
5995 else
5996 return 0;
5997 }
5998
5999 if (code == MULT)
6000 {
6001 rtx xiop0 = XEXP (index, 0);
6002 rtx xiop1 = XEXP (index, 1);
6003
6004 return ((arm_address_register_rtx_p (xiop0, strict_p)
6005 && thumb2_index_mul_operand (xiop1))
6006 || (arm_address_register_rtx_p (xiop1, strict_p)
6007 && thumb2_index_mul_operand (xiop0)));
6008 }
6009 else if (code == ASHIFT)
6010 {
6011 rtx op = XEXP (index, 1);
6012
6013 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6014 && GET_CODE (op) == CONST_INT
6015 && INTVAL (op) > 0
6016 && INTVAL (op) <= 3);
6017 }
6018
6019 return (code == CONST_INT
6020 && INTVAL (index) < 4096
6021 && INTVAL (index) > -256);
6022 }
6023
6024 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6025 static int
6026 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6027 {
6028 int regno;
6029
6030 if (GET_CODE (x) != REG)
6031 return 0;
6032
6033 regno = REGNO (x);
6034
6035 if (strict_p)
6036 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6037
6038 return (regno <= LAST_LO_REGNUM
6039 || regno > LAST_VIRTUAL_REGISTER
6040 || regno == FRAME_POINTER_REGNUM
6041 || (GET_MODE_SIZE (mode) >= 4
6042 && (regno == STACK_POINTER_REGNUM
6043 || regno >= FIRST_PSEUDO_REGISTER
6044 || x == hard_frame_pointer_rtx
6045 || x == arg_pointer_rtx)));
6046 }
6047
6048 /* Return nonzero if x is a legitimate index register. This is the case
6049 for any base register that can access a QImode object. */
6050 inline static int
6051 thumb1_index_register_rtx_p (rtx x, int strict_p)
6052 {
6053 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6054 }
6055
6056 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6057
6058 The AP may be eliminated to either the SP or the FP, so we use the
6059 least common denominator, e.g. SImode, and offsets from 0 to 64.
6060
6061 ??? Verify whether the above is the right approach.
6062
6063 ??? Also, the FP may be eliminated to the SP, so perhaps that
6064 needs special handling also.
6065
6066 ??? Look at how the mips16 port solves this problem. It probably uses
6067 better ways to solve some of these problems.
6068
6069 Although it is not incorrect, we don't accept QImode and HImode
6070 addresses based on the frame pointer or arg pointer until the
6071 reload pass starts. This is so that eliminating such addresses
6072 into stack based ones won't produce impossible code. */
6073 int
6074 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6075 {
6076 /* ??? Not clear if this is right. Experiment. */
6077 if (GET_MODE_SIZE (mode) < 4
6078 && !(reload_in_progress || reload_completed)
6079 && (reg_mentioned_p (frame_pointer_rtx, x)
6080 || reg_mentioned_p (arg_pointer_rtx, x)
6081 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6082 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6083 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6084 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6085 return 0;
6086
6087 /* Accept any base register. SP only in SImode or larger. */
6088 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6089 return 1;
6090
6091 /* This is PC relative data before arm_reorg runs. */
6092 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6093 && GET_CODE (x) == SYMBOL_REF
6094 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6095 return 1;
6096
6097 /* This is PC relative data after arm_reorg runs. */
6098 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6099 && reload_completed
6100 && (GET_CODE (x) == LABEL_REF
6101 || (GET_CODE (x) == CONST
6102 && GET_CODE (XEXP (x, 0)) == PLUS
6103 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6104 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6105 return 1;
6106
6107 /* Post-inc indexing only supported for SImode and larger. */
6108 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6109 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6110 return 1;
6111
6112 else if (GET_CODE (x) == PLUS)
6113 {
6114 /* REG+REG address can be any two index registers. */
6115 /* We disallow FRAME+REG addressing since we know that FRAME
6116 will be replaced with STACK, and SP relative addressing only
6117 permits SP+OFFSET. */
6118 if (GET_MODE_SIZE (mode) <= 4
6119 && XEXP (x, 0) != frame_pointer_rtx
6120 && XEXP (x, 1) != frame_pointer_rtx
6121 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6122 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6123 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6124 return 1;
6125
6126 /* REG+const has 5-7 bit offset for non-SP registers. */
6127 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6128 || XEXP (x, 0) == arg_pointer_rtx)
6129 && GET_CODE (XEXP (x, 1)) == CONST_INT
6130 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6131 return 1;
6132
6133 /* REG+const has 10-bit offset for SP, but only SImode and
6134 larger is supported. */
6135 /* ??? Should probably check for DI/DFmode overflow here
6136 just like GO_IF_LEGITIMATE_OFFSET does. */
6137 else if (GET_CODE (XEXP (x, 0)) == REG
6138 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6139 && GET_MODE_SIZE (mode) >= 4
6140 && GET_CODE (XEXP (x, 1)) == CONST_INT
6141 && INTVAL (XEXP (x, 1)) >= 0
6142 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6143 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6144 return 1;
6145
6146 else if (GET_CODE (XEXP (x, 0)) == REG
6147 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6148 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6149 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6150 && REGNO (XEXP (x, 0))
6151 <= LAST_VIRTUAL_POINTER_REGISTER))
6152 && GET_MODE_SIZE (mode) >= 4
6153 && GET_CODE (XEXP (x, 1)) == CONST_INT
6154 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6155 return 1;
6156 }
6157
6158 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6159 && GET_MODE_SIZE (mode) == 4
6160 && GET_CODE (x) == SYMBOL_REF
6161 && CONSTANT_POOL_ADDRESS_P (x)
6162 && ! (flag_pic
6163 && symbol_mentioned_p (get_pool_constant (x))
6164 && ! pcrel_constant_p (get_pool_constant (x))))
6165 return 1;
6166
6167 return 0;
6168 }
6169
6170 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6171 instruction of mode MODE. */
6172 int
6173 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6174 {
6175 switch (GET_MODE_SIZE (mode))
6176 {
6177 case 1:
6178 return val >= 0 && val < 32;
6179
6180 case 2:
6181 return val >= 0 && val < 64 && (val & 1) == 0;
6182
6183 default:
6184 return (val >= 0
6185 && (val + GET_MODE_SIZE (mode)) <= 128
6186 && (val & 3) == 0);
6187 }
6188 }
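
/* Worked example (an addition, not from the original sources): the ranges
   above accept byte offsets 0..31 for QImode, even offsets 0..62 for HImode,
   and word-aligned offsets 0..124 for SImode (since val + 4 must not
   exceed 128).  */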
6189
6190 bool
6191 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6192 {
6193 if (TARGET_ARM)
6194 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6195 else if (TARGET_THUMB2)
6196 return thumb2_legitimate_address_p (mode, x, strict_p);
6197 else /* if (TARGET_THUMB1) */
6198 return thumb1_legitimate_address_p (mode, x, strict_p);
6199 }
6200
6201 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6202
6203 Given an rtx X being reloaded into a reg required to be
6204 in class CLASS, return the class of reg to actually use.
6205 In general this is just CLASS, but for the Thumb core registers and
6206 immediate constants we prefer a LO_REGS class or a subset. */
6207
6208 static reg_class_t
6209 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6210 {
6211 if (TARGET_32BIT)
6212 return rclass;
6213 else
6214 {
6215 if (rclass == GENERAL_REGS
6216 || rclass == HI_REGS
6217 || rclass == NO_REGS
6218 || rclass == STACK_REG)
6219 return LO_REGS;
6220 else
6221 return rclass;
6222 }
6223 }
6224
6225 /* Build the SYMBOL_REF for __tls_get_addr. */
6226
6227 static GTY(()) rtx tls_get_addr_libfunc;
6228
6229 static rtx
6230 get_tls_get_addr (void)
6231 {
6232 if (!tls_get_addr_libfunc)
6233 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6234 return tls_get_addr_libfunc;
6235 }
6236
6237 static rtx
6238 arm_load_tp (rtx target)
6239 {
6240 if (!target)
6241 target = gen_reg_rtx (SImode);
6242
6243 if (TARGET_HARD_TP)
6244 {
6245 /* Can return in any reg. */
6246 emit_insn (gen_load_tp_hard (target));
6247 }
6248 else
6249 {
6250 /* Always returned in r0. Immediately copy the result into a pseudo,
6251 otherwise other uses of r0 (e.g. setting up function arguments) may
6252 clobber the value. */
6253
6254 rtx tmp;
6255
6256 emit_insn (gen_load_tp_soft ());
6257
6258 tmp = gen_rtx_REG (SImode, 0);
6259 emit_move_insn (target, tmp);
6260 }
6261 return target;
6262 }
6263
6264 static rtx
6265 load_tls_operand (rtx x, rtx reg)
6266 {
6267 rtx tmp;
6268
6269 if (reg == NULL_RTX)
6270 reg = gen_reg_rtx (SImode);
6271
6272 tmp = gen_rtx_CONST (SImode, x);
6273
6274 emit_move_insn (reg, tmp);
6275
6276 return reg;
6277 }
6278
6279 static rtx
6280 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6281 {
6282 rtx insns, label, labelno, sum;
6283
6284 gcc_assert (reloc != TLS_DESCSEQ);
6285 start_sequence ();
6286
6287 labelno = GEN_INT (pic_labelno++);
6288 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6289 label = gen_rtx_CONST (VOIDmode, label);
6290
6291 sum = gen_rtx_UNSPEC (Pmode,
6292 gen_rtvec (4, x, GEN_INT (reloc), label,
6293 GEN_INT (TARGET_ARM ? 8 : 4)),
6294 UNSPEC_TLS);
6295 reg = load_tls_operand (sum, reg);
6296
6297 if (TARGET_ARM)
6298 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6299 else
6300 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6301
6302 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6303 LCT_PURE, /* LCT_CONST? */
6304 Pmode, 1, reg, Pmode);
6305
6306 insns = get_insns ();
6307 end_sequence ();
6308
6309 return insns;
6310 }
6311
6312 static rtx
6313 arm_tls_descseq_addr (rtx x, rtx reg)
6314 {
6315 rtx labelno = GEN_INT (pic_labelno++);
6316 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6317 rtx sum = gen_rtx_UNSPEC (Pmode,
6318 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6319 gen_rtx_CONST (VOIDmode, label),
6320 GEN_INT (!TARGET_ARM)),
6321 UNSPEC_TLS);
6322 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6323
6324 emit_insn (gen_tlscall (x, labelno));
6325 if (!reg)
6326 reg = gen_reg_rtx (SImode);
6327 else
6328 gcc_assert (REGNO (reg) != 0);
6329
6330 emit_move_insn (reg, reg0);
6331
6332 return reg;
6333 }
6334
6335 rtx
6336 legitimize_tls_address (rtx x, rtx reg)
6337 {
6338 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6339 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6340
6341 switch (model)
6342 {
6343 case TLS_MODEL_GLOBAL_DYNAMIC:
6344 if (TARGET_GNU2_TLS)
6345 {
6346 reg = arm_tls_descseq_addr (x, reg);
6347
6348 tp = arm_load_tp (NULL_RTX);
6349
6350 dest = gen_rtx_PLUS (Pmode, tp, reg);
6351 }
6352 else
6353 {
6354 /* Original scheme */
6355 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6356 dest = gen_reg_rtx (Pmode);
6357 emit_libcall_block (insns, dest, ret, x);
6358 }
6359 return dest;
6360
6361 case TLS_MODEL_LOCAL_DYNAMIC:
6362 if (TARGET_GNU2_TLS)
6363 {
6364 reg = arm_tls_descseq_addr (x, reg);
6365
6366 tp = arm_load_tp (NULL_RTX);
6367
6368 dest = gen_rtx_PLUS (Pmode, tp, reg);
6369 }
6370 else
6371 {
6372 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6373
6374 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6375 share the LDM result with other LD model accesses. */
6376 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6377 UNSPEC_TLS);
6378 dest = gen_reg_rtx (Pmode);
6379 emit_libcall_block (insns, dest, ret, eqv);
6380
6381 /* Load the addend. */
6382 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6383 GEN_INT (TLS_LDO32)),
6384 UNSPEC_TLS);
6385 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6386 dest = gen_rtx_PLUS (Pmode, dest, addend);
6387 }
6388 return dest;
6389
6390 case TLS_MODEL_INITIAL_EXEC:
6391 labelno = GEN_INT (pic_labelno++);
6392 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6393 label = gen_rtx_CONST (VOIDmode, label);
6394 sum = gen_rtx_UNSPEC (Pmode,
6395 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6396 GEN_INT (TARGET_ARM ? 8 : 4)),
6397 UNSPEC_TLS);
6398 reg = load_tls_operand (sum, reg);
6399
6400 if (TARGET_ARM)
6401 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6402 else if (TARGET_THUMB2)
6403 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6404 else
6405 {
6406 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6407 emit_move_insn (reg, gen_const_mem (SImode, reg));
6408 }
6409
6410 tp = arm_load_tp (NULL_RTX);
6411
6412 return gen_rtx_PLUS (Pmode, tp, reg);
6413
6414 case TLS_MODEL_LOCAL_EXEC:
6415 tp = arm_load_tp (NULL_RTX);
6416
6417 reg = gen_rtx_UNSPEC (Pmode,
6418 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6419 UNSPEC_TLS);
6420 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6421
6422 return gen_rtx_PLUS (Pmode, tp, reg);
6423
6424 default:
6425 abort ();
6426 }
6427 }
6428
6429 /* Try machine-dependent ways of modifying an illegitimate address
6430 to be legitimate. If we find one, return the new, valid address. */
6431 rtx
6432 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6433 {
6434 if (!TARGET_ARM)
6435 {
6436 /* TODO: legitimize_address for Thumb2. */
6437 if (TARGET_THUMB2)
6438 return x;
6439 return thumb_legitimize_address (x, orig_x, mode);
6440 }
6441
6442 if (arm_tls_symbol_p (x))
6443 return legitimize_tls_address (x, NULL_RTX);
6444
6445 if (GET_CODE (x) == PLUS)
6446 {
6447 rtx xop0 = XEXP (x, 0);
6448 rtx xop1 = XEXP (x, 1);
6449
6450 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6451 xop0 = force_reg (SImode, xop0);
6452
6453 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6454 xop1 = force_reg (SImode, xop1);
6455
6456 if (ARM_BASE_REGISTER_RTX_P (xop0)
6457 && GET_CODE (xop1) == CONST_INT)
6458 {
6459 HOST_WIDE_INT n, low_n;
6460 rtx base_reg, val;
6461 n = INTVAL (xop1);
6462
6463 /* VFP addressing modes actually allow greater offsets, but for
6464 now we just stick with the lowest common denominator. */
6465 if (mode == DImode
6466 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6467 {
6468 low_n = n & 0x0f;
6469 n &= ~0x0f;
6470 if (low_n > 4)
6471 {
6472 n += 16;
6473 low_n -= 16;
6474 }
6475 }
6476 else
6477 {
6478 low_n = ((mode) == TImode ? 0
6479 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6480 n -= low_n;
6481 }
6482
6483 base_reg = gen_reg_rtx (SImode);
6484 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6485 emit_move_insn (base_reg, val);
6486 x = plus_constant (Pmode, base_reg, low_n);
6487 }
6488 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6489 x = gen_rtx_PLUS (SImode, xop0, xop1);
6490 }
6491
6492 /* XXX We don't allow MINUS any more -- see comment in
6493 arm_legitimate_address_outer_p (). */
6494 else if (GET_CODE (x) == MINUS)
6495 {
6496 rtx xop0 = XEXP (x, 0);
6497 rtx xop1 = XEXP (x, 1);
6498
6499 if (CONSTANT_P (xop0))
6500 xop0 = force_reg (SImode, xop0);
6501
6502 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6503 xop1 = force_reg (SImode, xop1);
6504
6505 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6506 x = gen_rtx_MINUS (SImode, xop0, xop1);
6507 }
6508
6509 /* Make sure to take full advantage of the pre-indexed addressing mode
6510 with absolute addresses, which often allows the base register to
6511 be factorized across multiple adjacent memory references, and might
6512 even allow the minipool to be avoided entirely. */
6513 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6514 {
6515 unsigned int bits;
6516 HOST_WIDE_INT mask, base, index;
6517 rtx base_reg;
6518
6519 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6520 use an 8-bit index. So let's use a 12-bit index for SImode only and
6521 hope that arm_gen_constant will enable ldrb to use more bits. */
6522 bits = (mode == SImode) ? 12 : 8;
6523 mask = (1 << bits) - 1;
6524 base = INTVAL (x) & ~mask;
6525 index = INTVAL (x) & mask;
6526 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6527 {
6528 /* It'll most probably be more efficient to generate the base
6529 with more bits set and use a negative index instead. */
6530 base |= mask;
6531 index -= mask;
6532 }
6533 base_reg = force_reg (SImode, GEN_INT (base));
6534 x = plus_constant (Pmode, base_reg, index);
6535 }
6536
6537 if (flag_pic)
6538 {
6539 /* We need to find and carefully transform any SYMBOL and LABEL
6540 references; so go back to the original address expression. */
6541 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6542
6543 if (new_x != orig_x)
6544 x = new_x;
6545 }
6546
6547 return x;
6548 }
6549
6550
6551 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6552 to be legitimate. If we find one, return the new, valid address. */
6553 rtx
6554 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6555 {
6556 if (arm_tls_symbol_p (x))
6557 return legitimize_tls_address (x, NULL_RTX);
6558
6559 if (GET_CODE (x) == PLUS
6560 && GET_CODE (XEXP (x, 1)) == CONST_INT
6561 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6562 || INTVAL (XEXP (x, 1)) < 0))
6563 {
6564 rtx xop0 = XEXP (x, 0);
6565 rtx xop1 = XEXP (x, 1);
6566 HOST_WIDE_INT offset = INTVAL (xop1);
6567
6568 /* Try and fold the offset into a biasing of the base register and
6569 then offsetting that. Don't do this when optimizing for space
6570 since it can cause too many CSEs. */
6571 if (optimize_size && offset >= 0
6572 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6573 {
6574 HOST_WIDE_INT delta;
6575
6576 if (offset >= 256)
6577 delta = offset - (256 - GET_MODE_SIZE (mode));
6578 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6579 delta = 31 * GET_MODE_SIZE (mode);
6580 else
6581 delta = offset & (~31 * GET_MODE_SIZE (mode));
6582
6583 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6584 NULL_RTX);
6585 x = plus_constant (Pmode, xop0, delta);
6586 }
6587 else if (offset < 0 && offset > -256)
6588 /* Small negative offsets are best done with a subtract before the
6589 dereference; forcing these into a register normally takes two
6590 instructions. */
6591 x = force_operand (x, NULL_RTX);
6592 else
6593 {
6594 /* For the remaining cases, force the constant into a register. */
6595 xop1 = force_reg (SImode, xop1);
6596 x = gen_rtx_PLUS (SImode, xop0, xop1);
6597 }
6598 }
6599 else if (GET_CODE (x) == PLUS
6600 && s_register_operand (XEXP (x, 1), SImode)
6601 && !s_register_operand (XEXP (x, 0), SImode))
6602 {
6603 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6604
6605 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6606 }
6607
6608 if (flag_pic)
6609 {
6610 /* We need to find and carefully transform any SYMBOL and LABEL
6611 references; so go back to the original address expression. */
6612 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6613
6614 if (new_x != orig_x)
6615 x = new_x;
6616 }
6617
6618 return x;
6619 }
6620
6621 bool
6622 arm_legitimize_reload_address (rtx *p,
6623 enum machine_mode mode,
6624 int opnum, int type,
6625 int ind_levels ATTRIBUTE_UNUSED)
6626 {
6627 /* We must recognize output that we have already generated ourselves. */
6628 if (GET_CODE (*p) == PLUS
6629 && GET_CODE (XEXP (*p, 0)) == PLUS
6630 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6631 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6632 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6633 {
6634 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6635 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6636 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6637 return true;
6638 }
6639
6640 if (GET_CODE (*p) == PLUS
6641 && GET_CODE (XEXP (*p, 0)) == REG
6642 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6643 /* If the base register is equivalent to a constant, let the generic
6644 code handle it. Otherwise we will run into problems if a future
6645 reload pass decides to rematerialize the constant. */
6646 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6647 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6648 {
6649 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6650 HOST_WIDE_INT low, high;
6651
6652 /* Detect coprocessor load/stores. */
6653 bool coproc_p = ((TARGET_HARD_FLOAT
6654 && TARGET_VFP
6655 && (mode == SFmode || mode == DFmode))
6656 || (TARGET_REALLY_IWMMXT
6657 && VALID_IWMMXT_REG_MODE (mode))
6658 || (TARGET_NEON
6659 && (VALID_NEON_DREG_MODE (mode)
6660 || VALID_NEON_QREG_MODE (mode))));
6661
6662 /* For some conditions, bail out when lower two bits are unaligned. */
6663 if ((val & 0x3) != 0
6664 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6665 && (coproc_p
6666 /* For DI, and DF under soft-float: */
6667 || ((mode == DImode || mode == DFmode)
6668 /* Without ldrd, we use stm/ldm, which does not
6669 fare well with unaligned bits. */
6670 && (! TARGET_LDRD
6671 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6672 || TARGET_THUMB2))))
6673 return false;
6674
6675 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6676 of which the (reg+high) gets turned into a reload add insn,
6677 we try to decompose the index into high/low values that can often
6678 also lead to better reload CSE.
6679 For example:
6680 ldr r0, [r2, #4100] // Offset too large
6681 ldr r1, [r2, #4104] // Offset too large
6682
6683 is best reloaded as:
6684 add t1, r2, #4096
6685 ldr r0, [t1, #4]
6686 add t2, r2, #4096
6687 ldr r1, [t2, #8]
6688
6689 which post-reload CSE can simplify in most cases to eliminate the
6690 second add instruction:
6691 add t1, r2, #4096
6692 ldr r0, [t1, #4]
6693 ldr r1, [t1, #8]
6694
6695 The idea here is that we want to split out the bits of the constant
6696 as a mask, rather than subtracting the maximum offset that the
6697 respective type of load/store can handle.
6698
6699 Choosing a negative low offset can still pay off even if
6700 the overall offset is positive; sometimes this may lead to an immediate
6701 that can be constructed with fewer instructions.
6702 For example:
6703 ldr r0, [r2, #0x3FFFFC]
6704
6705 This is best reloaded as:
6706 add t1, r2, #0x400000
6707 ldr r0, [t1, #-4]
6708
6709 The trick for spotting this for a load insn with N bits of offset
6710 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6711 negative offset that is going to make bit N and all the bits below
6712 it become zero in the remainder part.
6713
6714 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6715 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6716 used in most cases of ARM load/store instructions. */
6717
6718 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6719 (((VAL) & ((1 << (N)) - 1)) \
6720 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6721 : 0)
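
/* Worked example of the macro above (an addition, not from the original
   sources): for VAL = 0x3FFFFC and N = 12, bits 11:0 are nonzero and bit 12
   is set, so the macro yields 0xFFC - 0x1000 = -4; the high part then
   becomes 0x400000, matching the "ldr r0, [t1, #-4]" case shown earlier.
   For VAL = 4100 (0x1004) bit 12 is also set, giving low = -4092 and
   high = 8192.  */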
6722
6723 if (coproc_p)
6724 {
6725 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6726
6727 /* NEON quad-word load/stores are made of two double-word accesses,
6728 so the valid index range is reduced by 8. Treat as 9-bit range if
6729 we go over it. */
6730 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6731 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6732 }
6733 else if (GET_MODE_SIZE (mode) == 8)
6734 {
6735 if (TARGET_LDRD)
6736 low = (TARGET_THUMB2
6737 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6738 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6739 else
6740 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6741 to access doublewords. The supported load/store offsets are
6742 -8, -4, and 4, which we try to produce here. */
6743 low = ((val & 0xf) ^ 0x8) - 0x8;
6744 }
6745 else if (GET_MODE_SIZE (mode) < 8)
6746 {
6747 /* NEON element load/stores do not have an offset. */
6748 if (TARGET_NEON_FP16 && mode == HFmode)
6749 return false;
6750
6751 if (TARGET_THUMB2)
6752 {
6753 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6754 Try the wider 12-bit range first, and re-try if the result
6755 is out of range. */
6756 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6757 if (low < -255)
6758 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6759 }
6760 else
6761 {
6762 if (mode == HImode || mode == HFmode)
6763 {
6764 if (arm_arch4)
6765 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6766 else
6767 {
6768 /* The storehi/movhi_bytes fallbacks can use only
6769 [-4094,+4094] of the full ldrb/strb index range. */
6770 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6771 if (low == 4095 || low == -4095)
6772 return false;
6773 }
6774 }
6775 else
6776 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6777 }
6778 }
6779 else
6780 return false;
6781
6782 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6783 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6784 - (unsigned HOST_WIDE_INT) 0x80000000);
6785 /* Check for overflow or zero */
6786 if (low == 0 || high == 0 || (high + low != val))
6787 return false;
6788
6789 /* Reload the high part into a base reg; leave the low part
6790 in the mem. */
6791 *p = gen_rtx_PLUS (GET_MODE (*p),
6792 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6793 GEN_INT (high)),
6794 GEN_INT (low));
6795 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6796 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6797 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6798 return true;
6799 }
6800
6801 return false;
6802 }
6803
6804 rtx
6805 thumb_legitimize_reload_address (rtx *x_p,
6806 enum machine_mode mode,
6807 int opnum, int type,
6808 int ind_levels ATTRIBUTE_UNUSED)
6809 {
6810 rtx x = *x_p;
6811
6812 if (GET_CODE (x) == PLUS
6813 && GET_MODE_SIZE (mode) < 4
6814 && REG_P (XEXP (x, 0))
6815 && XEXP (x, 0) == stack_pointer_rtx
6816 && GET_CODE (XEXP (x, 1)) == CONST_INT
6817 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6818 {
6819 rtx orig_x = x;
6820
6821 x = copy_rtx (x);
6822 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6823 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6824 return x;
6825 }
6826
6827 /* If both registers are hi-regs, then it's better to reload the
6828 entire expression rather than each register individually. That
6829 only requires one reload register rather than two. */
6830 if (GET_CODE (x) == PLUS
6831 && REG_P (XEXP (x, 0))
6832 && REG_P (XEXP (x, 1))
6833 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6834 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6835 {
6836 rtx orig_x = x;
6837
6838 x = copy_rtx (x);
6839 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6840 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6841 return x;
6842 }
6843
6844 return NULL;
6845 }
6846
6847 /* Test for various thread-local symbols. */
6848
6849 /* Return TRUE if X is a thread-local symbol. */
6850
6851 static bool
6852 arm_tls_symbol_p (rtx x)
6853 {
6854 if (! TARGET_HAVE_TLS)
6855 return false;
6856
6857 if (GET_CODE (x) != SYMBOL_REF)
6858 return false;
6859
6860 return SYMBOL_REF_TLS_MODEL (x) != 0;
6861 }
6862
6863 /* Helper for arm_tls_referenced_p. */
6864
6865 static int
6866 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6867 {
6868 if (GET_CODE (*x) == SYMBOL_REF)
6869 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6870
6871 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6872 TLS offsets, not real symbol references. */
6873 if (GET_CODE (*x) == UNSPEC
6874 && XINT (*x, 1) == UNSPEC_TLS)
6875 return -1;
6876
6877 return 0;
6878 }
6879
6880 /* Return TRUE if X contains any TLS symbol references. */
6881
6882 bool
6883 arm_tls_referenced_p (rtx x)
6884 {
6885 if (! TARGET_HAVE_TLS)
6886 return false;
6887
6888 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6889 }
6890
6891 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6892
6893 On the ARM, allow any integer (invalid ones are removed later by insn
6894 patterns), nice doubles and symbol_refs which refer to the function's
6895 constant pool XXX.
6896
6897 When generating pic allow anything. */
6898
6899 static bool
6900 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6901 {
6902 /* At present, we have no support for Neon structure constants, so forbid
6903 them here. It might be possible to handle simple cases like 0 and -1
6904 in future. */
6905 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6906 return false;
6907
6908 return flag_pic || !label_mentioned_p (x);
6909 }
6910
6911 static bool
6912 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6913 {
6914 return (GET_CODE (x) == CONST_INT
6915 || GET_CODE (x) == CONST_DOUBLE
6916 || CONSTANT_ADDRESS_P (x)
6917 || flag_pic);
6918 }
6919
6920 static bool
6921 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6922 {
6923 return (!arm_cannot_force_const_mem (mode, x)
6924 && (TARGET_32BIT
6925 ? arm_legitimate_constant_p_1 (mode, x)
6926 : thumb_legitimate_constant_p (mode, x)));
6927 }
6928
6929 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6930
6931 static bool
6932 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6933 {
6934 rtx base, offset;
6935
6936 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6937 {
6938 split_const (x, &base, &offset);
6939 if (GET_CODE (base) == SYMBOL_REF
6940 && !offset_within_block_p (base, INTVAL (offset)))
6941 return true;
6942 }
6943 return arm_tls_referenced_p (x);
6944 }
6945 \f
6946 #define REG_OR_SUBREG_REG(X) \
6947 (GET_CODE (X) == REG \
6948 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6949
6950 #define REG_OR_SUBREG_RTX(X) \
6951 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6952
6953 static inline int
6954 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6955 {
6956 enum machine_mode mode = GET_MODE (x);
6957 int total;
6958
6959 switch (code)
6960 {
6961 case ASHIFT:
6962 case ASHIFTRT:
6963 case LSHIFTRT:
6964 case ROTATERT:
6965 case PLUS:
6966 case MINUS:
6967 case COMPARE:
6968 case NEG:
6969 case NOT:
6970 return COSTS_N_INSNS (1);
6971
6972 case MULT:
6973 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6974 {
6975 int cycles = 0;
6976 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6977
6978 while (i)
6979 {
6980 i >>= 2;
6981 cycles++;
6982 }
6983 return COSTS_N_INSNS (2) + cycles;
6984 }
6985 return COSTS_N_INSNS (1) + 16;
6986
6987 case SET:
6988 return (COSTS_N_INSNS (1)
6989 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6990 + GET_CODE (SET_DEST (x)) == MEM));
6991
6992 case CONST_INT:
6993 if (outer == SET)
6994 {
6995 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6996 return 0;
6997 if (thumb_shiftable_const (INTVAL (x)))
6998 return COSTS_N_INSNS (2);
6999 return COSTS_N_INSNS (3);
7000 }
7001 else if ((outer == PLUS || outer == COMPARE)
7002 && INTVAL (x) < 256 && INTVAL (x) > -256)
7003 return 0;
7004 else if ((outer == IOR || outer == XOR || outer == AND)
7005 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7006 return COSTS_N_INSNS (1);
7007 else if (outer == AND)
7008 {
7009 int i;
7010 /* This duplicates the tests in the andsi3 expander. */
7011 for (i = 9; i <= 31; i++)
7012 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7013 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7014 return COSTS_N_INSNS (2);
7015 }
7016 else if (outer == ASHIFT || outer == ASHIFTRT
7017 || outer == LSHIFTRT)
7018 return 0;
7019 return COSTS_N_INSNS (2);
7020
7021 case CONST:
7022 case CONST_DOUBLE:
7023 case LABEL_REF:
7024 case SYMBOL_REF:
7025 return COSTS_N_INSNS (3);
7026
7027 case UDIV:
7028 case UMOD:
7029 case DIV:
7030 case MOD:
7031 return 100;
7032
7033 case TRUNCATE:
7034 return 99;
7035
7036 case AND:
7037 case XOR:
7038 case IOR:
7039 /* XXX guess. */
7040 return 8;
7041
7042 case MEM:
7043 /* XXX another guess. */
7044 /* Memory costs quite a lot for the first word, but subsequent words
7045 load at the equivalent of a single insn each. */
7046 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7047 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7048 ? 4 : 0));
7049
7050 case IF_THEN_ELSE:
7051 /* XXX a guess. */
7052 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7053 return 14;
7054 return 2;
7055
7056 case SIGN_EXTEND:
7057 case ZERO_EXTEND:
7058 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7059 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7060
7061 if (mode == SImode)
7062 return total;
7063
7064 if (arm_arch6)
7065 return total + COSTS_N_INSNS (1);
7066
7067 /* Assume a two-shift sequence. Increase the cost slightly so
7068 we prefer actual shifts over an extend operation. */
7069 return total + 1 + COSTS_N_INSNS (2);
7070
7071 default:
7072 return 99;
7073 }
7074 }
7075
7076 static inline bool
7077 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7078 {
7079 enum machine_mode mode = GET_MODE (x);
7080 enum rtx_code subcode;
7081 rtx operand;
7082 enum rtx_code code = GET_CODE (x);
7083 *total = 0;
7084
7085 switch (code)
7086 {
7087 case MEM:
7088 /* Memory costs quite a lot for the first word, but subsequent words
7089 load at the equivalent of a single insn each. */
7090 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7091 return true;
7092
7093 case DIV:
7094 case MOD:
7095 case UDIV:
7096 case UMOD:
7097 if (TARGET_HARD_FLOAT && mode == SFmode)
7098 *total = COSTS_N_INSNS (2);
7099 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7100 *total = COSTS_N_INSNS (4);
7101 else
7102 *total = COSTS_N_INSNS (20);
7103 return false;
7104
7105 case ROTATE:
7106 if (GET_CODE (XEXP (x, 1)) == REG)
7107 *total = COSTS_N_INSNS (1); /* Need to subtract from 32.  */
7108 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7109 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7110
7111 /* Fall through */
7112 case ROTATERT:
7113 if (mode != SImode)
7114 {
7115 *total += COSTS_N_INSNS (4);
7116 return true;
7117 }
7118
7119 /* Fall through */
7120 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7121 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7122 if (mode == DImode)
7123 {
7124 *total += COSTS_N_INSNS (3);
7125 return true;
7126 }
7127
7128 *total += COSTS_N_INSNS (1);
7129 /* Increase the cost of complex shifts because they aren't any faster,
7130 and they reduce dual-issue opportunities. */
7131 if (arm_tune_cortex_a9
7132 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7133 ++*total;
7134
7135 return true;
7136
7137 case MINUS:
7138 if (mode == DImode)
7139 {
7140 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7141 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7142 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7143 {
7144 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7145 return true;
7146 }
7147
7148 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7149 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7150 {
7151 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7152 return true;
7153 }
7154
7155 return false;
7156 }
7157
7158 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7159 {
7160 if (TARGET_HARD_FLOAT
7161 && (mode == SFmode
7162 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7163 {
7164 *total = COSTS_N_INSNS (1);
7165 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7166 && arm_const_double_rtx (XEXP (x, 0)))
7167 {
7168 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7169 return true;
7170 }
7171
7172 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7173 && arm_const_double_rtx (XEXP (x, 1)))
7174 {
7175 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7176 return true;
7177 }
7178
7179 return false;
7180 }
7181 *total = COSTS_N_INSNS (20);
7182 return false;
7183 }
7184
7185 *total = COSTS_N_INSNS (1);
7186 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7187 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7188 {
7189 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7190 return true;
7191 }
7192
7193 subcode = GET_CODE (XEXP (x, 1));
7194 if (subcode == ASHIFT || subcode == ASHIFTRT
7195 || subcode == LSHIFTRT
7196 || subcode == ROTATE || subcode == ROTATERT)
7197 {
7198 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7199 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7200 return true;
7201 }
7202
7203 /* A shift as a part of RSB costs no more than RSB itself. */
7204 if (GET_CODE (XEXP (x, 0)) == MULT
7205 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7206 {
7207 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7208 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7209 return true;
7210 }
7211
7212 if (subcode == MULT
7213 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7214 {
7215 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7216 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7217 return true;
7218 }
7219
7220 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7221 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7222 {
7223 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7224 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7225 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7226 *total += COSTS_N_INSNS (1);
7227
7228 return true;
7229 }
7230
7231 /* Fall through */
7232
7233 case PLUS:
7234 if (code == PLUS && arm_arch6 && mode == SImode
7235 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7236 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7237 {
7238 *total = COSTS_N_INSNS (1);
7239 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7240 0, speed);
7241 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7242 return true;
7243 }
7244
7245 /* MLA: All arguments must be registers. We filter out
7246 multiplication by a power of two, so that we fall down into
7247 the code below. */
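      /* For instance, (plus (mult r1 r2) r3) is costed purely as the MLA's
	 multiply, whereas (plus (mult r1 (const_int 4)) r3) is a power-of-two
	 multiply and falls through to be costed as an add with a shifted
	 operand. */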
7248 if (GET_CODE (XEXP (x, 0)) == MULT
7249 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7250 {
7251 /* The cost comes from the cost of the multiply. */
7252 return false;
7253 }
7254
7255 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7256 {
7257 if (TARGET_HARD_FLOAT
7258 && (mode == SFmode
7259 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7260 {
7261 *total = COSTS_N_INSNS (1);
7262 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7263 && arm_const_double_rtx (XEXP (x, 1)))
7264 {
7265 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7266 return true;
7267 }
7268
7269 return false;
7270 }
7271
7272 *total = COSTS_N_INSNS (20);
7273 return false;
7274 }
7275
7276 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7277 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7278 {
7279 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7280 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7281 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7282 *total += COSTS_N_INSNS (1);
7283 return true;
7284 }
7285
7286 /* Fall through */
7287
7288 case AND: case XOR: case IOR:
7289
7290 /* Normally the frame registers will be split into reg+const during
7291 reload, so it is a bad idea to combine them with other instructions,
7292 since then they might not be moved outside of loops. As a compromise
7293 we allow integration with ops that have a constant as their second
7294 operand. */
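      /* For example, (and (reg fp) (const_int 3)) stays cheap, while
	 (and (reg fp) (reg r1)) picks up an extra insn of cost here. */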
7295 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7296 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7297 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7298 *total = COSTS_N_INSNS (1);
7299
7300 if (mode == DImode)
7301 {
7302 *total += COSTS_N_INSNS (2);
7303 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7304 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7305 {
7306 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7307 return true;
7308 }
7309
7310 return false;
7311 }
7312
7313 *total += COSTS_N_INSNS (1);
7314 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7315 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7316 {
7317 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7318 return true;
7319 }
7320 subcode = GET_CODE (XEXP (x, 0));
7321 if (subcode == ASHIFT || subcode == ASHIFTRT
7322 || subcode == LSHIFTRT
7323 || subcode == ROTATE || subcode == ROTATERT)
7324 {
7325 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7326 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7327 return true;
7328 }
7329
7330 if (subcode == MULT
7331 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7332 {
7333 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7334 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7335 return true;
7336 }
7337
7338 if (subcode == UMIN || subcode == UMAX
7339 || subcode == SMIN || subcode == SMAX)
7340 {
7341 *total = COSTS_N_INSNS (3);
7342 return true;
7343 }
7344
7345 return false;
7346
7347 case MULT:
7348 /* This should have been handled by the CPU specific routines. */
7349 gcc_unreachable ();
7350
7351 case TRUNCATE:
7352 if (arm_arch3m && mode == SImode
7353 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7355 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7356 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7357 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7358 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7359 {
7360 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7361 return true;
7362 }
7363 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT.  */
7364 return false;
7365
7366 case NEG:
7367 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7368 {
7369 if (TARGET_HARD_FLOAT
7370 && (mode == SFmode
7371 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7372 {
7373 *total = COSTS_N_INSNS (1);
7374 return false;
7375 }
7376 *total = COSTS_N_INSNS (2);
7377 return false;
7378 }
7379
7380 /* Fall through */
7381 case NOT:
7382 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7383 if (mode == SImode && code == NOT)
7384 {
7385 subcode = GET_CODE (XEXP (x, 0));
7386 if (subcode == ASHIFT || subcode == ASHIFTRT
7387 || subcode == LSHIFTRT
7388 || subcode == ROTATE || subcode == ROTATERT
7389 || (subcode == MULT
7390 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7391 {
7392 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7393 /* Register shifts cost an extra cycle. */
7394 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7395 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7396 subcode, 1, speed);
7397 return true;
7398 }
7399 }
7400
7401 return false;
7402
7403 case IF_THEN_ELSE:
7404 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7405 {
7406 *total = COSTS_N_INSNS (4);
7407 return true;
7408 }
7409
7410 operand = XEXP (x, 0);
7411
7412 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7413 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7414 && GET_CODE (XEXP (operand, 0)) == REG
7415 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7416 *total += COSTS_N_INSNS (1);
7417 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7418 + rtx_cost (XEXP (x, 2), code, 2, speed));
7419 return true;
7420
7421 case NE:
7422 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7423 {
7424 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7425 return true;
7426 }
7427 goto scc_insn;
7428
7429 case GE:
7430 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7431 && mode == SImode && XEXP (x, 1) == const0_rtx)
7432 {
7433 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7434 return true;
7435 }
7436 goto scc_insn;
7437
7438 case LT:
7439 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7440 && mode == SImode && XEXP (x, 1) == const0_rtx)
7441 {
7442 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7443 return true;
7444 }
7445 goto scc_insn;
7446
7447 case EQ:
7448 case GT:
7449 case LE:
7450 case GEU:
7451 case LTU:
7452 case GTU:
7453 case LEU:
7454 case UNORDERED:
7455 case ORDERED:
7456 case UNEQ:
7457 case UNGE:
7458 case UNLT:
7459 case UNGT:
7460 case UNLE:
7461 scc_insn:
7462 /* SCC insns. If the comparison has already been performed, they
7463 cost 2 instructions. Otherwise they need an additional
7464 comparison before them. */
7465 *total = COSTS_N_INSNS (2);
7466 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7467 {
7468 return true;
7469 }
7470
7471 /* Fall through */
7472 case COMPARE:
7473 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7474 {
7475 *total = 0;
7476 return true;
7477 }
7478
7479 *total += COSTS_N_INSNS (1);
7480 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7481 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7482 {
7483 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7484 return true;
7485 }
7486
7487 subcode = GET_CODE (XEXP (x, 0));
7488 if (subcode == ASHIFT || subcode == ASHIFTRT
7489 || subcode == LSHIFTRT
7490 || subcode == ROTATE || subcode == ROTATERT)
7491 {
7492 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7493 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7494 return true;
7495 }
7496
7497 if (subcode == MULT
7498 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7499 {
7500 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7501 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7502 return true;
7503 }
7504
7505 return false;
7506
7507 case UMIN:
7508 case UMAX:
7509 case SMIN:
7510 case SMAX:
7511 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7512 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7513 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7514 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7515 return true;
7516
7517 case ABS:
7518 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7519 {
7520 if (TARGET_HARD_FLOAT
7521 && (mode == SFmode
7522 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7523 {
7524 *total = COSTS_N_INSNS (1);
7525 return false;
7526 }
7527 *total = COSTS_N_INSNS (20);
7528 return false;
7529 }
7530 *total = COSTS_N_INSNS (1);
7531 if (mode == DImode)
7532 *total += COSTS_N_INSNS (3);
7533 return false;
7534
7535 case SIGN_EXTEND:
7536 case ZERO_EXTEND:
7537 *total = 0;
7538 if (GET_MODE_CLASS (mode) == MODE_INT)
7539 {
7540 rtx op = XEXP (x, 0);
7541 enum machine_mode opmode = GET_MODE (op);
7542
7543 if (mode == DImode)
7544 *total += COSTS_N_INSNS (1);
7545
7546 if (opmode != SImode)
7547 {
7548 if (MEM_P (op))
7549 {
7550 /* If !arm_arch4, we use one of the extendhisi2_mem
7551 or movhi_bytes patterns for HImode. For a QImode
7552 sign extension, we first zero-extend from memory
7553 and then perform a shift sequence. */
7554 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7555 *total += COSTS_N_INSNS (2);
7556 }
7557 else if (arm_arch6)
7558 *total += COSTS_N_INSNS (1);
7559
7560 /* We don't have the necessary insn, so we need to perform some
7561 other operation. */
7562 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7563 /* An and with constant 255. */
7564 *total += COSTS_N_INSNS (1);
7565 else
7566 /* A shift sequence. Increase costs slightly to avoid
7567 combining two shifts into an extend operation. */
7568 *total += COSTS_N_INSNS (2) + 1;
7569 }
7570
7571 return false;
7572 }
7573
7574 switch (GET_MODE (XEXP (x, 0)))
7575 {
7576 case V8QImode:
7577 case V4HImode:
7578 case V2SImode:
7579 case V4QImode:
7580 case V2HImode:
7581 *total = COSTS_N_INSNS (1);
7582 return false;
7583
7584 default:
7585 gcc_unreachable ();
7586 }
7587 gcc_unreachable ();
7588
7589 case ZERO_EXTRACT:
7590 case SIGN_EXTRACT:
7591 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7592 return true;
7593
7594 case CONST_INT:
7595 if (const_ok_for_arm (INTVAL (x))
7596 || const_ok_for_arm (~INTVAL (x)))
7597 *total = COSTS_N_INSNS (1);
7598 else
7599 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7600 INTVAL (x), NULL_RTX,
7601 NULL_RTX, 0, 0));
7602 return true;
7603
7604 case CONST:
7605 case LABEL_REF:
7606 case SYMBOL_REF:
7607 *total = COSTS_N_INSNS (3);
7608 return true;
7609
7610 case HIGH:
7611 *total = COSTS_N_INSNS (1);
7612 return true;
7613
7614 case LO_SUM:
7615 *total = COSTS_N_INSNS (1);
7616 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7617 return true;
7618
7619 case CONST_DOUBLE:
7620 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7621 && (mode == SFmode || !TARGET_VFP_SINGLE))
7622 *total = COSTS_N_INSNS (1);
7623 else
7624 *total = COSTS_N_INSNS (4);
7625 return true;
7626
7627 case SET:
7628 return false;
7629
7630 case UNSPEC:
7631 /* We cost this as high as our memory costs to allow this to
7632 be hoisted from loops. */
7633 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7634 {
7635 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7636 }
7637 return true;
7638
7639 case CONST_VECTOR:
7640 if (TARGET_NEON
7641 && TARGET_HARD_FLOAT
7642 && outer == SET
7643 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7644 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7645 *total = COSTS_N_INSNS (1);
7646 else
7647 *total = COSTS_N_INSNS (4);
7648 return true;
7649
7650 default:
7651 *total = COSTS_N_INSNS (4);
7652 return false;
7653 }
7654 }
7655
7656 /* Estimates the size cost of thumb1 instructions.
7657 For now most of the code is copied from thumb1_rtx_costs. We need more
7658 fine grain tuning when we have more related test cases. */
7659 static inline int
7660 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7661 {
7662 enum machine_mode mode = GET_MODE (x);
7663
7664 switch (code)
7665 {
7666 case ASHIFT:
7667 case ASHIFTRT:
7668 case LSHIFTRT:
7669 case ROTATERT:
7670 case PLUS:
7671 case MINUS:
7672 case COMPARE:
7673 case NEG:
7674 case NOT:
7675 return COSTS_N_INSNS (1);
7676
7677 case MULT:
7678 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7679 {
7680 /* The Thumb-1 mul instruction can't operate on a constant. We must
7681 load it into a register first. */
7682 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7683 return COSTS_N_INSNS (1) + const_size;
7684 }
7685 return COSTS_N_INSNS (1);
7686
7687 case SET:
7688 return (COSTS_N_INSNS (1)
7689 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7690 + (GET_CODE (SET_DEST (x)) == MEM)));
7691
7692 case CONST_INT:
7693 if (outer == SET)
7694 {
7695 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7696 return COSTS_N_INSNS (1);
7697 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7698 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7699 return COSTS_N_INSNS (2);
7700 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7701 if (thumb_shiftable_const (INTVAL (x)))
7702 return COSTS_N_INSNS (2);
7703 return COSTS_N_INSNS (3);
7704 }
7705 else if ((outer == PLUS || outer == COMPARE)
7706 && INTVAL (x) < 256 && INTVAL (x) > -256)
7707 return 0;
7708 else if ((outer == IOR || outer == XOR || outer == AND)
7709 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7710 return COSTS_N_INSNS (1);
7711 else if (outer == AND)
7712 {
7713 int i;
7714 /* This duplicates the tests in the andsi3 expander. */
7715 for (i = 9; i <= 31; i++)
7716 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7717 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7718 return COSTS_N_INSNS (2);
7719 }
7720 else if (outer == ASHIFT || outer == ASHIFTRT
7721 || outer == LSHIFTRT)
7722 return 0;
7723 return COSTS_N_INSNS (2);
7724
7725 case CONST:
7726 case CONST_DOUBLE:
7727 case LABEL_REF:
7728 case SYMBOL_REF:
7729 return COSTS_N_INSNS (3);
7730
7731 case UDIV:
7732 case UMOD:
7733 case DIV:
7734 case MOD:
7735 return 100;
7736
7737 case TRUNCATE:
7738 return 99;
7739
7740 case AND:
7741 case XOR:
7742 case IOR:
7743 /* XXX guess. */
7744 return 8;
7745
7746 case MEM:
7747 /* XXX another guess. */
7748 /* Memory costs quite a lot for the first word, but subsequent words
7749 load at the equivalent of a single insn each. */
7750 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7751 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7752 ? 4 : 0));
7753
7754 case IF_THEN_ELSE:
7755 /* XXX a guess. */
7756 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7757 return 14;
7758 return 2;
7759
7760 case ZERO_EXTEND:
7761 /* XXX still guessing. */
7762 switch (GET_MODE (XEXP (x, 0)))
7763 {
7764 case QImode:
7765 return (1 + (mode == DImode ? 4 : 0)
7766 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7767
7768 case HImode:
7769 return (4 + (mode == DImode ? 4 : 0)
7770 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7771
7772 case SImode:
7773 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7774
7775 default:
7776 return 99;
7777 }
7778
7779 default:
7780 return 99;
7781 }
7782 }
7783
7784 /* RTX costs when optimizing for size. */
7785 static bool
7786 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7787 int *total)
7788 {
7789 enum machine_mode mode = GET_MODE (x);
7790 if (TARGET_THUMB1)
7791 {
7792 *total = thumb1_size_rtx_costs (x, code, outer_code);
7793 return true;
7794 }
7795
7796 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7797 switch (code)
7798 {
7799 case MEM:
7800 /* A memory access costs 1 insn if the mode is small or the address is
7801 a single register; otherwise it costs one insn per word. */
7802 if (REG_P (XEXP (x, 0)))
7803 *total = COSTS_N_INSNS (1);
7804 else if (flag_pic
7805 && GET_CODE (XEXP (x, 0)) == PLUS
7806 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7807 /* This will be split into two instructions.
7808 See arm.md:calculate_pic_address. */
7809 *total = COSTS_N_INSNS (2);
7810 else
7811 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7812 return true;
7813
7814 case DIV:
7815 case MOD:
7816 case UDIV:
7817 case UMOD:
7818 /* Needs a libcall, so it costs about this. */
7819 *total = COSTS_N_INSNS (2);
7820 return false;
7821
7822 case ROTATE:
7823 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7824 {
7825 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7826 return true;
7827 }
7828 /* Fall through */
7829 case ROTATERT:
7830 case ASHIFT:
7831 case LSHIFTRT:
7832 case ASHIFTRT:
7833 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7834 {
7835 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7836 return true;
7837 }
7838 else if (mode == SImode)
7839 {
7840 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7841 /* Slightly disparage register shifts, but not by much. */
7842 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7843 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7844 return true;
7845 }
7846
7847 /* Needs a libcall. */
7848 *total = COSTS_N_INSNS (2);
7849 return false;
7850
7851 case MINUS:
7852 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7853 && (mode == SFmode || !TARGET_VFP_SINGLE))
7854 {
7855 *total = COSTS_N_INSNS (1);
7856 return false;
7857 }
7858
7859 if (mode == SImode)
7860 {
7861 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7862 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7863
7864 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7865 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7866 || subcode1 == ROTATE || subcode1 == ROTATERT
7867 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7868 || subcode1 == ASHIFTRT)
7869 {
7870 /* It's just the cost of the two operands. */
7871 *total = 0;
7872 return false;
7873 }
7874
7875 *total = COSTS_N_INSNS (1);
7876 return false;
7877 }
7878
7879 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7880 return false;
7881
7882 case PLUS:
7883 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7884 && (mode == SFmode || !TARGET_VFP_SINGLE))
7885 {
7886 *total = COSTS_N_INSNS (1);
7887 return false;
7888 }
7889
7890 /* A shift as a part of ADD costs nothing. */
7891 if (GET_CODE (XEXP (x, 0)) == MULT
7892 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7893 {
7894 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7895 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7896 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7897 return true;
7898 }
7899
7900 /* Fall through */
7901 case AND: case XOR: case IOR:
7902 if (mode == SImode)
7903 {
7904 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7905
7906 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7907 || subcode == LSHIFTRT || subcode == ASHIFTRT
7908 || (code == AND && subcode == NOT))
7909 {
7910 /* It's just the cost of the two operands. */
7911 *total = 0;
7912 return false;
7913 }
7914 }
7915
7916 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7917 return false;
7918
7919 case MULT:
7920 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7921 return false;
7922
7923 case NEG:
7924 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7925 && (mode == SFmode || !TARGET_VFP_SINGLE))
7926 {
7927 *total = COSTS_N_INSNS (1);
7928 return false;
7929 }
7930
7931 /* Fall through */
7932 case NOT:
7933 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7934
7935 return false;
7936
7937 case IF_THEN_ELSE:
7938 *total = 0;
7939 return false;
7940
7941 case COMPARE:
7942 if (cc_register (XEXP (x, 0), VOIDmode))
7943 * total = 0;
7944 else
7945 *total = COSTS_N_INSNS (1);
7946 return false;
7947
7948 case ABS:
7949 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7950 && (mode == SFmode || !TARGET_VFP_SINGLE))
7951 *total = COSTS_N_INSNS (1);
7952 else
7953 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7954 return false;
7955
7956 case SIGN_EXTEND:
7957 case ZERO_EXTEND:
7958 return arm_rtx_costs_1 (x, outer_code, total, 0);
7959
7960 case CONST_INT:
7961 if (const_ok_for_arm (INTVAL (x)))
7962 /* A multiplication by a constant requires another instruction
7963 to load the constant to a register. */
7964 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7965 ? 1 : 0);
7966 else if (const_ok_for_arm (~INTVAL (x)))
7967 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7968 else if (const_ok_for_arm (-INTVAL (x)))
7969 {
7970 if (outer_code == COMPARE || outer_code == PLUS
7971 || outer_code == MINUS)
7972 *total = 0;
7973 else
7974 *total = COSTS_N_INSNS (1);
7975 }
7976 else
7977 *total = COSTS_N_INSNS (2);
7978 return true;
7979
7980 case CONST:
7981 case LABEL_REF:
7982 case SYMBOL_REF:
7983 *total = COSTS_N_INSNS (2);
7984 return true;
7985
7986 case CONST_DOUBLE:
7987 *total = COSTS_N_INSNS (4);
7988 return true;
7989
7990 case CONST_VECTOR:
7991 if (TARGET_NEON
7992 && TARGET_HARD_FLOAT
7993 && outer_code == SET
7994 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7995 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7996 *total = COSTS_N_INSNS (1);
7997 else
7998 *total = COSTS_N_INSNS (4);
7999 return true;
8000
8001 case HIGH:
8002 case LO_SUM:
8003 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8004 cost of these slightly. */
8005 *total = COSTS_N_INSNS (1) + 1;
8006 return true;
8007
8008 case SET:
8009 return false;
8010
8011 default:
8012 if (mode != VOIDmode)
8013 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8014 else
8015 *total = COSTS_N_INSNS (4); /* Who knows?  */
8016 return false;
8017 }
8018 }
8019
8020 /* RTX costs: dispatch to the size-based costs or the per-core speed costs depending on SPEED. */
8021 static bool
8022 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8023 int *total, bool speed)
8024 {
8025 if (!speed)
8026 return arm_size_rtx_costs (x, (enum rtx_code) code,
8027 (enum rtx_code) outer_code, total);
8028 else
8029 return current_tune->rtx_costs (x, (enum rtx_code) code,
8030 (enum rtx_code) outer_code,
8031 total, speed);
8032 }
8033
8034 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8035 supported on any "slowmul" cores, so it can be ignored. */
8036
8037 static bool
8038 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8039 int *total, bool speed)
8040 {
8041 enum machine_mode mode = GET_MODE (x);
8042
8043 if (TARGET_THUMB)
8044 {
8045 *total = thumb1_rtx_costs (x, code, outer_code);
8046 return true;
8047 }
8048
8049 switch (code)
8050 {
8051 case MULT:
8052 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8053 || mode == DImode)
8054 {
8055 *total = COSTS_N_INSNS (20);
8056 return false;
8057 }
8058
8059 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8060 {
8061 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8062 & (unsigned HOST_WIDE_INT) 0xffffffff);
8063 int cost, const_ok = const_ok_for_arm (i);
8064 int j, booth_unit_size;
8065
8066 /* Tune as appropriate. */
8067 cost = const_ok ? 4 : 8;
8068 booth_unit_size = 2;
8069 for (j = 0; i && j < 32; j += booth_unit_size)
8070 {
8071 i >>= booth_unit_size;
8072 cost++;
8073 }
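	  /* For instance, multiplying by 0x35 (six significant bits) takes
	     three trips around the loop above with the 2-bit booth unit, so
	     the cost becomes 4 + 3 = 7 (0x35 itself is a valid ARM
	     immediate, hence the base cost of 4). */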
8074
8075 *total = COSTS_N_INSNS (cost);
8076 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8077 return true;
8078 }
8079
8080 *total = COSTS_N_INSNS (20);
8081 return false;
8082
8083 default:
8084 return arm_rtx_costs_1 (x, outer_code, total, speed);
8085 }
8086 }
8087
8088
8089 /* RTX cost for cores with a fast multiply unit (M variants). */
8090
8091 static bool
8092 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8093 int *total, bool speed)
8094 {
8095 enum machine_mode mode = GET_MODE (x);
8096
8097 if (TARGET_THUMB1)
8098 {
8099 *total = thumb1_rtx_costs (x, code, outer_code);
8100 return true;
8101 }
8102
8103 /* ??? Should Thumb-2 use different costs? */
8104 switch (code)
8105 {
8106 case MULT:
8107 /* There is no point basing this on the tuning, since it is always the
8108 fast variant if it exists at all. */
8109 if (mode == DImode
8110 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8111 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8112 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8113 {
8114 *total = COSTS_N_INSNS (2);
8115 return false;
8116 }
8117
8118
8119 if (mode == DImode)
8120 {
8121 *total = COSTS_N_INSNS (5);
8122 return false;
8123 }
8124
8125 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8126 {
8127 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8128 & (unsigned HOST_WIDE_INT) 0xffffffff);
8129 int cost, const_ok = const_ok_for_arm (i);
8130 int j, booth_unit_size;
8131
8132 /* Tune as appropriate. */
8133 cost = const_ok ? 4 : 8;
8134 booth_unit_size = 8;
8135 for (j = 0; i && j < 32; j += booth_unit_size)
8136 {
8137 i >>= booth_unit_size;
8138 cost++;
8139 }
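	  /* With an 8-bit booth unit the loop adds at most four to the cost;
	     e.g. a multiply by 0x12345678 (not a valid ARM immediate) is
	     costed at 8 + 4 = 12. */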
8140
8141 *total = COSTS_N_INSNS (cost);
8142 return false;
8143 }
8144
8145 if (mode == SImode)
8146 {
8147 *total = COSTS_N_INSNS (4);
8148 return false;
8149 }
8150
8151 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8152 {
8153 if (TARGET_HARD_FLOAT
8154 && (mode == SFmode
8155 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8156 {
8157 *total = COSTS_N_INSNS (1);
8158 return false;
8159 }
8160 }
8161
8162 /* Requires a lib call.  */
8163 *total = COSTS_N_INSNS (20);
8164 return false;
8165
8166 default:
8167 return arm_rtx_costs_1 (x, outer_code, total, speed);
8168 }
8169 }
8170
8171
8172 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8173 so it can be ignored. */
8174
8175 static bool
8176 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8177 int *total, bool speed)
8178 {
8179 enum machine_mode mode = GET_MODE (x);
8180
8181 if (TARGET_THUMB)
8182 {
8183 *total = thumb1_rtx_costs (x, code, outer_code);
8184 return true;
8185 }
8186
8187 switch (code)
8188 {
8189 case COMPARE:
8190 if (GET_CODE (XEXP (x, 0)) != MULT)
8191 return arm_rtx_costs_1 (x, outer_code, total, speed);
8192
8193 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8194 will stall until the multiplication is complete. */
8195 *total = COSTS_N_INSNS (3);
8196 return false;
8197
8198 case MULT:
8199 /* There is no point basing this on the tuning, since it is always the
8200 fast variant if it exists at all. */
8201 if (mode == DImode
8202 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8203 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8204 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8205 {
8206 *total = COSTS_N_INSNS (2);
8207 return false;
8208 }
8209
8210
8211 if (mode == DImode)
8212 {
8213 *total = COSTS_N_INSNS (5);
8214 return false;
8215 }
8216
8217 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8218 {
8219 /* If operand 1 is a constant we can more accurately
8220 calculate the cost of the multiply. The multiplier can
8221 retire 15 bits on the first cycle and a further 12 on the
8222 second. We do, of course, have to load the constant into
8223 a register first. */
8224 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8225 /* There's a general overhead of one cycle. */
8226 int cost = 1;
8227 unsigned HOST_WIDE_INT masked_const;
8228
8229 if (i & 0x80000000)
8230 i = ~i;
8231
8232 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8233
8234 masked_const = i & 0xffff8000;
8235 if (masked_const != 0)
8236 {
8237 cost++;
8238 masked_const = i & 0xf8000000;
8239 if (masked_const != 0)
8240 cost++;
8241 }
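	  /* For example, 0x12345 has bits set above bit 14 but none above
	     bit 26, so only the first mask fires and the cost is 2 cycles;
	     a constant such as 0x7fff incurs only the 1-cycle overhead. */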
8242 *total = COSTS_N_INSNS (cost);
8243 return false;
8244 }
8245
8246 if (mode == SImode)
8247 {
8248 *total = COSTS_N_INSNS (3);
8249 return false;
8250 }
8251
8252 /* Requires a lib call.  */
8253 *total = COSTS_N_INSNS (20);
8254 return false;
8255
8256 default:
8257 return arm_rtx_costs_1 (x, outer_code, total, speed);
8258 }
8259 }
8260
8261
8262 /* RTX costs for 9e (and later) cores. */
8263
8264 static bool
8265 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8266 int *total, bool speed)
8267 {
8268 enum machine_mode mode = GET_MODE (x);
8269
8270 if (TARGET_THUMB1)
8271 {
8272 switch (code)
8273 {
8274 case MULT:
8275 *total = COSTS_N_INSNS (3);
8276 return true;
8277
8278 default:
8279 *total = thumb1_rtx_costs (x, code, outer_code);
8280 return true;
8281 }
8282 }
8283
8284 switch (code)
8285 {
8286 case MULT:
8287 /* There is no point basing this on the tuning, since it is always the
8288 fast variant if it exists at all. */
8289 if (mode == DImode
8290 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8291 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8292 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8293 {
8294 *total = COSTS_N_INSNS (2);
8295 return false;
8296 }
8297
8298
8299 if (mode == DImode)
8300 {
8301 *total = COSTS_N_INSNS (5);
8302 return false;
8303 }
8304
8305 if (mode == SImode)
8306 {
8307 *total = COSTS_N_INSNS (2);
8308 return false;
8309 }
8310
8311 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8312 {
8313 if (TARGET_HARD_FLOAT
8314 && (mode == SFmode
8315 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8316 {
8317 *total = COSTS_N_INSNS (1);
8318 return false;
8319 }
8320 }
8321
8322 *total = COSTS_N_INSNS (20);
8323 return false;
8324
8325 default:
8326 return arm_rtx_costs_1 (x, outer_code, total, speed);
8327 }
8328 }
8329 /* All address computations that can be done are free, but rtx cost returns
8330 the same for practically all of them. So we weight the different types
8331 of address here in the order (most pref first):
8332 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8333 static inline int
8334 arm_arm_address_cost (rtx x)
8335 {
8336 enum rtx_code c = GET_CODE (x);
8337
8338 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8339 return 0;
8340 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8341 return 10;
8342
8343 if (c == PLUS)
8344 {
8345 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8346 return 2;
8347
8348 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8349 return 3;
8350
8351 return 4;
8352 }
8353
8354 return 6;
8355 }
8356
8357 static inline int
8358 arm_thumb_address_cost (rtx x)
8359 {
8360 enum rtx_code c = GET_CODE (x);
8361
8362 if (c == REG)
8363 return 1;
8364 if (c == PLUS
8365 && GET_CODE (XEXP (x, 0)) == REG
8366 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8367 return 1;
8368
8369 return 2;
8370 }
8371
8372 static int
8373 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8374 {
8375 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8376 }
8377
8378 /* Adjust cost hook for XScale. */
8379 static bool
8380 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8381 {
8382 /* Some true dependencies can have a higher cost depending
8383 on precisely how certain input operands are used. */
8384 if (REG_NOTE_KIND(link) == 0
8385 && recog_memoized (insn) >= 0
8386 && recog_memoized (dep) >= 0)
8387 {
8388 int shift_opnum = get_attr_shift (insn);
8389 enum attr_type attr_type = get_attr_type (dep);
8390
8391 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8392 operand for INSN. If we have a shifted input operand and the
8393 instruction we depend on is another ALU instruction, then we may
8394 have to account for an additional stall. */
8395 if (shift_opnum != 0
8396 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8397 {
8398 rtx shifted_operand;
8399 int opno;
8400
8401 /* Get the shifted operand. */
8402 extract_insn (insn);
8403 shifted_operand = recog_data.operand[shift_opnum];
8404
8405 /* Iterate over all the operands in DEP. If we write an operand
8406 that overlaps with SHIFTED_OPERAND, then we have to increase the
8407 cost of this dependency. */
8408 extract_insn (dep);
8409 preprocess_constraints ();
8410 for (opno = 0; opno < recog_data.n_operands; opno++)
8411 {
8412 /* We can ignore strict inputs. */
8413 if (recog_data.operand_type[opno] == OP_IN)
8414 continue;
8415
8416 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8417 shifted_operand))
8418 {
8419 *cost = 2;
8420 return false;
8421 }
8422 }
8423 }
8424 }
8425 return true;
8426 }
8427
8428 /* Adjust cost hook for Cortex A9. */
8429 static bool
8430 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8431 {
8432 switch (REG_NOTE_KIND (link))
8433 {
8434 case REG_DEP_ANTI:
8435 *cost = 0;
8436 return false;
8437
8438 case REG_DEP_TRUE:
8439 case REG_DEP_OUTPUT:
8440 if (recog_memoized (insn) >= 0
8441 && recog_memoized (dep) >= 0)
8442 {
8443 if (GET_CODE (PATTERN (insn)) == SET)
8444 {
8445 if (GET_MODE_CLASS
8446 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8447 || GET_MODE_CLASS
8448 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8449 {
8450 enum attr_type attr_type_insn = get_attr_type (insn);
8451 enum attr_type attr_type_dep = get_attr_type (dep);
8452
8453 /* By default all dependencies of the form
8454 s0 = s0 <op> s1
8455 s0 = s0 <op> s2
8456 have an extra latency of 1 cycle because
8457 of the input and output dependency in this
8458 case. However, this gets modeled as a true
8459 dependency and hence all these checks. */
8460 if (REG_P (SET_DEST (PATTERN (insn)))
8461 && REG_P (SET_DEST (PATTERN (dep)))
8462 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8463 SET_DEST (PATTERN (dep))))
8464 {
8465 /* FMACS is a special case where the dependent
8466 instruction can be issued 3 cycles before
8467 the normal latency in case of an output
8468 dependency. */
8469 if ((attr_type_insn == TYPE_FMACS
8470 || attr_type_insn == TYPE_FMACD)
8471 && (attr_type_dep == TYPE_FMACS
8472 || attr_type_dep == TYPE_FMACD))
8473 {
8474 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8475 *cost = insn_default_latency (dep) - 3;
8476 else
8477 *cost = insn_default_latency (dep);
8478 return false;
8479 }
8480 else
8481 {
8482 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8483 *cost = insn_default_latency (dep) + 1;
8484 else
8485 *cost = insn_default_latency (dep);
8486 }
8487 return false;
8488 }
8489 }
8490 }
8491 }
8492 break;
8493
8494 default:
8495 gcc_unreachable ();
8496 }
8497
8498 return true;
8499 }
8500
8501 /* Adjust cost hook for FA726TE. */
8502 static bool
8503 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8504 {
8505 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8506 followed by a predicated one) has a penalty of 3. */
8507 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8508 && recog_memoized (insn) >= 0
8509 && recog_memoized (dep) >= 0
8510 && get_attr_conds (dep) == CONDS_SET)
8511 {
8512 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8513 if (get_attr_conds (insn) == CONDS_USE
8514 && get_attr_type (insn) != TYPE_BRANCH)
8515 {
8516 *cost = 3;
8517 return false;
8518 }
8519
8520 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8521 || get_attr_conds (insn) == CONDS_USE)
8522 {
8523 *cost = 0;
8524 return false;
8525 }
8526 }
8527
8528 return true;
8529 }
8530
8531 /* Implement TARGET_REGISTER_MOVE_COST.
8532
8533 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8534 they are typically more expensive than a single memory access. We set
8535 the cost to less than two memory accesses so that floating
8536 point to integer conversion does not go through memory. */
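/* For instance, with the 32-bit costs below, a GENERAL_REGS <-> VFP_REGS
   move costs 15, while arm_memory_move_cost charges 10 per access, so a
   store-and-reload round trip would cost 20. */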
8537
8538 int
8539 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8540 reg_class_t from, reg_class_t to)
8541 {
8542 if (TARGET_32BIT)
8543 {
8544 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8545 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8546 return 15;
8547 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8548 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8549 return 4;
8550 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8551 return 20;
8552 else
8553 return 2;
8554 }
8555 else
8556 {
8557 if (from == HI_REGS || to == HI_REGS)
8558 return 4;
8559 else
8560 return 2;
8561 }
8562 }
8563
8564 /* Implement TARGET_MEMORY_MOVE_COST. */
8565
8566 int
8567 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8568 bool in ATTRIBUTE_UNUSED)
8569 {
8570 if (TARGET_32BIT)
8571 return 10;
8572 else
8573 {
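      /* For Thumb-1: sub-word modes cost a flat 8, an SImode move costs
	 2 * 4 = 8 for LO_REGS (16 otherwise), and a DImode move costs 16
	 (or 32). */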
8574 if (GET_MODE_SIZE (mode) < 4)
8575 return 8;
8576 else
8577 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8578 }
8579 }
8580
8581 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8582 It corrects the value of COST based on the relationship between
8583 INSN and DEP through the dependence LINK. It returns the new
8584 value. There is a per-core adjust_cost hook to adjust scheduler costs
8585 and the per-core hook can choose to completely override the generic
8586 adjust_cost function. Only put bits of code into arm_adjust_cost that
8587 are common across all cores. */
8588 static int
8589 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8590 {
8591 rtx i_pat, d_pat;
8592
8593 /* When generating Thumb-1 code, we want to place flag-setting operations
8594 close to a conditional branch which depends on them, so that we can
8595 omit the comparison. */
8596 if (TARGET_THUMB1
8597 && REG_NOTE_KIND (link) == 0
8598 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8599 && recog_memoized (dep) >= 0
8600 && get_attr_conds (dep) == CONDS_SET)
8601 return 0;
8602
8603 if (current_tune->sched_adjust_cost != NULL)
8604 {
8605 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8606 return cost;
8607 }
8608
8609 /* XXX Is this strictly true? */
8610 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8611 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8612 return 0;
8613
8614 /* Call insns don't incur a stall, even if they follow a load. */
8615 if (REG_NOTE_KIND (link) == 0
8616 && GET_CODE (insn) == CALL_INSN)
8617 return 1;
8618
8619 if ((i_pat = single_set (insn)) != NULL
8620 && GET_CODE (SET_SRC (i_pat)) == MEM
8621 && (d_pat = single_set (dep)) != NULL
8622 && GET_CODE (SET_DEST (d_pat)) == MEM)
8623 {
8624 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8625 /* This is a load after a store; there is no conflict if the load reads
8626 from a cached area. Assume that loads from the stack, and from the
8627 constant pool are cached, and that others will miss. This is a
8628 hack. */
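      /* For example, a load from [sp, #8] or from the literal pool that
	 follows a store gets a cost of 1, while a load through an arbitrary
	 pointer register keeps the original cost. */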
8629
8630 if ((GET_CODE (src_mem) == SYMBOL_REF
8631 && CONSTANT_POOL_ADDRESS_P (src_mem))
8632 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8633 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8634 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8635 return 1;
8636 }
8637
8638 return cost;
8639 }
8640
8641 static int
8642 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8643 {
8644 if (TARGET_32BIT)
8645 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8646 else
8647 return (optimize > 0) ? 2 : 0;
8648 }
8649
8650 static int
8651 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8652 {
8653 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8654 }
8655
8656 static bool fp_consts_inited = false;
8657
8658 static REAL_VALUE_TYPE value_fp0;
8659
8660 static void
8661 init_fp_table (void)
8662 {
8663 REAL_VALUE_TYPE r;
8664
8665 r = REAL_VALUE_ATOF ("0", DFmode);
8666 value_fp0 = r;
8667 fp_consts_inited = true;
8668 }
8669
8670 /* Return TRUE if rtx X is a valid immediate FP constant. */
8671 int
8672 arm_const_double_rtx (rtx x)
8673 {
8674 REAL_VALUE_TYPE r;
8675
8676 if (!fp_consts_inited)
8677 init_fp_table ();
8678
8679 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8680 if (REAL_VALUE_MINUS_ZERO (r))
8681 return 0;
8682
8683 if (REAL_VALUES_EQUAL (r, value_fp0))
8684 return 1;
8685
8686 return 0;
8687 }
8688
8689 /* VFPv3 has a fairly wide range of representable immediates, formed from
8690 "quarter-precision" floating-point values. These can be evaluated using this
8691 formula (with ^ for exponentiation):
8692
8693 -1^s * n * 2^-r
8694
8695 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8696 16 <= n <= 31 and 0 <= r <= 7.
8697
8698 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8699
8700 - A (most-significant) is the sign bit.
8701 - BCD are the exponent (encoded as r XOR 3).
8702 - EFGH are the mantissa (encoded as n - 16).
8703 */
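/* A worked example: 1.0 is 16 * 2^-4, so s = 0, n = 16 and r = 4; the
   encoding is A = 0, BCD = 4 XOR 3 = 7 and EFGH = 16 - 16 = 0, giving the
   index 0x70. The representable magnitudes therefore range from
   16 * 2^-7 = 0.125 up to 31 * 2^0 = 31. */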
8704
8705 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8706 fconst[sd] instruction, or -1 if X isn't suitable. */
8707 static int
8708 vfp3_const_double_index (rtx x)
8709 {
8710 REAL_VALUE_TYPE r, m;
8711 int sign, exponent;
8712 unsigned HOST_WIDE_INT mantissa, mant_hi;
8713 unsigned HOST_WIDE_INT mask;
8714 HOST_WIDE_INT m1, m2;
8715 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8716
8717 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8718 return -1;
8719
8720 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8721
8722 /* We can't represent these things, so detect them first. */
8723 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8724 return -1;
8725
8726 /* Extract sign, exponent and mantissa. */
8727 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8728 r = real_value_abs (&r);
8729 exponent = REAL_EXP (&r);
8730 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8731 highest (sign) bit, with a fixed binary point at bit point_pos.
8732 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8733 bits for the mantissa, this may fail (low bits would be lost). */
8734 real_ldexp (&m, &r, point_pos - exponent);
8735 REAL_VALUE_TO_INT (&m1, &m2, m);
8736 mantissa = m1;
8737 mant_hi = m2;
8738
8739 /* If there are bits set in the low part of the mantissa, we can't
8740 represent this value. */
8741 if (mantissa != 0)
8742 return -1;
8743
8744 /* Now make it so that mantissa contains the most-significant bits, and move
8745 the point_pos to indicate that the least-significant bits have been
8746 discarded. */
8747 point_pos -= HOST_BITS_PER_WIDE_INT;
8748 mantissa = mant_hi;
8749
8750 /* We can permit four significant bits of mantissa only, plus a high bit
8751 which is always 1. */
8752 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8753 if ((mantissa & mask) != 0)
8754 return -1;
8755
8756 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8757 mantissa >>= point_pos - 5;
8758
8759 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8760 floating-point immediate zero with Neon using an integer-zero load, but
8761 that case is handled elsewhere.) */
8762 if (mantissa == 0)
8763 return -1;
8764
8765 gcc_assert (mantissa >= 16 && mantissa <= 31);
8766
8767 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8768 normalized significands are in the range [1, 2). (Our mantissa is shifted
8769 left 4 places at this point relative to normalized IEEE754 values). GCC
8770 internally uses [0.5, 1) (see real.c), so the exponent returned from
8771 REAL_EXP must be altered. */
8772 exponent = 5 - exponent;
8773
8774 if (exponent < 0 || exponent > 7)
8775 return -1;
8776
8777 /* Sign, mantissa and exponent are now in the correct form to plug into the
8778 formula described in the comment above. */
8779 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8780 }
8781
8782 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8783 int
8784 vfp3_const_double_rtx (rtx x)
8785 {
8786 if (!TARGET_VFP3)
8787 return 0;
8788
8789 return vfp3_const_double_index (x) != -1;
8790 }
8791
8792 /* Recognize immediates which can be used in various Neon instructions. Legal
8793 immediates are described by the following table (for VMVN variants, the
8794 bitwise inverse of the constant shown is recognized. In either case, VMOV
8795 is output and the correct instruction to use for a given constant is chosen
8796 by the assembler). The constant shown is replicated across all elements of
8797 the destination vector.
8798
8799 insn elems variant constant (binary)
8800 ---- ----- ------- -----------------
8801 vmov i32 0 00000000 00000000 00000000 abcdefgh
8802 vmov i32 1 00000000 00000000 abcdefgh 00000000
8803 vmov i32 2 00000000 abcdefgh 00000000 00000000
8804 vmov i32 3 abcdefgh 00000000 00000000 00000000
8805 vmov i16 4 00000000 abcdefgh
8806 vmov i16 5 abcdefgh 00000000
8807 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8808 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8809 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8810 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8811 vmvn i16 10 00000000 abcdefgh
8812 vmvn i16 11 abcdefgh 00000000
8813 vmov i32 12 00000000 00000000 abcdefgh 11111111
8814 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8815 vmov i32 14 00000000 abcdefgh 11111111 11111111
8816 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8817 vmov i8 16 abcdefgh
8818 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8819 eeeeeeee ffffffff gggggggg hhhhhhhh
8820 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8821 vmov f32 19 00000000 00000000 00000000 00000000
8822
8823 For case 18, B = !b. Representable values are exactly those accepted by
8824 vfp3_const_double_index, but are output as floating-point numbers rather
8825 than indices.
8826
8827 For case 19, we will change it to vmov.i32 when assembling.
8828
8829 Variants 0-5 (inclusive) may also be used as immediates for the second
8830 operand of VORR/VBIC instructions.
8831
8832 The INVERSE argument causes the bitwise inverse of the given operand to be
8833 recognized instead (used for recognizing legal immediates for the VAND/VORN
8834 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8835 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8836 output, rather than the real insns vbic/vorr).
8837
8838 INVERSE makes no difference to the recognition of float vectors.
8839
8840 The return value is the variant of immediate as shown in the above table, or
8841 -1 if the given value doesn't match any of the listed patterns.
8842 */
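/* For instance, a V4SImode vector whose elements are all 0x000000ab matches
   variant 0 above (each 32-bit lane is 00000000 00000000 00000000 abcdefgh),
   so the return value is 0, *ELEMENTWIDTH is set to 32 and *MODCONST to
   0xab. */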
8843 static int
8844 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8845 rtx *modconst, int *elementwidth)
8846 {
8847 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8848 matches = 1; \
8849 for (i = 0; i < idx; i += (STRIDE)) \
8850 if (!(TEST)) \
8851 matches = 0; \
8852 if (matches) \
8853 { \
8854 immtype = (CLASS); \
8855 elsize = (ELSIZE); \
8856 break; \
8857 }
8858
8859 unsigned int i, elsize = 0, idx = 0, n_elts;
8860 unsigned int innersize;
8861 unsigned char bytes[16];
8862 int immtype = -1, matches;
8863 unsigned int invmask = inverse ? 0xff : 0;
8864 bool vector = GET_CODE (op) == CONST_VECTOR;
8865
8866 if (vector)
8867 {
8868 n_elts = CONST_VECTOR_NUNITS (op);
8869 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8870 }
8871 else
8872 {
8873 n_elts = 1;
8874 if (mode == VOIDmode)
8875 mode = DImode;
8876 innersize = GET_MODE_SIZE (mode);
8877 }
8878
8879 /* Vectors of float constants. */
8880 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8881 {
8882 rtx el0 = CONST_VECTOR_ELT (op, 0);
8883 REAL_VALUE_TYPE r0;
8884
8885 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
8886 return -1;
8887
8888 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8889
8890 for (i = 1; i < n_elts; i++)
8891 {
8892 rtx elt = CONST_VECTOR_ELT (op, i);
8893 REAL_VALUE_TYPE re;
8894
8895 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8896
8897 if (!REAL_VALUES_EQUAL (r0, re))
8898 return -1;
8899 }
8900
8901 if (modconst)
8902 *modconst = CONST_VECTOR_ELT (op, 0);
8903
8904 if (elementwidth)
8905 *elementwidth = 0;
8906
8907 if (el0 == CONST0_RTX (GET_MODE (el0)))
8908 return 19;
8909 else
8910 return 18;
8911 }
8912
8913 /* Splat vector constant out into a byte vector. */
8914 for (i = 0; i < n_elts; i++)
8915 {
8916 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8917 unsigned HOST_WIDE_INT elpart;
8918 unsigned int part, parts;
8919
8920 if (GET_CODE (el) == CONST_INT)
8921 {
8922 elpart = INTVAL (el);
8923 parts = 1;
8924 }
8925 else if (GET_CODE (el) == CONST_DOUBLE)
8926 {
8927 elpart = CONST_DOUBLE_LOW (el);
8928 parts = 2;
8929 }
8930 else
8931 gcc_unreachable ();
8932
8933 for (part = 0; part < parts; part++)
8934 {
8935 unsigned int byte;
8936 for (byte = 0; byte < innersize; byte++)
8937 {
8938 bytes[idx++] = (elpart & 0xff) ^ invmask;
8939 elpart >>= BITS_PER_UNIT;
8940 }
8941 if (GET_CODE (el) == CONST_DOUBLE)
8942 elpart = CONST_DOUBLE_HIGH (el);
8943 }
8944 }
8945
8946 /* Sanity check. */
8947 gcc_assert (idx == GET_MODE_SIZE (mode));
8948
8949 do
8950 {
8951 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8952 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8953
8954 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8955 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8956
8957 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8958 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8959
8960 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8961 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8962
8963 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8964
8965 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8966
8967 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8968 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8969
8970 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8971 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8972
8973 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8974 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8975
8976 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8977 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8978
8979 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8980
8981 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8982
8983 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8984 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8985
8986 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8987 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8988
8989 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8990 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8991
8992 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8993 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8994
8995 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8996
8997 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8998 && bytes[i] == bytes[(i + 8) % idx]);
8999 }
9000 while (0);
9001
9002 if (immtype == -1)
9003 return -1;
9004
9005 if (elementwidth)
9006 *elementwidth = elsize;
9007
9008 if (modconst)
9009 {
9010 unsigned HOST_WIDE_INT imm = 0;
9011
9012 /* Un-invert bytes of recognized vector, if necessary. */
9013 if (invmask != 0)
9014 for (i = 0; i < idx; i++)
9015 bytes[i] ^= invmask;
9016
9017 if (immtype == 17)
9018 {
9019 /* FIXME: Broken on 32-bit H_W_I hosts. */
9020 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9021
9022 for (i = 0; i < 8; i++)
9023 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9024 << (i * BITS_PER_UNIT);
9025
9026 *modconst = GEN_INT (imm);
9027 }
9028 else
9029 {
9030 unsigned HOST_WIDE_INT imm = 0;
9031
9032 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9033 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9034
9035 *modconst = GEN_INT (imm);
9036 }
9037 }
9038
9039 return immtype;
9040 #undef CHECK
9041 }
9042
9043 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9044 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9045 float elements), and a modified constant (whatever should be output for a
9046 VMOV) in *MODCONST. */
9047
9048 int
9049 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9050 rtx *modconst, int *elementwidth)
9051 {
9052 rtx tmpconst;
9053 int tmpwidth;
9054 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9055
9056 if (retval == -1)
9057 return 0;
9058
9059 if (modconst)
9060 *modconst = tmpconst;
9061
9062 if (elementwidth)
9063 *elementwidth = tmpwidth;
9064
9065 return 1;
9066 }
9067
9068 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9069 the immediate is valid, write a constant suitable for using as an operand
9070 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9071 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9072
9073 int
9074 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9075 rtx *modconst, int *elementwidth)
9076 {
9077 rtx tmpconst;
9078 int tmpwidth;
9079 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9080
9081 if (retval < 0 || retval > 5)
9082 return 0;
9083
9084 if (modconst)
9085 *modconst = tmpconst;
9086
9087 if (elementwidth)
9088 *elementwidth = tmpwidth;
9089
9090 return 1;
9091 }
9092
9093 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9094 the immediate is valid, write a constant suitable for using as an operand
9095 to VSHR/VSHL to *MODCONST and the corresponding element width to
9096 *ELEMENTWIDTH. ISLEFTSHIFT determines whether this is a left or right
9097 shift, because the two have different immediate limitations. */
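/* For example, with MODE == V8QImode (8-bit elements) a CONST_VECTOR of
   eight copies of 3 is accepted for either direction, while eight copies
   of 8 are only accepted for a right shift: left-shift counts must be
   strictly less than the element size, right-shift counts run from 1 to
   the element size inclusive. */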
9098
9099 int
9100 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9101 rtx *modconst, int *elementwidth,
9102 bool isleftshift)
9103 {
9104 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9105 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9106 unsigned HOST_WIDE_INT last_elt = 0;
9107 unsigned HOST_WIDE_INT maxshift;
9108
9109 /* Split vector constant out into a byte vector. */
9110 for (i = 0; i < n_elts; i++)
9111 {
9112 rtx el = CONST_VECTOR_ELT (op, i);
9113 unsigned HOST_WIDE_INT elpart;
9114
9115 if (GET_CODE (el) == CONST_INT)
9116 elpart = INTVAL (el);
9117 else if (GET_CODE (el) == CONST_DOUBLE)
9118 return 0;
9119 else
9120 gcc_unreachable ();
9121
9122 if (i != 0 && elpart != last_elt)
9123 return 0;
9124
9125 last_elt = elpart;
9126 }
9127
9128 /* The shift count is limited by the element size. */
9129 maxshift = innersize * 8;
9130
9131 if (isleftshift)
9132 {
9133 /* Left shift immediate value can be from 0 to <size>-1. */
9134 if (last_elt >= maxshift)
9135 return 0;
9136 }
9137 else
9138 {
9139 /* Right shift immediate value can be from 1 to <size>. */
9140 if (last_elt == 0 || last_elt > maxshift)
9141 return 0;
9142 }
9143
9144 if (elementwidth)
9145 *elementwidth = innersize * 8;
9146
9147 if (modconst)
9148 *modconst = CONST_VECTOR_ELT (op, 0);
9149
9150 return 1;
9151 }
9152
9153 /* Return a string suitable for output of Neon immediate logic operation
9154 MNEM. */
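/* For example, with MNEM == "vorr", a 32-bit element width and QUAD == 0,
   this returns a template along the lines of "vorr.i32\t%P0, %2", which
   the caller then hands back to the output machinery. */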
9155
9156 char *
9157 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9158 int inverse, int quad)
9159 {
9160 int width, is_valid;
9161 static char templ[40];
9162
9163 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9164
9165 gcc_assert (is_valid != 0);
9166
9167 if (quad)
9168 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9169 else
9170 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9171
9172 return templ;
9173 }
9174
9175 /* Return a string suitable for output of Neon immediate shift operation
9176 (VSHR or VSHL) MNEM. */
9177
9178 char *
9179 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9180 enum machine_mode mode, int quad,
9181 bool isleftshift)
9182 {
9183 int width, is_valid;
9184 static char templ[40];
9185
9186 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9187 gcc_assert (is_valid != 0);
9188
9189 if (quad)
9190 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9191 else
9192 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9193
9194 return templ;
9195 }
9196
9197 /* Output a sequence of pairwise operations to implement a reduction.
9198 NOTE: We do "too much work" here, because pairwise operations work on two
9199 registers-worth of operands in one go. Unfortunately we do not think those
9200 extra calculations can be exploited to do the full operation in fewer steps.
9201 Although all vector elements of the result but the first are ignored, we
9202 actually calculate the same result in each of the elements. An alternative
9203 such as initially loading a vector with zero to use as each of the second
9204 operands would use up an additional register and take an extra instruction,
9205 for no particular gain. */
9206
9207 void
9208 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9209 rtx (*reduc) (rtx, rtx, rtx))
9210 {
9211 enum machine_mode inner = GET_MODE_INNER (mode);
9212 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9213 rtx tmpsum = op1;
9214
9215 for (i = parts / 2; i >= 1; i /= 2)
9216 {
9217 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9218 emit_insn (reduc (dest, tmpsum, tmpsum));
9219 tmpsum = dest;
9220 }
9221 }
9222
9223 /* If VALS is a vector constant that can be loaded into a register
9224 using VDUP, generate instructions to do so and return an RTX to
9225 assign to the register. Otherwise return NULL_RTX. */
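/* For instance, a V8QImode CONST_VECTOR holding eight copies of 42 becomes
   (vec_duplicate:V8QI (reg:QI ...)) once the scalar has been copied into a
   core register, whereas a vector with differing elements, or elements
   wider than 4 bytes, returns NULL_RTX. */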
9226
9227 static rtx
9228 neon_vdup_constant (rtx vals)
9229 {
9230 enum machine_mode mode = GET_MODE (vals);
9231 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9232 int n_elts = GET_MODE_NUNITS (mode);
9233 bool all_same = true;
9234 rtx x;
9235 int i;
9236
9237 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9238 return NULL_RTX;
9239
9240 for (i = 0; i < n_elts; ++i)
9241 {
9242 x = XVECEXP (vals, 0, i);
9243 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9244 all_same = false;
9245 }
9246
9247 if (!all_same)
9248 /* The elements are not all the same. We could handle repeating
9249 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9250 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9251 vdup.i16). */
9252 return NULL_RTX;
9253
9254 /* We can load this constant by using VDUP and a constant in a
9255 single ARM register. This will be cheaper than a vector
9256 load. */
9257
9258 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9259 return gen_rtx_VEC_DUPLICATE (mode, x);
9260 }
9261
9262 /* Generate code to load VALS, which is a PARALLEL containing only
9263 constants (for vec_init) or CONST_VECTOR, efficiently into a
9264 register. Returns an RTX to copy into the register, or NULL_RTX
9265 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
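/* For instance, a V4SImode vector of four copies of 1 can usually be
   loaded with a single VMOV immediate, a vector such as {1, 2, 3, 4}
   falls back to a literal-pool load, and a PARALLEL containing a
   SYMBOL_REF element yields NULL_RTX. */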
9266
9267 rtx
9268 neon_make_constant (rtx vals)
9269 {
9270 enum machine_mode mode = GET_MODE (vals);
9271 rtx target;
9272 rtx const_vec = NULL_RTX;
9273 int n_elts = GET_MODE_NUNITS (mode);
9274 int n_const = 0;
9275 int i;
9276
9277 if (GET_CODE (vals) == CONST_VECTOR)
9278 const_vec = vals;
9279 else if (GET_CODE (vals) == PARALLEL)
9280 {
9281 /* A CONST_VECTOR must contain only CONST_INTs and
9282 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9283 Only store valid constants in a CONST_VECTOR. */
9284 for (i = 0; i < n_elts; ++i)
9285 {
9286 rtx x = XVECEXP (vals, 0, i);
9287 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9288 n_const++;
9289 }
9290 if (n_const == n_elts)
9291 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9292 }
9293 else
9294 gcc_unreachable ();
9295
9296 if (const_vec != NULL
9297 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9298 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9299 return const_vec;
9300 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9301 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9302 pipeline cycle; creating the constant takes one or two ARM
9303 pipeline cycles. */
9304 return target;
9305 else if (const_vec != NULL_RTX)
9306 /* Load from constant pool. On Cortex-A8 this takes two cycles
9307 (for either double or quad vectors). We cannot take advantage
9308 of single-cycle VLD1 because we need a PC-relative addressing
9309 mode. */
9310 return const_vec;
9311 else
9312 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9313 We cannot construct an initializer. */
9314 return NULL_RTX;
9315 }
9316
9317 /* Initialize vector TARGET to VALS. */
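/* Broadly: an all-constant vector goes through neon_make_constant, a splat
   of one non-constant value uses VDUP, a vector with exactly one varying
   lane is built from the constant part followed by a vset_lane, and
   anything else is assembled in a stack temporary and reloaded. */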
9318
9319 void
9320 neon_expand_vector_init (rtx target, rtx vals)
9321 {
9322 enum machine_mode mode = GET_MODE (target);
9323 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9324 int n_elts = GET_MODE_NUNITS (mode);
9325 int n_var = 0, one_var = -1;
9326 bool all_same = true;
9327 rtx x, mem;
9328 int i;
9329
9330 for (i = 0; i < n_elts; ++i)
9331 {
9332 x = XVECEXP (vals, 0, i);
9333 if (!CONSTANT_P (x))
9334 ++n_var, one_var = i;
9335
9336 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9337 all_same = false;
9338 }
9339
9340 if (n_var == 0)
9341 {
9342 rtx constant = neon_make_constant (vals);
9343 if (constant != NULL_RTX)
9344 {
9345 emit_move_insn (target, constant);
9346 return;
9347 }
9348 }
9349
9350 /* Splat a single non-constant element if we can. */
9351 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9352 {
9353 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9354 emit_insn (gen_rtx_SET (VOIDmode, target,
9355 gen_rtx_VEC_DUPLICATE (mode, x)));
9356 return;
9357 }
9358
9359 /* One field is non-constant. Load constant then overwrite varying
9360 field. This is more efficient than using the stack. */
9361 if (n_var == 1)
9362 {
9363 rtx copy = copy_rtx (vals);
9364 rtx index = GEN_INT (one_var);
9365
9366 /* Load constant part of vector, substitute neighboring value for
9367 varying element. */
9368 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9369 neon_expand_vector_init (target, copy);
9370
9371 /* Insert variable. */
9372 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9373 switch (mode)
9374 {
9375 case V8QImode:
9376 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9377 break;
9378 case V16QImode:
9379 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9380 break;
9381 case V4HImode:
9382 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9383 break;
9384 case V8HImode:
9385 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9386 break;
9387 case V2SImode:
9388 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9389 break;
9390 case V4SImode:
9391 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9392 break;
9393 case V2SFmode:
9394 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9395 break;
9396 case V4SFmode:
9397 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9398 break;
9399 case V2DImode:
9400 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9401 break;
9402 default:
9403 gcc_unreachable ();
9404 }
9405 return;
9406 }
9407
9408 /* Construct the vector in memory one field at a time
9409 and load the whole vector. */
9410 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9411 for (i = 0; i < n_elts; i++)
9412 emit_move_insn (adjust_address_nv (mem, inner_mode,
9413 i * GET_MODE_SIZE (inner_mode)),
9414 XVECEXP (vals, 0, i));
9415 emit_move_insn (target, mem);
9416 }
9417
9418 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Issue
9419 error ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9420 reported source locations are bogus. */
9421
9422 static void
9423 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9424 const char *err)
9425 {
9426 HOST_WIDE_INT lane;
9427
9428 gcc_assert (GET_CODE (operand) == CONST_INT);
9429
9430 lane = INTVAL (operand);
9431
9432 if (lane < low || lane >= high)
9433 error (err);
9434 }
9435
9436 /* Bounds-check lanes. */
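/* For example, neon_lane_bounds (operands[2], 0, 4) reports "lane out of
   range" for any lane index outside 0..3; it is typically invoked from
   the lane-indexed patterns in neon.md. */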
9437
9438 void
9439 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9440 {
9441 bounds_check (operand, low, high, "lane out of range");
9442 }
9443
9444 /* Bounds-check constants. */
9445
9446 void
9447 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9448 {
9449 bounds_check (operand, low, high, "constant out of range");
9450 }
9451
9452 HOST_WIDE_INT
9453 neon_element_bits (enum machine_mode mode)
9454 {
9455 if (mode == DImode)
9456 return GET_MODE_BITSIZE (mode);
9457 else
9458 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9459 }
9460
9461 \f
9462 /* Predicates for `match_operand' and `match_operator'. */
9463
9464 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9465 WB is true if full writeback address modes are allowed and is false
9466 if limited writeback address modes (POST_INC and PRE_DEC) are
9467 allowed. */
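/* For example, (mem (reg rn)) and (mem (plus (reg rn) (const_int 8))) are
   accepted (reg+immediate offsets must be word-aligned and lie in the
   range -1020..1020), while PRE_INC and POST_DEC addresses are accepted
   only when WB is true. */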
9468
9469 int
9470 arm_coproc_mem_operand (rtx op, bool wb)
9471 {
9472 rtx ind;
9473
9474 /* Reject eliminable registers. */
9475 if (! (reload_in_progress || reload_completed)
9476 && ( reg_mentioned_p (frame_pointer_rtx, op)
9477 || reg_mentioned_p (arg_pointer_rtx, op)
9478 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9479 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9480 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9481 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9482 return FALSE;
9483
9484 /* Constants are converted into offsets from labels. */
9485 if (GET_CODE (op) != MEM)
9486 return FALSE;
9487
9488 ind = XEXP (op, 0);
9489
9490 if (reload_completed
9491 && (GET_CODE (ind) == LABEL_REF
9492 || (GET_CODE (ind) == CONST
9493 && GET_CODE (XEXP (ind, 0)) == PLUS
9494 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9495 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9496 return TRUE;
9497
9498 /* Match: (mem (reg)). */
9499 if (GET_CODE (ind) == REG)
9500 return arm_address_register_rtx_p (ind, 0);
9501
9502 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9503 acceptable in any case (subject to verification by
9504 arm_address_register_rtx_p). We need WB to be true to accept
9505 PRE_INC and POST_DEC. */
9506 if (GET_CODE (ind) == POST_INC
9507 || GET_CODE (ind) == PRE_DEC
9508 || (wb
9509 && (GET_CODE (ind) == PRE_INC
9510 || GET_CODE (ind) == POST_DEC)))
9511 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9512
9513 if (wb
9514 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9515 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9516 && GET_CODE (XEXP (ind, 1)) == PLUS
9517 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9518 ind = XEXP (ind, 1);
9519
9520 /* Match:
9521 (plus (reg)
9522 (const)). */
9523 if (GET_CODE (ind) == PLUS
9524 && GET_CODE (XEXP (ind, 0)) == REG
9525 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9526 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9527 && INTVAL (XEXP (ind, 1)) > -1024
9528 && INTVAL (XEXP (ind, 1)) < 1024
9529 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9530 return TRUE;
9531
9532 return FALSE;
9533 }
9534
9535 /* Return TRUE if OP is a memory operand from or to which we can load or
9536 store a vector. TYPE is one of the following values:
9537 0 - Vector load/store (vldr)
9538 1 - Core registers (ldm)
9539 2 - Element/structure loads (vld1)
9540 */
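/* For the vldr case (TYPE == 0), reg+immediate addresses are restricted to
   word-aligned offsets in the range -1020..1012; TYPE 1 and 2 accept only
   a plain register, plus post-increment for TYPE 2. */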
9541 int
9542 neon_vector_mem_operand (rtx op, int type)
9543 {
9544 rtx ind;
9545
9546 /* Reject eliminable registers. */
9547 if (! (reload_in_progress || reload_completed)
9548 && ( reg_mentioned_p (frame_pointer_rtx, op)
9549 || reg_mentioned_p (arg_pointer_rtx, op)
9550 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9551 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9552 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9553 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9554 return FALSE;
9555
9556 /* Constants are converted into offsets from labels. */
9557 if (GET_CODE (op) != MEM)
9558 return FALSE;
9559
9560 ind = XEXP (op, 0);
9561
9562 if (reload_completed
9563 && (GET_CODE (ind) == LABEL_REF
9564 || (GET_CODE (ind) == CONST
9565 && GET_CODE (XEXP (ind, 0)) == PLUS
9566 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9567 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9568 return TRUE;
9569
9570 /* Match: (mem (reg)). */
9571 if (GET_CODE (ind) == REG)
9572 return arm_address_register_rtx_p (ind, 0);
9573
9574 /* Allow post-increment with Neon registers. */
9575 if ((type != 1 && GET_CODE (ind) == POST_INC)
9576 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9577 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9578
9579 /* FIXME: vld1 allows register post-modify. */
9580
9581 /* Match:
9582 (plus (reg)
9583 (const)). */
9584 if (type == 0
9585 && GET_CODE (ind) == PLUS
9586 && GET_CODE (XEXP (ind, 0)) == REG
9587 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9588 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9589 && INTVAL (XEXP (ind, 1)) > -1024
9590 && INTVAL (XEXP (ind, 1)) < 1016
9591 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9592 return TRUE;
9593
9594 return FALSE;
9595 }
9596
9597 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9598 type. */
9599 int
9600 neon_struct_mem_operand (rtx op)
9601 {
9602 rtx ind;
9603
9604 /* Reject eliminable registers. */
9605 if (! (reload_in_progress || reload_completed)
9606 && ( reg_mentioned_p (frame_pointer_rtx, op)
9607 || reg_mentioned_p (arg_pointer_rtx, op)
9608 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9609 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9610 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9611 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9612 return FALSE;
9613
9614 /* Constants are converted into offsets from labels. */
9615 if (GET_CODE (op) != MEM)
9616 return FALSE;
9617
9618 ind = XEXP (op, 0);
9619
9620 if (reload_completed
9621 && (GET_CODE (ind) == LABEL_REF
9622 || (GET_CODE (ind) == CONST
9623 && GET_CODE (XEXP (ind, 0)) == PLUS
9624 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9625 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9626 return TRUE;
9627
9628 /* Match: (mem (reg)). */
9629 if (GET_CODE (ind) == REG)
9630 return arm_address_register_rtx_p (ind, 0);
9631
9632 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9633 if (GET_CODE (ind) == POST_INC
9634 || GET_CODE (ind) == PRE_DEC)
9635 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9636
9637 return FALSE;
9638 }
9639
9640 /* Return true if X is a register that will be eliminated later on. */
9641 int
9642 arm_eliminable_register (rtx x)
9643 {
9644 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9645 || REGNO (x) == ARG_POINTER_REGNUM
9646 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9647 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9648 }
9649
9650 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9651 coprocessor registers. Otherwise return NO_REGS. */
9652
9653 enum reg_class
9654 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9655 {
9656 if (mode == HFmode)
9657 {
9658 if (!TARGET_NEON_FP16)
9659 return GENERAL_REGS;
9660 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9661 return NO_REGS;
9662 return GENERAL_REGS;
9663 }
9664
9665 /* The neon move patterns handle all legitimate vector and struct
9666 addresses. */
9667 if (TARGET_NEON
9668 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9669 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9670 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9671 || VALID_NEON_STRUCT_MODE (mode)))
9672 return NO_REGS;
9673
9674 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9675 return NO_REGS;
9676
9677 return GENERAL_REGS;
9678 }
9679
9680 /* Values which must be returned in the most-significant end of the return
9681 register. */
9682
9683 static bool
9684 arm_return_in_msb (const_tree valtype)
9685 {
9686 return (TARGET_AAPCS_BASED
9687 && BYTES_BIG_ENDIAN
9688 && (AGGREGATE_TYPE_P (valtype)
9689 || TREE_CODE (valtype) == COMPLEX_TYPE
9690 || FIXED_POINT_TYPE_P (valtype)));
9691 }
9692
9693 /* Return TRUE if X references a SYMBOL_REF. */
9694 int
9695 symbol_mentioned_p (rtx x)
9696 {
9697 const char * fmt;
9698 int i;
9699
9700 if (GET_CODE (x) == SYMBOL_REF)
9701 return 1;
9702
9703 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9704 are constant offsets, not symbols. */
9705 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9706 return 0;
9707
9708 fmt = GET_RTX_FORMAT (GET_CODE (x));
9709
9710 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9711 {
9712 if (fmt[i] == 'E')
9713 {
9714 int j;
9715
9716 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9717 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9718 return 1;
9719 }
9720 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9721 return 1;
9722 }
9723
9724 return 0;
9725 }
9726
9727 /* Return TRUE if X references a LABEL_REF. */
9728 int
9729 label_mentioned_p (rtx x)
9730 {
9731 const char * fmt;
9732 int i;
9733
9734 if (GET_CODE (x) == LABEL_REF)
9735 return 1;
9736
9737 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9738 instruction, but they are constant offsets, not symbols. */
9739 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9740 return 0;
9741
9742 fmt = GET_RTX_FORMAT (GET_CODE (x));
9743 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9744 {
9745 if (fmt[i] == 'E')
9746 {
9747 int j;
9748
9749 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9750 if (label_mentioned_p (XVECEXP (x, i, j)))
9751 return 1;
9752 }
9753 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9754 return 1;
9755 }
9756
9757 return 0;
9758 }
9759
9760 int
9761 tls_mentioned_p (rtx x)
9762 {
9763 switch (GET_CODE (x))
9764 {
9765 case CONST:
9766 return tls_mentioned_p (XEXP (x, 0));
9767
9768 case UNSPEC:
9769 if (XINT (x, 1) == UNSPEC_TLS)
9770 return 1;
9771
9772 default:
9773 return 0;
9774 }
9775 }
9776
9777 /* Must not copy any rtx that uses a pc-relative address. */
9778
9779 static int
9780 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9781 {
9782 if (GET_CODE (*x) == UNSPEC
9783 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9784 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9785 return 1;
9786 return 0;
9787 }
9788
9789 static bool
9790 arm_cannot_copy_insn_p (rtx insn)
9791 {
9792 /* The tls call insn cannot be copied, as it is paired with a data
9793 word. */
9794 if (recog_memoized (insn) == CODE_FOR_tlscall)
9795 return true;
9796
9797 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9798 }
9799
9800 enum rtx_code
9801 minmax_code (rtx x)
9802 {
9803 enum rtx_code code = GET_CODE (x);
9804
9805 switch (code)
9806 {
9807 case SMAX:
9808 return GE;
9809 case SMIN:
9810 return LE;
9811 case UMIN:
9812 return LEU;
9813 case UMAX:
9814 return GEU;
9815 default:
9816 gcc_unreachable ();
9817 }
9818 }
9819
9820 /* Match pair of min/max operators that can be implemented via usat/ssat. */
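/* For example, the bounds [0, 255] describe "usat #8" (*MASK == 8,
   *SIGNED_SAT == false), while [-128, 127] describe "ssat #8"
   (*MASK == 8, *SIGNED_SAT == true). */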
9821
9822 bool
9823 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9824 int *mask, bool *signed_sat)
9825 {
9826 /* The high bound must be a power of two minus one. */
9827 int log = exact_log2 (INTVAL (hi_bound) + 1);
9828 if (log == -1)
9829 return false;
9830
9831 /* The low bound is either zero (for usat) or one less than the
9832 negation of the high bound (for ssat). */
9833 if (INTVAL (lo_bound) == 0)
9834 {
9835 if (mask)
9836 *mask = log;
9837 if (signed_sat)
9838 *signed_sat = false;
9839
9840 return true;
9841 }
9842
9843 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9844 {
9845 if (mask)
9846 *mask = log + 1;
9847 if (signed_sat)
9848 *signed_sat = true;
9849
9850 return true;
9851 }
9852
9853 return false;
9854 }
9855
9856 /* Return 1 if memory locations are adjacent. */
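/* Roughly, (mem (reg r0)) and (mem (plus (reg r0) (const_int 4))) count as
   adjacent: the offsets must differ by exactly 4 and use the same base
   register, and on cores with load delay slots the pair is only worth
   merging when optimizing for size. */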
9857 int
9858 adjacent_mem_locations (rtx a, rtx b)
9859 {
9860 /* We don't guarantee to preserve the order of these memory refs. */
9861 if (volatile_refs_p (a) || volatile_refs_p (b))
9862 return 0;
9863
9864 if ((GET_CODE (XEXP (a, 0)) == REG
9865 || (GET_CODE (XEXP (a, 0)) == PLUS
9866 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9867 && (GET_CODE (XEXP (b, 0)) == REG
9868 || (GET_CODE (XEXP (b, 0)) == PLUS
9869 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9870 {
9871 HOST_WIDE_INT val0 = 0, val1 = 0;
9872 rtx reg0, reg1;
9873 int val_diff;
9874
9875 if (GET_CODE (XEXP (a, 0)) == PLUS)
9876 {
9877 reg0 = XEXP (XEXP (a, 0), 0);
9878 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9879 }
9880 else
9881 reg0 = XEXP (a, 0);
9882
9883 if (GET_CODE (XEXP (b, 0)) == PLUS)
9884 {
9885 reg1 = XEXP (XEXP (b, 0), 0);
9886 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9887 }
9888 else
9889 reg1 = XEXP (b, 0);
9890
9891 /* Don't accept any offset that will require multiple
9892 instructions to handle, since this would cause the
9893 arith_adjacentmem pattern to output an overlong sequence. */
9894 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9895 return 0;
9896
9897 /* Don't allow an eliminable register: register elimination can make
9898 the offset too large. */
9899 if (arm_eliminable_register (reg0))
9900 return 0;
9901
9902 val_diff = val1 - val0;
9903
9904 if (arm_ld_sched)
9905 {
9906 /* If the target has load delay slots, then there's no benefit
9907 to using an ldm instruction unless the offset is zero and
9908 we are optimizing for size. */
9909 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9910 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9911 && (val_diff == 4 || val_diff == -4));
9912 }
9913
9914 return ((REGNO (reg0) == REGNO (reg1))
9915 && (val_diff == 4 || val_diff == -4));
9916 }
9917
9918 return 0;
9919 }
9920
9921 /* Return true if OP is a valid load or store multiple operation. LOAD is true
9922 for load operations, false for store operations. CONSECUTIVE is true
9923 if the register numbers in the operation must be consecutive in the register
9924 bank. RETURN_PC is true if value is to be loaded in PC.
9925 The pattern we are trying to match for load is:
9926 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
9927 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
9928 :
9929 :
9930 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
9931 ]
9932 where
9933 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
9934 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
9935 3. If consecutive is TRUE, then for kth register being loaded,
9936 REGNO (R_dk) = REGNO (R_d0) + k.
9937 The pattern for store is similar. */
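/* As a concrete illustration, a two-register SImode load multiple would
   match a PARALLEL of the rough shape
     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))]
   with ascending destination register numbers and offsets growing by the
   register increment. */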
9938 bool
9939 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
9940 bool consecutive, bool return_pc)
9941 {
9942 HOST_WIDE_INT count = XVECLEN (op, 0);
9943 rtx reg, mem, addr;
9944 unsigned regno;
9945 unsigned first_regno;
9946 HOST_WIDE_INT i = 1, base = 0, offset = 0;
9947 rtx elt;
9948 bool addr_reg_in_reglist = false;
9949 bool update = false;
9950 int reg_increment;
9951 int offset_adj;
9952 int regs_per_val;
9953
9954 /* If not in SImode, then registers must be consecutive
9955 (e.g., VLDM instructions for DFmode). */
9956 gcc_assert ((mode == SImode) || consecutive);
9957 /* Setting return_pc for stores is illegal. */
9958 gcc_assert (!return_pc || load);
9959
9960 /* Set up the increments and the regs per val based on the mode. */
9961 reg_increment = GET_MODE_SIZE (mode);
9962 regs_per_val = reg_increment / 4;
9963 offset_adj = return_pc ? 1 : 0;
9964
9965 if (count <= 1
9966 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
9967 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
9968 return false;
9969
9970 /* Check if this is a write-back. */
9971 elt = XVECEXP (op, 0, offset_adj);
9972 if (GET_CODE (SET_SRC (elt)) == PLUS)
9973 {
9974 i++;
9975 base = 1;
9976 update = true;
9977
9978 /* The offset adjustment must be the number of registers being
9979 popped times the size of a single register. */
9980 if (!REG_P (SET_DEST (elt))
9981 || !REG_P (XEXP (SET_SRC (elt), 0))
9982 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
9983 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
9984 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
9985 ((count - 1 - offset_adj) * reg_increment))
9986 return false;
9987 }
9988
9989 i = i + offset_adj;
9990 base = base + offset_adj;
9991 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
9992 success depends on the type: VLDM can do just one reg,
9993 LDM must do at least two. */
9994 if ((count <= i) && (mode == SImode))
9995 return false;
9996
9997 elt = XVECEXP (op, 0, i - 1);
9998 if (GET_CODE (elt) != SET)
9999 return false;
10000
10001 if (load)
10002 {
10003 reg = SET_DEST (elt);
10004 mem = SET_SRC (elt);
10005 }
10006 else
10007 {
10008 reg = SET_SRC (elt);
10009 mem = SET_DEST (elt);
10010 }
10011
10012 if (!REG_P (reg) || !MEM_P (mem))
10013 return false;
10014
10015 regno = REGNO (reg);
10016 first_regno = regno;
10017 addr = XEXP (mem, 0);
10018 if (GET_CODE (addr) == PLUS)
10019 {
10020 if (!CONST_INT_P (XEXP (addr, 1)))
10021 return false;
10022
10023 offset = INTVAL (XEXP (addr, 1));
10024 addr = XEXP (addr, 0);
10025 }
10026
10027 if (!REG_P (addr))
10028 return false;
10029
10030 /* Don't allow SP to be loaded unless it is also the base register. It
10031 guarantees that SP is reset correctly when an LDM instruction
10032 is interrupted. Otherwise, we might end up with a corrupt stack. */
10033 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10034 return false;
10035
10036 for (; i < count; i++)
10037 {
10038 elt = XVECEXP (op, 0, i);
10039 if (GET_CODE (elt) != SET)
10040 return false;
10041
10042 if (load)
10043 {
10044 reg = SET_DEST (elt);
10045 mem = SET_SRC (elt);
10046 }
10047 else
10048 {
10049 reg = SET_SRC (elt);
10050 mem = SET_DEST (elt);
10051 }
10052
10053 if (!REG_P (reg)
10054 || GET_MODE (reg) != mode
10055 || REGNO (reg) <= regno
10056 || (consecutive
10057 && (REGNO (reg) !=
10058 (unsigned int) (first_regno + regs_per_val * (i - base))))
10059 /* Don't allow SP to be loaded unless it is also the base register. It
10060 guarantees that SP is reset correctly when an LDM instruction
10061 is interrupted. Otherwise, we might end up with a corrupt stack. */
10062 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10063 || !MEM_P (mem)
10064 || GET_MODE (mem) != mode
10065 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10066 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10067 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10068 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10069 offset + (i - base) * reg_increment))
10070 && (!REG_P (XEXP (mem, 0))
10071 || offset + (i - base) * reg_increment != 0)))
10072 return false;
10073
10074 regno = REGNO (reg);
10075 if (regno == REGNO (addr))
10076 addr_reg_in_reglist = true;
10077 }
10078
10079 if (load)
10080 {
10081 if (update && addr_reg_in_reglist)
10082 return false;
10083
10084 /* For Thumb-1, the address register is always modified - either by write-back
10085 or by explicit load. If the pattern does not describe an update,
10086 then the address register must be in the list of loaded registers. */
10087 if (TARGET_THUMB1)
10088 return update || addr_reg_in_reglist;
10089 }
10090
10091 return true;
10092 }
10093
10094 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10095 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10096 instruction. ADD_OFFSET is nonzero if the base address register needs
10097 to be modified with an add instruction before we can use it. */
10098
10099 static bool
10100 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10101 int nops, HOST_WIDE_INT add_offset)
10102 {
10103 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10104 if the offset isn't small enough. The reason 2 ldrs are faster
10105 is because these ARMs are able to do more than one cache access
10106 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10107 whilst the ARM8 has a double bandwidth cache. This means that
10108 these cores can do both an instruction fetch and a data fetch in
10109 a single cycle, so the trick of calculating the address into a
10110 scratch register (one of the result regs) and then doing a load
10111 multiple actually becomes slower (and no smaller in code size).
10112 That is the transformation
10113
10114 ldr rd1, [rbase + offset]
10115 ldr rd2, [rbase + offset + 4]
10116
10117 to
10118
10119 add rd1, rbase, offset
10120 ldmia rd1, {rd1, rd2}
10121
10122 produces worse code -- '3 cycles + any stalls on rd2' instead of
10123 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10124 access per cycle, the first sequence could never complete in less
10125 than 6 cycles, whereas the ldm sequence would only take 5 and
10126 would make better use of sequential accesses if not hitting the
10127 cache.
10128
10129 We cheat here and test 'arm_ld_sched' which we currently know to
10130 only be true for the ARM8, ARM9 and StrongARM. If this ever
10131 changes, then the test below needs to be reworked. */
10132 if (nops == 2 && arm_ld_sched && add_offset != 0)
10133 return false;
10134
10135 /* XScale has load-store double instructions, but they have stricter
10136 alignment requirements than load-store multiple, so we cannot
10137 use them.
10138
10139 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10140 the pipeline until completion.
10141
10142 NREGS CYCLES
10143 1 3
10144 2 4
10145 3 5
10146 4 6
10147
10148 An ldr instruction takes 1-3 cycles, but does not block the
10149 pipeline.
10150
10151 NREGS CYCLES
10152 1 1-3
10153 2 2-6
10154 3 3-9
10155 4 4-12
10156
10157 Best case ldr will always win. However, the more ldr instructions
10158 we issue, the less likely we are to be able to schedule them well.
10159 Using ldr instructions also increases code size.
10160
10161 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10162 for counts of 3 or 4 regs. */
10163 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10164 return false;
10165 return true;
10166 }
10167
10168 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10169 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10170 an array ORDER which describes the sequence to use when accessing the
10171 offsets that produces an ascending order. In this sequence, each
10172 offset must be larger by exactly 4 than the previous one. ORDER[0]
10173 must have been filled in with the lowest offset by the caller.
10174 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10175 we use to verify that ORDER produces an ascending order of registers.
10176 Return true if it was possible to construct such an order, false if
10177 not. */
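/* For instance, with NOPS == 3, UNSORTED_OFFSETS == {8, 0, 4} and
   ORDER[0] == 1 (the index of the lowest offset), this fills in
   ORDER == {1, 2, 0}, since the offsets 0, 4, 8 ascend in steps of
   exactly 4. */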
10178
10179 static bool
10180 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10181 int *unsorted_regs)
10182 {
10183 int i;
10184 for (i = 1; i < nops; i++)
10185 {
10186 int j;
10187
10188 order[i] = order[i - 1];
10189 for (j = 0; j < nops; j++)
10190 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10191 {
10192 /* We must find exactly one offset that is higher than the
10193 previous one by 4. */
10194 if (order[i] != order[i - 1])
10195 return false;
10196 order[i] = j;
10197 }
10198 if (order[i] == order[i - 1])
10199 return false;
10200 /* The register numbers must be ascending. */
10201 if (unsorted_regs != NULL
10202 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10203 return false;
10204 }
10205 return true;
10206 }
10207
10208 /* Used to determine in a peephole whether a sequence of load
10209 instructions can be changed into a load-multiple instruction.
10210 NOPS is the number of separate load instructions we are examining. The
10211 first NOPS entries in OPERANDS are the destination registers, the
10212 next NOPS entries are memory operands. If this function is
10213 successful, *BASE is set to the common base register of the memory
10214 accesses; *LOAD_OFFSET is set to the first memory location's offset
10215 from that base register.
10216 REGS is an array filled in with the destination register numbers.
10217 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10218 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10219 the sequence of registers in REGS matches the loads from ascending memory
10220 locations, and the function verifies that the register numbers are
10221 themselves ascending. If CHECK_REGS is false, the register numbers
10222 are stored in the order they are found in the operands. */
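/* As an illustration, loads of r4 from [r0] and r5 from [r0, #4] yield
   REGS == {4, 5}, *BASE == 0, *LOAD_OFFSET == 0 and a return value of 1
   (the ldmia case), provided the transformation is judged profitable for
   the target. */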
10223 static int
10224 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10225 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10226 {
10227 int unsorted_regs[MAX_LDM_STM_OPS];
10228 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10229 int order[MAX_LDM_STM_OPS];
10230 rtx base_reg_rtx = NULL;
10231 int base_reg = -1;
10232 int i, ldm_case;
10233
10234 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10235 easily extended if required. */
10236 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10237
10238 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10239
10240 /* Loop over the operands and check that the memory references are
10241 suitable (i.e. immediate offsets from the same base register). At
10242 the same time, extract the target register, and the memory
10243 offsets. */
10244 for (i = 0; i < nops; i++)
10245 {
10246 rtx reg;
10247 rtx offset;
10248
10249 /* Convert a subreg of a mem into the mem itself. */
10250 if (GET_CODE (operands[nops + i]) == SUBREG)
10251 operands[nops + i] = alter_subreg (operands + (nops + i));
10252
10253 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10254
10255 /* Don't reorder volatile memory references; it doesn't seem worth
10256 looking for the case where the order is ok anyway. */
10257 if (MEM_VOLATILE_P (operands[nops + i]))
10258 return 0;
10259
10260 offset = const0_rtx;
10261
10262 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10263 || (GET_CODE (reg) == SUBREG
10264 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10265 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10266 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10267 == REG)
10268 || (GET_CODE (reg) == SUBREG
10269 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10270 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10271 == CONST_INT)))
10272 {
10273 if (i == 0)
10274 {
10275 base_reg = REGNO (reg);
10276 base_reg_rtx = reg;
10277 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10278 return 0;
10279 }
10280 else if (base_reg != (int) REGNO (reg))
10281 /* Not addressed from the same base register. */
10282 return 0;
10283
10284 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10285 ? REGNO (operands[i])
10286 : REGNO (SUBREG_REG (operands[i])));
10287
10288 /* If it isn't an integer register, or if it overwrites the
10289 base register but isn't the last insn in the list, then
10290 we can't do this. */
10291 if (unsorted_regs[i] < 0
10292 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10293 || unsorted_regs[i] > 14
10294 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10295 return 0;
10296
10297 unsorted_offsets[i] = INTVAL (offset);
10298 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10299 order[0] = i;
10300 }
10301 else
10302 /* Not a suitable memory address. */
10303 return 0;
10304 }
10305
10306 /* All the useful information has now been extracted from the
10307 operands into unsorted_regs and unsorted_offsets; additionally,
10308 order[0] has been set to the lowest offset in the list. Sort
10309 the offsets into order, verifying that they are adjacent, and
10310 check that the register numbers are ascending. */
10311 if (!compute_offset_order (nops, unsorted_offsets, order,
10312 check_regs ? unsorted_regs : NULL))
10313 return 0;
10314
10315 if (saved_order)
10316 memcpy (saved_order, order, sizeof order);
10317
10318 if (base)
10319 {
10320 *base = base_reg;
10321
10322 for (i = 0; i < nops; i++)
10323 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10324
10325 *load_offset = unsorted_offsets[order[0]];
10326 }
10327
10328 if (TARGET_THUMB1
10329 && !peep2_reg_dead_p (nops, base_reg_rtx))
10330 return 0;
10331
10332 if (unsorted_offsets[order[0]] == 0)
10333 ldm_case = 1; /* ldmia */
10334 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10335 ldm_case = 2; /* ldmib */
10336 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10337 ldm_case = 3; /* ldmda */
10338 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10339 ldm_case = 4; /* ldmdb */
10340 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10341 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10342 ldm_case = 5;
10343 else
10344 return 0;
10345
10346 if (!multiple_operation_profitable_p (false, nops,
10347 ldm_case == 5
10348 ? unsorted_offsets[order[0]] : 0))
10349 return 0;
10350
10351 return ldm_case;
10352 }
10353
10354 /* Used to determine in a peephole whether a sequence of store instructions can
10355 be changed into a store-multiple instruction.
10356 NOPS is the number of separate store instructions we are examining.
10357 NOPS_TOTAL is the total number of instructions recognized by the peephole
10358 pattern.
10359 The first NOPS entries in OPERANDS are the source registers, the next
10360 NOPS entries are memory operands. If this function is successful, *BASE is
10361 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10362 to the first memory location's offset from that base register. REGS is an
10363 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10364 likewise filled with the corresponding rtx's.
10365 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10366 numbers to an ascending order of stores.
10367 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10368 from ascending memory locations, and the function verifies that the register
10369 numbers are themselves ascending. If CHECK_REGS is false, the register
10370 numbers are stored in the order they are found in the operands. */
10371 static int
10372 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10373 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10374 HOST_WIDE_INT *load_offset, bool check_regs)
10375 {
10376 int unsorted_regs[MAX_LDM_STM_OPS];
10377 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10378 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10379 int order[MAX_LDM_STM_OPS];
10380 int base_reg = -1;
10381 rtx base_reg_rtx = NULL;
10382 int i, stm_case;
10383
10384 /* Write back of base register is currently only supported for Thumb 1. */
10385 int base_writeback = TARGET_THUMB1;
10386
10387 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10388 easily extended if required. */
10389 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10390
10391 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10392
10393 /* Loop over the operands and check that the memory references are
10394 suitable (i.e. immediate offsets from the same base register). At
10395 the same time, extract the target register, and the memory
10396 offsets. */
10397 for (i = 0; i < nops; i++)
10398 {
10399 rtx reg;
10400 rtx offset;
10401
10402 /* Convert a subreg of a mem into the mem itself. */
10403 if (GET_CODE (operands[nops + i]) == SUBREG)
10404 operands[nops + i] = alter_subreg (operands + (nops + i));
10405
10406 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10407
10408 /* Don't reorder volatile memory references; it doesn't seem worth
10409 looking for the case where the order is ok anyway. */
10410 if (MEM_VOLATILE_P (operands[nops + i]))
10411 return 0;
10412
10413 offset = const0_rtx;
10414
10415 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10416 || (GET_CODE (reg) == SUBREG
10417 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10418 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10419 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10420 == REG)
10421 || (GET_CODE (reg) == SUBREG
10422 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10423 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10424 == CONST_INT)))
10425 {
10426 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10427 ? operands[i] : SUBREG_REG (operands[i]));
10428 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10429
10430 if (i == 0)
10431 {
10432 base_reg = REGNO (reg);
10433 base_reg_rtx = reg;
10434 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10435 return 0;
10436 }
10437 else if (base_reg != (int) REGNO (reg))
10438 /* Not addressed from the same base register. */
10439 return 0;
10440
10441 /* If it isn't an integer register, then we can't do this. */
10442 if (unsorted_regs[i] < 0
10443 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10444 /* The effects are unpredictable if the base register is
10445 both updated and stored. */
10446 || (base_writeback && unsorted_regs[i] == base_reg)
10447 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10448 || unsorted_regs[i] > 14)
10449 return 0;
10450
10451 unsorted_offsets[i] = INTVAL (offset);
10452 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10453 order[0] = i;
10454 }
10455 else
10456 /* Not a suitable memory address. */
10457 return 0;
10458 }
10459
10460 /* All the useful information has now been extracted from the
10461 operands into unsorted_regs and unsorted_offsets; additionally,
10462 order[0] has been set to the lowest offset in the list. Sort
10463 the offsets into order, verifying that they are adjacent, and
10464 check that the register numbers are ascending. */
10465 if (!compute_offset_order (nops, unsorted_offsets, order,
10466 check_regs ? unsorted_regs : NULL))
10467 return 0;
10468
10469 if (saved_order)
10470 memcpy (saved_order, order, sizeof order);
10471
10472 if (base)
10473 {
10474 *base = base_reg;
10475
10476 for (i = 0; i < nops; i++)
10477 {
10478 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10479 if (reg_rtxs)
10480 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10481 }
10482
10483 *load_offset = unsorted_offsets[order[0]];
10484 }
10485
10486 if (TARGET_THUMB1
10487 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10488 return 0;
10489
10490 if (unsorted_offsets[order[0]] == 0)
10491 stm_case = 1; /* stmia */
10492 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10493 stm_case = 2; /* stmib */
10494 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10495 stm_case = 3; /* stmda */
10496 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10497 stm_case = 4; /* stmdb */
10498 else
10499 return 0;
10500
10501 if (!multiple_operation_profitable_p (false, nops, 0))
10502 return 0;
10503
10504 return stm_case;
10505 }
10506 \f
10507 /* Routines for use in generating RTL. */
10508
10509 /* Generate a load-multiple instruction. COUNT is the number of loads in
10510 the instruction; REGS and MEMS are arrays containing the operands.
10511 BASEREG is the base register to be used in addressing the memory operands.
10512 WBACK_OFFSET is nonzero if the instruction should update the base
10513 register. */
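/* For COUNT == 2 with REGS == {4, 5} and no write-back this builds a
   PARALLEL of two SImode sets loading r4 and r5; when a load multiple is
   judged unprofitable it instead returns a sequence of individual move
   insns. */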
10514
10515 static rtx
10516 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10517 HOST_WIDE_INT wback_offset)
10518 {
10519 int i = 0, j;
10520 rtx result;
10521
10522 if (!multiple_operation_profitable_p (false, count, 0))
10523 {
10524 rtx seq;
10525
10526 start_sequence ();
10527
10528 for (i = 0; i < count; i++)
10529 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10530
10531 if (wback_offset != 0)
10532 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10533
10534 seq = get_insns ();
10535 end_sequence ();
10536
10537 return seq;
10538 }
10539
10540 result = gen_rtx_PARALLEL (VOIDmode,
10541 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10542 if (wback_offset != 0)
10543 {
10544 XVECEXP (result, 0, 0)
10545 = gen_rtx_SET (VOIDmode, basereg,
10546 plus_constant (Pmode, basereg, wback_offset));
10547 i = 1;
10548 count++;
10549 }
10550
10551 for (j = 0; i < count; i++, j++)
10552 XVECEXP (result, 0, i)
10553 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10554
10555 return result;
10556 }
10557
10558 /* Generate a store-multiple instruction. COUNT is the number of stores in
10559 the instruction; REGS and MEMS are arrays containing the operands.
10560 BASEREG is the base register to be used in addressing the memory operands.
10561 WBACK_OFFSET is nonzero if the instruction should update the base
10562 register. */
10563
10564 static rtx
10565 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10566 HOST_WIDE_INT wback_offset)
10567 {
10568 int i = 0, j;
10569 rtx result;
10570
10571 if (GET_CODE (basereg) == PLUS)
10572 basereg = XEXP (basereg, 0);
10573
10574 if (!multiple_operation_profitable_p (false, count, 0))
10575 {
10576 rtx seq;
10577
10578 start_sequence ();
10579
10580 for (i = 0; i < count; i++)
10581 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10582
10583 if (wback_offset != 0)
10584 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10585
10586 seq = get_insns ();
10587 end_sequence ();
10588
10589 return seq;
10590 }
10591
10592 result = gen_rtx_PARALLEL (VOIDmode,
10593 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10594 if (wback_offset != 0)
10595 {
10596 XVECEXP (result, 0, 0)
10597 = gen_rtx_SET (VOIDmode, basereg,
10598 plus_constant (Pmode, basereg, wback_offset));
10599 i = 1;
10600 count++;
10601 }
10602
10603 for (j = 0; i < count; i++, j++)
10604 XVECEXP (result, 0, i)
10605 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10606
10607 return result;
10608 }
10609
10610 /* Generate either a load-multiple or a store-multiple instruction. This
10611 function can be used in situations where we can start with a single MEM
10612 rtx and adjust its address upwards.
10613 COUNT is the number of operations in the instruction, not counting a
10614 possible update of the base register. REGS is an array containing the
10615 register operands.
10616 BASEREG is the base register to be used in addressing the memory operands,
10617 which are constructed from BASEMEM.
10618 WRITE_BACK specifies whether the generated instruction should include an
10619 update of the base register.
10620 OFFSETP is used to pass an offset to and from this function; this offset
10621 is not used when constructing the address (instead BASEMEM should have an
10622 appropriate offset in its address), it is used only for setting
10623 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10624
10625 static rtx
10626 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10627 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10628 {
10629 rtx mems[MAX_LDM_STM_OPS];
10630 HOST_WIDE_INT offset = *offsetp;
10631 int i;
10632
10633 gcc_assert (count <= MAX_LDM_STM_OPS);
10634
10635 if (GET_CODE (basereg) == PLUS)
10636 basereg = XEXP (basereg, 0);
10637
10638 for (i = 0; i < count; i++)
10639 {
10640 rtx addr = plus_constant (Pmode, basereg, i * 4);
10641 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10642 offset += 4;
10643 }
10644
10645 if (write_back)
10646 *offsetp = offset;
10647
10648 if (is_load)
10649 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10650 write_back ? 4 * count : 0);
10651 else
10652 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10653 write_back ? 4 * count : 0);
10654 }
10655
10656 rtx
10657 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10658 rtx basemem, HOST_WIDE_INT *offsetp)
10659 {
10660 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10661 offsetp);
10662 }
10663
10664 rtx
10665 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10666 rtx basemem, HOST_WIDE_INT *offsetp)
10667 {
10668 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10669 offsetp);
10670 }
10671
10672 /* Called from a peephole2 expander to turn a sequence of loads into an
10673 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10674 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10675 is true if we can reorder the registers because they are used commutatively
10676 subsequently.
10677 Returns true iff we could generate a new instruction. */
10678
10679 bool
10680 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10681 {
10682 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10683 rtx mems[MAX_LDM_STM_OPS];
10684 int i, j, base_reg;
10685 rtx base_reg_rtx;
10686 HOST_WIDE_INT offset;
10687 int write_back = FALSE;
10688 int ldm_case;
10689 rtx addr;
10690
10691 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10692 &base_reg, &offset, !sort_regs);
10693
10694 if (ldm_case == 0)
10695 return false;
10696
10697 if (sort_regs)
10698 for (i = 0; i < nops - 1; i++)
10699 for (j = i + 1; j < nops; j++)
10700 if (regs[i] > regs[j])
10701 {
10702 int t = regs[i];
10703 regs[i] = regs[j];
10704 regs[j] = t;
10705 }
10706 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10707
10708 if (TARGET_THUMB1)
10709 {
10710 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10711 gcc_assert (ldm_case == 1 || ldm_case == 5);
10712 write_back = TRUE;
10713 }
10714
10715 if (ldm_case == 5)
10716 {
10717 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10718 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10719 offset = 0;
10720 if (!TARGET_THUMB1)
10721 {
10722 base_reg = regs[0];
10723 base_reg_rtx = newbase;
10724 }
10725 }
10726
10727 for (i = 0; i < nops; i++)
10728 {
10729 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10730 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10731 SImode, addr, 0);
10732 }
10733 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10734 write_back ? offset + i * 4 : 0));
10735 return true;
10736 }
10737
10738 /* Called from a peephole2 expander to turn a sequence of stores into an
10739 STM instruction. OPERANDS are the operands found by the peephole matcher;
10740 NOPS indicates how many separate stores we are trying to combine.
10741 Returns true iff we could generate a new instruction. */
10742
10743 bool
10744 gen_stm_seq (rtx *operands, int nops)
10745 {
10746 int i;
10747 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10748 rtx mems[MAX_LDM_STM_OPS];
10749 int base_reg;
10750 rtx base_reg_rtx;
10751 HOST_WIDE_INT offset;
10752 int write_back = FALSE;
10753 int stm_case;
10754 rtx addr;
10755 bool base_reg_dies;
10756
10757 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10758 mem_order, &base_reg, &offset, true);
10759
10760 if (stm_case == 0)
10761 return false;
10762
10763 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10764
10765 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10766 if (TARGET_THUMB1)
10767 {
10768 gcc_assert (base_reg_dies);
10769 write_back = TRUE;
10770 }
10771
10772 if (stm_case == 5)
10773 {
10774 gcc_assert (base_reg_dies);
10775 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10776 offset = 0;
10777 }
10778
10779 addr = plus_constant (Pmode, base_reg_rtx, offset);
10780
10781 for (i = 0; i < nops; i++)
10782 {
10783 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10784 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10785 SImode, addr, 0);
10786 }
10787 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10788 write_back ? offset + i * 4 : 0));
10789 return true;
10790 }
10791
10792 /* Called from a peephole2 expander to turn a sequence of stores that are
10793 preceded by constant loads into an STM instruction. OPERANDS are the
10794 operands found by the peephole matcher; NOPS indicates how many
10795 separate stores we are trying to combine; there are 2 * NOPS
10796 instructions in the peephole.
10797 Returns true iff we could generate a new instruction. */
10798
10799 bool
10800 gen_const_stm_seq (rtx *operands, int nops)
10801 {
10802 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10803 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10804 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10805 rtx mems[MAX_LDM_STM_OPS];
10806 int base_reg;
10807 rtx base_reg_rtx;
10808 HOST_WIDE_INT offset;
10809 int write_back = FALSE;
10810 int stm_case;
10811 rtx addr;
10812 bool base_reg_dies;
10813 int i, j;
10814 HARD_REG_SET allocated;
10815
10816 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10817 mem_order, &base_reg, &offset, false);
10818
10819 if (stm_case == 0)
10820 return false;
10821
10822 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10823
10824 /* If the same register is used more than once, try to find a free
10825 register. */
10826 CLEAR_HARD_REG_SET (allocated);
10827 for (i = 0; i < nops; i++)
10828 {
10829 for (j = i + 1; j < nops; j++)
10830 if (regs[i] == regs[j])
10831 {
10832 rtx t = peep2_find_free_register (0, nops * 2,
10833 TARGET_THUMB1 ? "l" : "r",
10834 SImode, &allocated);
10835 if (t == NULL_RTX)
10836 return false;
10837 reg_rtxs[i] = t;
10838 regs[i] = REGNO (t);
10839 }
10840 }
10841
10842 /* Compute an ordering that maps the register numbers to an ascending
10843 sequence. */
10844 reg_order[0] = 0;
10845 for (i = 0; i < nops; i++)
10846 if (regs[i] < regs[reg_order[0]])
10847 reg_order[0] = i;
10848
10849 for (i = 1; i < nops; i++)
10850 {
10851 int this_order = reg_order[i - 1];
10852 for (j = 0; j < nops; j++)
10853 if (regs[j] > regs[reg_order[i - 1]]
10854 && (this_order == reg_order[i - 1]
10855 || regs[j] < regs[this_order]))
10856 this_order = j;
10857 reg_order[i] = this_order;
10858 }
10859
10860 /* Ensure that registers that must be live after the instruction end
10861 up with the correct value. */
10862 for (i = 0; i < nops; i++)
10863 {
10864 int this_order = reg_order[i];
10865 if ((this_order != mem_order[i]
10866 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10867 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10868 return false;
10869 }
10870
10871 /* Load the constants. */
10872 for (i = 0; i < nops; i++)
10873 {
10874 rtx op = operands[2 * nops + mem_order[i]];
10875 sorted_regs[i] = regs[reg_order[i]];
10876 emit_move_insn (reg_rtxs[reg_order[i]], op);
10877 }
10878
10879 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10880
10881 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10882 if (TARGET_THUMB1)
10883 {
10884 gcc_assert (base_reg_dies);
10885 write_back = TRUE;
10886 }
10887
10888 if (stm_case == 5)
10889 {
10890 gcc_assert (base_reg_dies);
10891 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10892 offset = 0;
10893 }
10894
10895 addr = plus_constant (Pmode, base_reg_rtx, offset);
10896
10897 for (i = 0; i < nops; i++)
10898 {
10899 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10900 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10901 SImode, addr, 0);
10902 }
10903 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10904 write_back ? offset + i * 4 : 0));
10905 return true;
10906 }
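
/* A hypothetical example of the transformation above: a peephole window of
   2 * NOPS insns such as
	mov	r2, #1
	str	r2, [r0]
	mov	r2, #2
	str	r2, [r0, #4]
   reuses r2, so a free register (say r3) is picked for one of the values,
   the constant loads are re-emitted so that ascending register numbers
   match ascending addresses, and the stores collapse into
	mov	r2, #1
	mov	r3, #2
	stm	r0, {r2, r3}
   (register names here are illustrative only).  */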
10907
10908 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10909 unaligned copies on processors which support unaligned semantics for those
10910 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10911 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10912 An interleave factor of 1 (the minimum) will perform no interleaving.
10913 Load/store multiple are used for aligned addresses where possible. */
10914
10915 static void
10916 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10917 HOST_WIDE_INT length,
10918 unsigned int interleave_factor)
10919 {
10920 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10921 int *regnos = XALLOCAVEC (int, interleave_factor);
10922 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10923 HOST_WIDE_INT i, j;
10924 HOST_WIDE_INT remaining = length, words;
10925 rtx halfword_tmp = NULL, byte_tmp = NULL;
10926 rtx dst, src;
10927 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10928 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10929 HOST_WIDE_INT srcoffset, dstoffset;
10930 HOST_WIDE_INT src_autoinc, dst_autoinc;
10931 rtx mem, addr;
10932
10933 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10934
10935 /* Use hard registers if we have aligned source or destination so we can use
10936 load/store multiple with contiguous registers. */
10937 if (dst_aligned || src_aligned)
10938 for (i = 0; i < interleave_factor; i++)
10939 regs[i] = gen_rtx_REG (SImode, i);
10940 else
10941 for (i = 0; i < interleave_factor; i++)
10942 regs[i] = gen_reg_rtx (SImode);
10943
10944 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10945 src = copy_addr_to_reg (XEXP (srcbase, 0));
10946
10947 srcoffset = dstoffset = 0;
10948
10949 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10950 For copying the last bytes we want to subtract this offset again. */
10951 src_autoinc = dst_autoinc = 0;
10952
10953 for (i = 0; i < interleave_factor; i++)
10954 regnos[i] = i;
10955
10956 /* Copy BLOCK_SIZE_BYTES chunks. */
10957
10958 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10959 {
10960 /* Load words. */
10961 if (src_aligned && interleave_factor > 1)
10962 {
10963 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10964 TRUE, srcbase, &srcoffset));
10965 src_autoinc += UNITS_PER_WORD * interleave_factor;
10966 }
10967 else
10968 {
10969 for (j = 0; j < interleave_factor; j++)
10970 {
10971 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
10972 - src_autoinc));
10973 mem = adjust_automodify_address (srcbase, SImode, addr,
10974 srcoffset + j * UNITS_PER_WORD);
10975 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10976 }
10977 srcoffset += block_size_bytes;
10978 }
10979
10980 /* Store words. */
10981 if (dst_aligned && interleave_factor > 1)
10982 {
10983 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10984 TRUE, dstbase, &dstoffset));
10985 dst_autoinc += UNITS_PER_WORD * interleave_factor;
10986 }
10987 else
10988 {
10989 for (j = 0; j < interleave_factor; j++)
10990 {
10991 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
10992 - dst_autoinc));
10993 mem = adjust_automodify_address (dstbase, SImode, addr,
10994 dstoffset + j * UNITS_PER_WORD);
10995 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10996 }
10997 dstoffset += block_size_bytes;
10998 }
10999
11000 remaining -= block_size_bytes;
11001 }
11002
11003 /* Copy any whole words left (note these aren't interleaved with any
11004 subsequent halfword/byte load/stores in the interests of simplicity). */
11005
11006 words = remaining / UNITS_PER_WORD;
11007
11008 gcc_assert (words < interleave_factor);
11009
11010 if (src_aligned && words > 1)
11011 {
11012 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11013 &srcoffset));
11014 src_autoinc += UNITS_PER_WORD * words;
11015 }
11016 else
11017 {
11018 for (j = 0; j < words; j++)
11019 {
11020 addr = plus_constant (Pmode, src,
11021 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11022 mem = adjust_automodify_address (srcbase, SImode, addr,
11023 srcoffset + j * UNITS_PER_WORD);
11024 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11025 }
11026 srcoffset += words * UNITS_PER_WORD;
11027 }
11028
11029 if (dst_aligned && words > 1)
11030 {
11031 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11032 &dstoffset));
11033 dst_autoinc += words * UNITS_PER_WORD;
11034 }
11035 else
11036 {
11037 for (j = 0; j < words; j++)
11038 {
11039 addr = plus_constant (Pmode, dst,
11040 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11041 mem = adjust_automodify_address (dstbase, SImode, addr,
11042 dstoffset + j * UNITS_PER_WORD);
11043 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11044 }
11045 dstoffset += words * UNITS_PER_WORD;
11046 }
11047
11048 remaining -= words * UNITS_PER_WORD;
11049
11050 gcc_assert (remaining < 4);
11051
11052 /* Copy a halfword if necessary. */
11053
11054 if (remaining >= 2)
11055 {
11056 halfword_tmp = gen_reg_rtx (SImode);
11057
11058 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11059 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11060 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11061
11062 /* Either write out immediately, or delay until we've loaded the last
11063 byte, depending on interleave factor. */
11064 if (interleave_factor == 1)
11065 {
11066 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11067 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11068 emit_insn (gen_unaligned_storehi (mem,
11069 gen_lowpart (HImode, halfword_tmp)));
11070 halfword_tmp = NULL;
11071 dstoffset += 2;
11072 }
11073
11074 remaining -= 2;
11075 srcoffset += 2;
11076 }
11077
11078 gcc_assert (remaining < 2);
11079
11080 /* Copy last byte. */
11081
11082 if ((remaining & 1) != 0)
11083 {
11084 byte_tmp = gen_reg_rtx (SImode);
11085
11086 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11087 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11088 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11089
11090 if (interleave_factor == 1)
11091 {
11092 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11093 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11094 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11095 byte_tmp = NULL;
11096 dstoffset++;
11097 }
11098
11099 remaining--;
11100 srcoffset++;
11101 }
11102
11103 /* Store last halfword if we haven't done so already. */
11104
11105 if (halfword_tmp)
11106 {
11107 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11108 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11109 emit_insn (gen_unaligned_storehi (mem,
11110 gen_lowpart (HImode, halfword_tmp)));
11111 dstoffset += 2;
11112 }
11113
11114 /* Likewise for last byte. */
11115
11116 if (byte_tmp)
11117 {
11118 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11119 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11120 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11121 dstoffset++;
11122 }
11123
11124 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11125 }
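
/* Rough sketch of what the routine above emits for a 10-byte copy with
   INTERLEAVE_FACTOR == 2 when neither end is word-aligned (mnemonics are
   indicative only; the real insns come from the unaligned load/store
   patterns and use pseudo registers):
	ldr	rA, [src]
	ldr	rB, [src, #4]
	str	rA, [dst]
	str	rB, [dst, #4]
	ldrh	rC, [src, #8]
	strh	rC, [dst, #8]
   i.e. one interleaved 8-byte block, no leftover whole word, and then the
   trailing halfword.  */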
11126
11127 /* From mips_adjust_block_mem:
11128
11129 Helper function for doing a loop-based block operation on memory
11130 reference MEM. Each iteration of the loop will operate on LENGTH
11131 bytes of MEM.
11132
11133 Create a new base register for use within the loop and point it to
11134 the start of MEM. Create a new memory reference that uses this
11135 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11136
11137 static void
11138 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11139 rtx *loop_mem)
11140 {
11141 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11142
11143 /* Although the new mem does not refer to a known location,
11144 it does keep up to LENGTH bytes of alignment. */
11145 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11146 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11147 }
11148
11149 /* From mips_block_move_loop:
11150
11151 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11152 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11153 the memory regions do not overlap. */
11154
11155 static void
11156 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11157 unsigned int interleave_factor,
11158 HOST_WIDE_INT bytes_per_iter)
11159 {
11160 rtx label, src_reg, dest_reg, final_src, test;
11161 HOST_WIDE_INT leftover;
11162
11163 leftover = length % bytes_per_iter;
11164 length -= leftover;
11165
11166 /* Create registers and memory references for use within the loop. */
11167 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11168 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11169
11170 /* Calculate the value that SRC_REG should have after the last iteration of
11171 the loop. */
11172 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11173 0, 0, OPTAB_WIDEN);
11174
11175 /* Emit the start of the loop. */
11176 label = gen_label_rtx ();
11177 emit_label (label);
11178
11179 /* Emit the loop body. */
11180 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11181 interleave_factor);
11182
11183 /* Move on to the next block. */
11184 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11185 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11186
11187 /* Emit the loop condition. */
11188 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11189 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11190
11191 /* Mop up any left-over bytes. */
11192 if (leftover)
11193 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11194 }
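
/* Schematically (assuming for illustration that LENGTH is an exact multiple
   of BYTES_PER_ITER), the code emitted above has the shape:
	src_reg = &src;  dest_reg = &dest;
	final_src = src_reg + LENGTH;
     L: copy BYTES_PER_ITER bytes straight-line;
	src_reg += BYTES_PER_ITER;  dest_reg += BYTES_PER_ITER;
	if (src_reg != final_src) goto L;
   with any remainder handled by one more straight-line copy after the
   loop.  */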
11195
11196 /* Emit a block move when either the source or destination is unaligned (not
11197 aligned to a four-byte boundary). This may need further tuning depending on
11198 core type, optimize_size setting, etc. */
11199
11200 static int
11201 arm_movmemqi_unaligned (rtx *operands)
11202 {
11203 HOST_WIDE_INT length = INTVAL (operands[2]);
11204
11205 if (optimize_size)
11206 {
11207 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11208 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11209 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11210 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11211 or dst_aligned though: allow more interleaving in those cases since the
11212 resulting code can be smaller. */
11213 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11214 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11215
11216 if (length > 12)
11217 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11218 interleave_factor, bytes_per_iter);
11219 else
11220 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11221 interleave_factor);
11222 }
11223 else
11224 {
11225 /* Note that the loop created by arm_block_move_unaligned_loop may be
11226 subject to loop unrolling, which makes tuning this condition a little
11227 redundant. */
11228 if (length > 32)
11229 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11230 else
11231 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11232 }
11233
11234 return 1;
11235 }
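
/* For example (figures are illustrative): with -Os, a 40-byte copy where
   neither operand is word-aligned uses the loop with a 4-byte iteration and
   no interleaving, since 40 > 12; without -Os the same copy uses the loop
   with interleave factor 4 and 16 bytes per iteration (40 > 32), and the
   remaining 8 bytes are handled by the straight-line tail inside
   arm_block_move_unaligned_loop.  */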
11236
11237 int
11238 arm_gen_movmemqi (rtx *operands)
11239 {
11240 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11241 HOST_WIDE_INT srcoffset, dstoffset;
11242 int i;
11243 rtx src, dst, srcbase, dstbase;
11244 rtx part_bytes_reg = NULL;
11245 rtx mem;
11246
11247 if (GET_CODE (operands[2]) != CONST_INT
11248 || GET_CODE (operands[3]) != CONST_INT
11249 || INTVAL (operands[2]) > 64)
11250 return 0;
11251
11252 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11253 return arm_movmemqi_unaligned (operands);
11254
11255 if (INTVAL (operands[3]) & 3)
11256 return 0;
11257
11258 dstbase = operands[0];
11259 srcbase = operands[1];
11260
11261 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11262 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11263
11264 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11265 out_words_to_go = INTVAL (operands[2]) / 4;
11266 last_bytes = INTVAL (operands[2]) & 3;
11267 dstoffset = srcoffset = 0;
11268
11269 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11270 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11271
11272 for (i = 0; in_words_to_go >= 2; i+=4)
11273 {
11274 if (in_words_to_go > 4)
11275 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11276 TRUE, srcbase, &srcoffset));
11277 else
11278 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11279 src, FALSE, srcbase,
11280 &srcoffset));
11281
11282 if (out_words_to_go)
11283 {
11284 if (out_words_to_go > 4)
11285 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11286 TRUE, dstbase, &dstoffset));
11287 else if (out_words_to_go != 1)
11288 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11289 out_words_to_go, dst,
11290 (last_bytes == 0
11291 ? FALSE : TRUE),
11292 dstbase, &dstoffset));
11293 else
11294 {
11295 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11296 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11297 if (last_bytes != 0)
11298 {
11299 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11300 dstoffset += 4;
11301 }
11302 }
11303 }
11304
11305 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11306 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11307 }
11308
11309 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11310 if (out_words_to_go)
11311 {
11312 rtx sreg;
11313
11314 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11315 sreg = copy_to_reg (mem);
11316
11317 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11318 emit_move_insn (mem, sreg);
11319 in_words_to_go--;
11320
11321 gcc_assert (!in_words_to_go); /* Sanity check */
11322 }
11323
11324 if (in_words_to_go)
11325 {
11326 gcc_assert (in_words_to_go > 0);
11327
11328 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11329 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11330 }
11331
11332 gcc_assert (!last_bytes || part_bytes_reg);
11333
11334 if (BYTES_BIG_ENDIAN && last_bytes)
11335 {
11336 rtx tmp = gen_reg_rtx (SImode);
11337
11338 /* The bytes we want are in the top end of the word. */
11339 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11340 GEN_INT (8 * (4 - last_bytes))));
11341 part_bytes_reg = tmp;
11342
11343 while (last_bytes)
11344 {
11345 mem = adjust_automodify_address (dstbase, QImode,
11346 plus_constant (Pmode, dst,
11347 last_bytes - 1),
11348 dstoffset + last_bytes - 1);
11349 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11350
11351 if (--last_bytes)
11352 {
11353 tmp = gen_reg_rtx (SImode);
11354 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11355 part_bytes_reg = tmp;
11356 }
11357 }
11358
11359 }
11360 else
11361 {
11362 if (last_bytes > 1)
11363 {
11364 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11365 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11366 last_bytes -= 2;
11367 if (last_bytes)
11368 {
11369 rtx tmp = gen_reg_rtx (SImode);
11370 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11371 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11372 part_bytes_reg = tmp;
11373 dstoffset += 2;
11374 }
11375 }
11376
11377 if (last_bytes)
11378 {
11379 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11380 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11381 }
11382 }
11383
11384 return 1;
11385 }
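
/* A small worked example (hypothetical operands): a 14-byte copy with
   word-aligned source and destination gives in_words_to_go == 4,
   out_words_to_go == 3 and last_bytes == 2.  The main loop emits one ldm of
   four words and one stm of three words (with writeback, as last_bytes is
   nonzero); the register holding the fourth word becomes part_bytes_reg,
   and on a little-endian target the tail is finished with a single strh of
   its low halfword.  */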
11386
11387 /* Select a dominance comparison mode if possible for a test of the general
11388 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11389 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11390 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11391 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11392 In all cases OP will be either EQ or NE, but we don't need to know which
11393 here. If we are unable to support a dominance comparison we return
11394 CC mode. This will then fail to match for the RTL expressions that
11395 generate this call. */
11396 enum machine_mode
11397 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11398 {
11399 enum rtx_code cond1, cond2;
11400 int swapped = 0;
11401
11402 /* Currently we will probably get the wrong result if the individual
11403 comparisons are not simple. This also ensures that it is safe to
11404 reverse a comparison if necessary. */
11405 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11406 != CCmode)
11407 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11408 != CCmode))
11409 return CCmode;
11410
11411 /* The if_then_else variant of this tests the second condition if the
11412 first passes, but is true if the first fails. Reverse the first
11413 condition to get a true "inclusive-or" expression. */
11414 if (cond_or == DOM_CC_NX_OR_Y)
11415 cond1 = reverse_condition (cond1);
11416
11417 /* If the comparisons are not equal, and one doesn't dominate the other,
11418 then we can't do this. */
11419 if (cond1 != cond2
11420 && !comparison_dominates_p (cond1, cond2)
11421 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11422 return CCmode;
11423
11424 if (swapped)
11425 {
11426 enum rtx_code temp = cond1;
11427 cond1 = cond2;
11428 cond2 = temp;
11429 }
11430
11431 switch (cond1)
11432 {
11433 case EQ:
11434 if (cond_or == DOM_CC_X_AND_Y)
11435 return CC_DEQmode;
11436
11437 switch (cond2)
11438 {
11439 case EQ: return CC_DEQmode;
11440 case LE: return CC_DLEmode;
11441 case LEU: return CC_DLEUmode;
11442 case GE: return CC_DGEmode;
11443 case GEU: return CC_DGEUmode;
11444 default: gcc_unreachable ();
11445 }
11446
11447 case LT:
11448 if (cond_or == DOM_CC_X_AND_Y)
11449 return CC_DLTmode;
11450
11451 switch (cond2)
11452 {
11453 case LT:
11454 return CC_DLTmode;
11455 case LE:
11456 return CC_DLEmode;
11457 case NE:
11458 return CC_DNEmode;
11459 default:
11460 gcc_unreachable ();
11461 }
11462
11463 case GT:
11464 if (cond_or == DOM_CC_X_AND_Y)
11465 return CC_DGTmode;
11466
11467 switch (cond2)
11468 {
11469 case GT:
11470 return CC_DGTmode;
11471 case GE:
11472 return CC_DGEmode;
11473 case NE:
11474 return CC_DNEmode;
11475 default:
11476 gcc_unreachable ();
11477 }
11478
11479 case LTU:
11480 if (cond_or == DOM_CC_X_AND_Y)
11481 return CC_DLTUmode;
11482
11483 switch (cond2)
11484 {
11485 case LTU:
11486 return CC_DLTUmode;
11487 case LEU:
11488 return CC_DLEUmode;
11489 case NE:
11490 return CC_DNEmode;
11491 default:
11492 gcc_unreachable ();
11493 }
11494
11495 case GTU:
11496 if (cond_or == DOM_CC_X_AND_Y)
11497 return CC_DGTUmode;
11498
11499 switch (cond2)
11500 {
11501 case GTU:
11502 return CC_DGTUmode;
11503 case GEU:
11504 return CC_DGEUmode;
11505 case NE:
11506 return CC_DNEmode;
11507 default:
11508 gcc_unreachable ();
11509 }
11510
11511 /* The remaining cases only occur when both comparisons are the
11512 same. */
11513 case NE:
11514 gcc_assert (cond1 == cond2);
11515 return CC_DNEmode;
11516
11517 case LE:
11518 gcc_assert (cond1 == cond2);
11519 return CC_DLEmode;
11520
11521 case GE:
11522 gcc_assert (cond1 == cond2);
11523 return CC_DGEmode;
11524
11525 case LEU:
11526 gcc_assert (cond1 == cond2);
11527 return CC_DLEUmode;
11528
11529 case GEU:
11530 gcc_assert (cond1 == cond2);
11531 return CC_DGEUmode;
11532
11533 default:
11534 gcc_unreachable ();
11535 }
11536 }
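
/* For example (purely illustrative): for a test such as
   (ne (ior (lt a b) (le c d)) (const_int 0)) with COND_OR == DOM_CC_X_OR_Y,
   COND1 is LT and COND2 is LE; LT dominates LE, so the function returns
   CC_DLEmode, and the pair can be implemented as a compare followed by a
   conditional compare whose combined result is tested with a single
   condition in that dominance mode.  */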
11537
11538 enum machine_mode
11539 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11540 {
11541 /* All floating point compares return CCFP if it is an equality or an
11542 unordered-style comparison, and CCFPE (signalling) for LT/LE/GT/GE. */
11543 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11544 {
11545 switch (op)
11546 {
11547 case EQ:
11548 case NE:
11549 case UNORDERED:
11550 case ORDERED:
11551 case UNLT:
11552 case UNLE:
11553 case UNGT:
11554 case UNGE:
11555 case UNEQ:
11556 case LTGT:
11557 return CCFPmode;
11558
11559 case LT:
11560 case LE:
11561 case GT:
11562 case GE:
11563 return CCFPEmode;
11564
11565 default:
11566 gcc_unreachable ();
11567 }
11568 }
11569
11570 /* A compare with a shifted operand. Because of canonicalization, the
11571 comparison will have to be swapped when we emit the assembler. */
11572 if (GET_MODE (y) == SImode
11573 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11574 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11575 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11576 || GET_CODE (x) == ROTATERT))
11577 return CC_SWPmode;
11578
11579 /* This operation is performed swapped, but since we only rely on the Z
11580 flag we don't need an additional mode. */
11581 if (GET_MODE (y) == SImode
11582 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11583 && GET_CODE (x) == NEG
11584 && (op == EQ || op == NE))
11585 return CC_Zmode;
11586
11587 /* This is a special case that is used by combine to allow a
11588 comparison of a shifted byte load to be split into a zero-extend
11589 followed by a comparison of the shifted integer (only valid for
11590 equalities and unsigned inequalities). */
11591 if (GET_MODE (x) == SImode
11592 && GET_CODE (x) == ASHIFT
11593 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11594 && GET_CODE (XEXP (x, 0)) == SUBREG
11595 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11596 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11597 && (op == EQ || op == NE
11598 || op == GEU || op == GTU || op == LTU || op == LEU)
11599 && GET_CODE (y) == CONST_INT)
11600 return CC_Zmode;
11601
11602 /* A construct for a conditional compare, if the false arm contains
11603 0, then both conditions must be true, otherwise either condition
11604 must be true. Not all conditions are possible, so CCmode is
11605 returned if it can't be done. */
11606 if (GET_CODE (x) == IF_THEN_ELSE
11607 && (XEXP (x, 2) == const0_rtx
11608 || XEXP (x, 2) == const1_rtx)
11609 && COMPARISON_P (XEXP (x, 0))
11610 && COMPARISON_P (XEXP (x, 1)))
11611 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11612 INTVAL (XEXP (x, 2)));
11613
11614 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11615 if (GET_CODE (x) == AND
11616 && (op == EQ || op == NE)
11617 && COMPARISON_P (XEXP (x, 0))
11618 && COMPARISON_P (XEXP (x, 1)))
11619 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11620 DOM_CC_X_AND_Y);
11621
11622 if (GET_CODE (x) == IOR
11623 && (op == EQ || op == NE)
11624 && COMPARISON_P (XEXP (x, 0))
11625 && COMPARISON_P (XEXP (x, 1)))
11626 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11627 DOM_CC_X_OR_Y);
11628
11629 /* An operation (on Thumb) where we want to test for a single bit.
11630 This is done by shifting that bit up into the top bit of a
11631 scratch register; we can then branch on the sign bit. */
11632 if (TARGET_THUMB1
11633 && GET_MODE (x) == SImode
11634 && (op == EQ || op == NE)
11635 && GET_CODE (x) == ZERO_EXTRACT
11636 && XEXP (x, 1) == const1_rtx)
11637 return CC_Nmode;
11638
11639 /* An operation that sets the condition codes as a side-effect, the
11640 V flag is not set correctly, so we can only use comparisons where
11641 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11642 instead.) */
11643 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11644 if (GET_MODE (x) == SImode
11645 && y == const0_rtx
11646 && (op == EQ || op == NE || op == LT || op == GE)
11647 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11648 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11649 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11650 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11651 || GET_CODE (x) == LSHIFTRT
11652 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11653 || GET_CODE (x) == ROTATERT
11654 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11655 return CC_NOOVmode;
11656
11657 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11658 return CC_Zmode;
11659
11660 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11661 && GET_CODE (x) == PLUS
11662 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11663 return CC_Cmode;
11664
11665 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11666 {
11667 switch (op)
11668 {
11669 case EQ:
11670 case NE:
11671 /* A DImode comparison against zero can be implemented by
11672 or'ing the two halves together. */
11673 if (y == const0_rtx)
11674 return CC_Zmode;
11675
11676 /* We can do an equality test in three Thumb instructions. */
11677 if (!TARGET_32BIT)
11678 return CC_Zmode;
11679
11680 /* FALLTHROUGH */
11681
11682 case LTU:
11683 case LEU:
11684 case GTU:
11685 case GEU:
11686 /* DImode unsigned comparisons can be implemented by cmp +
11687 cmpeq without a scratch register. Not worth doing in
11688 Thumb-2. */
11689 if (TARGET_32BIT)
11690 return CC_CZmode;
11691
11692 /* FALLTHROUGH */
11693
11694 case LT:
11695 case LE:
11696 case GT:
11697 case GE:
11698 /* DImode signed and unsigned comparisons can be implemented
11699 by cmp + sbcs with a scratch register, but that does not
11700 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11701 gcc_assert (op != EQ && op != NE);
11702 return CC_NCVmode;
11703
11704 default:
11705 gcc_unreachable ();
11706 }
11707 }
11708
11709 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11710 return GET_MODE (x);
11711
11712 return CCmode;
11713 }
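
/* Two concrete instances of the rules above (illustrative): an SImode
   equality test of (neg x) against a register selects CC_Zmode, since only
   the Z flag of the swapped comparison is meaningful; and an SImode LTU or
   GEU test of (plus a b) against one of its own operands selects CC_Cmode,
   which is how the carry out of an addition (unsigned overflow) is
   detected.  */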
11714
11715 /* X and Y are two things to compare using CODE. Emit the compare insn and
11716 return the rtx for register 0 in the proper mode.  SCRATCH, if nonnull,
11717 is an SImode scratch register that a DImode comparison may need. */
11718 rtx
11719 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11720 {
11721 enum machine_mode mode;
11722 rtx cc_reg;
11723 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11724
11725 /* We might have X as a constant, Y as a register because of the predicates
11726 used for cmpdi. If so, force X to a register here. */
11727 if (dimode_comparison && !REG_P (x))
11728 x = force_reg (DImode, x);
11729
11730 mode = SELECT_CC_MODE (code, x, y);
11731 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11732
11733 if (dimode_comparison
11734 && mode != CC_CZmode)
11735 {
11736 rtx clobber, set;
11737
11738 /* To compare two non-zero values for equality, XOR them and
11739 then compare against zero. Not used for ARM mode; there
11740 CC_CZmode is cheaper. */
11741 if (mode == CC_Zmode && y != const0_rtx)
11742 {
11743 gcc_assert (!reload_completed);
11744 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11745 y = const0_rtx;
11746 }
11747
11748 /* A scratch register is required. */
11749 if (reload_completed)
11750 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11751 else
11752 scratch = gen_rtx_SCRATCH (SImode);
11753
11754 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11755 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11756 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11757 }
11758 else
11759 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11760
11761 return cc_reg;
11762 }
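
/* Sketch of the insns emitted (illustrative): a simple SImode comparison
   becomes
	(set (reg:CC_x CC_REGNUM) (compare:CC_x X Y))
   in the mode chosen by SELECT_CC_MODE; a Thumb-1 DImode equality test
   against a nonzero value is first rewritten, before reload, as an XOR of
   the two operands compared against zero, with the compare emitted inside
   a PARALLEL that also clobbers an SImode scratch.  */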
11763
11764 /* Generate a sequence of insns that will generate the correct return
11765 address mask depending on the physical architecture that the program
11766 is running on. */
11767 rtx
11768 arm_gen_return_addr_mask (void)
11769 {
11770 rtx reg = gen_reg_rtx (Pmode);
11771
11772 emit_insn (gen_return_addr_mask (reg));
11773 return reg;
11774 }
11775
11776 void
11777 arm_reload_in_hi (rtx *operands)
11778 {
11779 rtx ref = operands[1];
11780 rtx base, scratch;
11781 HOST_WIDE_INT offset = 0;
11782
11783 if (GET_CODE (ref) == SUBREG)
11784 {
11785 offset = SUBREG_BYTE (ref);
11786 ref = SUBREG_REG (ref);
11787 }
11788
11789 if (GET_CODE (ref) == REG)
11790 {
11791 /* We have a pseudo which has been spilt onto the stack; there
11792 are two cases here: the first where there is a simple
11793 stack-slot replacement and a second where the stack-slot is
11794 out of range, or is used as a subreg. */
11795 if (reg_equiv_mem (REGNO (ref)))
11796 {
11797 ref = reg_equiv_mem (REGNO (ref));
11798 base = find_replacement (&XEXP (ref, 0));
11799 }
11800 else
11801 /* The slot is out of range, or was dressed up in a SUBREG. */
11802 base = reg_equiv_address (REGNO (ref));
11803 }
11804 else
11805 base = find_replacement (&XEXP (ref, 0));
11806
11807 /* Handle the case where the address is too complex to be offset by 1. */
11808 if (GET_CODE (base) == MINUS
11809 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11810 {
11811 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11812
11813 emit_set_insn (base_plus, base);
11814 base = base_plus;
11815 }
11816 else if (GET_CODE (base) == PLUS)
11817 {
11818 /* The addend must be CONST_INT, or we would have dealt with it above. */
11819 HOST_WIDE_INT hi, lo;
11820
11821 offset += INTVAL (XEXP (base, 1));
11822 base = XEXP (base, 0);
11823
11824 /* Rework the address into a legal sequence of insns. */
11825 /* Valid range for lo is -4095 -> 4095 */
11826 lo = (offset >= 0
11827 ? (offset & 0xfff)
11828 : -((-offset) & 0xfff));
11829
11830 /* Corner case: if lo is the max offset then we would be out of range
11831 once we have added the additional 1 below, so bump the msb into the
11832 pre-loading insn(s). */
11833 if (lo == 4095)
11834 lo &= 0x7ff;
11835
11836 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11837 ^ (HOST_WIDE_INT) 0x80000000)
11838 - (HOST_WIDE_INT) 0x80000000);
11839
11840 gcc_assert (hi + lo == offset);
11841
11842 if (hi != 0)
11843 {
11844 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11845
11846 /* Get the base address; addsi3 knows how to handle constants
11847 that require more than one insn. */
11848 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11849 base = base_plus;
11850 offset = lo;
11851 }
11852 }
11853
11854 /* Operands[2] may overlap operands[0] (though it won't overlap
11855 operands[1]), that's why we asked for a DImode reg -- so we can
11856 use the half that does not overlap. */
11857 if (REGNO (operands[2]) == REGNO (operands[0]))
11858 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11859 else
11860 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11861
11862 emit_insn (gen_zero_extendqisi2 (scratch,
11863 gen_rtx_MEM (QImode,
11864 plus_constant (Pmode, base,
11865 offset))));
11866 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11867 gen_rtx_MEM (QImode,
11868 plus_constant (Pmode, base,
11869 offset + 1))));
11870 if (!BYTES_BIG_ENDIAN)
11871 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11872 gen_rtx_IOR (SImode,
11873 gen_rtx_ASHIFT
11874 (SImode,
11875 gen_rtx_SUBREG (SImode, operands[0], 0),
11876 GEN_INT (8)),
11877 scratch));
11878 else
11879 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11880 gen_rtx_IOR (SImode,
11881 gen_rtx_ASHIFT (SImode, scratch,
11882 GEN_INT (8)),
11883 gen_rtx_SUBREG (SImode, operands[0], 0)));
11884 }
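
/* Worked example of the offset splitting above (numbers chosen purely for
   illustration): for OFFSET == 0x1234, LO becomes 0x234 and HI becomes
   0x1000, so hi + lo == offset still holds; the reload then emits an add of
   0x1000 into the scratch base register and performs the two byte loads at
   offsets 0x234 and 0x235, both within the +/-4095 byte-load range.  */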
11885
11886 /* Handle storing a half-word to memory during reload by synthesizing it as two
11887 byte stores. Take care not to clobber the input values until after we
11888 have moved them somewhere safe. This code assumes that if the DImode
11889 scratch in operands[2] overlaps either the input value or output address
11890 in some way, then that value must die in this insn (we absolutely need
11891 two scratch registers for some corner cases). */
11892 void
11893 arm_reload_out_hi (rtx *operands)
11894 {
11895 rtx ref = operands[0];
11896 rtx outval = operands[1];
11897 rtx base, scratch;
11898 HOST_WIDE_INT offset = 0;
11899
11900 if (GET_CODE (ref) == SUBREG)
11901 {
11902 offset = SUBREG_BYTE (ref);
11903 ref = SUBREG_REG (ref);
11904 }
11905
11906 if (GET_CODE (ref) == REG)
11907 {
11908 /* We have a pseudo which has been spilt onto the stack; there
11909 are two cases here: the first where there is a simple
11910 stack-slot replacement and a second where the stack-slot is
11911 out of range, or is used as a subreg. */
11912 if (reg_equiv_mem (REGNO (ref)))
11913 {
11914 ref = reg_equiv_mem (REGNO (ref));
11915 base = find_replacement (&XEXP (ref, 0));
11916 }
11917 else
11918 /* The slot is out of range, or was dressed up in a SUBREG. */
11919 base = reg_equiv_address (REGNO (ref));
11920 }
11921 else
11922 base = find_replacement (&XEXP (ref, 0));
11923
11924 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11925
11926 /* Handle the case where the address is too complex to be offset by 1. */
11927 if (GET_CODE (base) == MINUS
11928 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11929 {
11930 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11931
11932 /* Be careful not to destroy OUTVAL. */
11933 if (reg_overlap_mentioned_p (base_plus, outval))
11934 {
11935 /* Updating base_plus might destroy outval, see if we can
11936 swap the scratch and base_plus. */
11937 if (!reg_overlap_mentioned_p (scratch, outval))
11938 {
11939 rtx tmp = scratch;
11940 scratch = base_plus;
11941 base_plus = tmp;
11942 }
11943 else
11944 {
11945 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11946
11947 /* Be conservative and copy OUTVAL into the scratch now,
11948 this should only be necessary if outval is a subreg
11949 of something larger than a word. */
11950 /* XXX Might this clobber base? I can't see how it can,
11951 since scratch is known to overlap with OUTVAL, and
11952 must be wider than a word. */
11953 emit_insn (gen_movhi (scratch_hi, outval));
11954 outval = scratch_hi;
11955 }
11956 }
11957
11958 emit_set_insn (base_plus, base);
11959 base = base_plus;
11960 }
11961 else if (GET_CODE (base) == PLUS)
11962 {
11963 /* The addend must be CONST_INT, or we would have dealt with it above. */
11964 HOST_WIDE_INT hi, lo;
11965
11966 offset += INTVAL (XEXP (base, 1));
11967 base = XEXP (base, 0);
11968
11969 /* Rework the address into a legal sequence of insns. */
11970 /* Valid range for lo is -4095 -> 4095 */
11971 lo = (offset >= 0
11972 ? (offset & 0xfff)
11973 : -((-offset) & 0xfff));
11974
11975 /* Corner case: if lo is the max offset then we would be out of range
11976 once we have added the additional 1 below, so bump the msb into the
11977 pre-loading insn(s). */
11978 if (lo == 4095)
11979 lo &= 0x7ff;
11980
11981 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11982 ^ (HOST_WIDE_INT) 0x80000000)
11983 - (HOST_WIDE_INT) 0x80000000);
11984
11985 gcc_assert (hi + lo == offset);
11986
11987 if (hi != 0)
11988 {
11989 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11990
11991 /* Be careful not to destroy OUTVAL. */
11992 if (reg_overlap_mentioned_p (base_plus, outval))
11993 {
11994 /* Updating base_plus might destroy outval, see if we
11995 can swap the scratch and base_plus. */
11996 if (!reg_overlap_mentioned_p (scratch, outval))
11997 {
11998 rtx tmp = scratch;
11999 scratch = base_plus;
12000 base_plus = tmp;
12001 }
12002 else
12003 {
12004 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12005
12006 /* Be conservative and copy outval into scratch now,
12007 this should only be necessary if outval is a
12008 subreg of something larger than a word. */
12009 /* XXX Might this clobber base? I can't see how it
12010 can, since scratch is known to overlap with
12011 outval. */
12012 emit_insn (gen_movhi (scratch_hi, outval));
12013 outval = scratch_hi;
12014 }
12015 }
12016
12017 /* Get the base address; addsi3 knows how to handle constants
12018 that require more than one insn. */
12019 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12020 base = base_plus;
12021 offset = lo;
12022 }
12023 }
12024
12025 if (BYTES_BIG_ENDIAN)
12026 {
12027 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12028 plus_constant (Pmode, base,
12029 offset + 1)),
12030 gen_lowpart (QImode, outval)));
12031 emit_insn (gen_lshrsi3 (scratch,
12032 gen_rtx_SUBREG (SImode, outval, 0),
12033 GEN_INT (8)));
12034 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12035 offset)),
12036 gen_lowpart (QImode, scratch)));
12037 }
12038 else
12039 {
12040 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12041 offset)),
12042 gen_lowpart (QImode, outval)));
12043 emit_insn (gen_lshrsi3 (scratch,
12044 gen_rtx_SUBREG (SImode, outval, 0),
12045 GEN_INT (8)));
12046 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12047 plus_constant (Pmode, base,
12048 offset + 1)),
12049 gen_lowpart (QImode, scratch)));
12050 }
12051 }
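
/* The byte order of the two stores above mirrors arm_reload_in_hi: on a
   little-endian target the low byte of OUTVAL goes to OFFSET and the high
   byte (obtained by a logical shift right of 8) to OFFSET + 1; on a
   big-endian target the two destinations are swapped.  */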
12052
12053 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12054 (padded to the size of a word) should be passed in a register. */
12055
12056 static bool
12057 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12058 {
12059 if (TARGET_AAPCS_BASED)
12060 return must_pass_in_stack_var_size (mode, type);
12061 else
12062 return must_pass_in_stack_var_size_or_pad (mode, type);
12063 }
12064
12065
12066 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12067 Return true if an argument passed on the stack should be padded upwards,
12068 i.e. if the least-significant byte has useful data.
12069 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12070 aggregate types are placed at the lowest memory address. */
12071
12072 bool
12073 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12074 {
12075 if (!TARGET_AAPCS_BASED)
12076 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12077
12078 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12079 return false;
12080
12081 return true;
12082 }
12083
12084
12085 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12086 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12087 register has useful data, and return the opposite if the most
12088 significant byte does. */
12089
12090 bool
12091 arm_pad_reg_upward (enum machine_mode mode,
12092 tree type, int first ATTRIBUTE_UNUSED)
12093 {
12094 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12095 {
12096 /* For AAPCS, small aggregates, small fixed-point types,
12097 and small complex types are always padded upwards. */
12098 if (type)
12099 {
12100 if ((AGGREGATE_TYPE_P (type)
12101 || TREE_CODE (type) == COMPLEX_TYPE
12102 || FIXED_POINT_TYPE_P (type))
12103 && int_size_in_bytes (type) <= 4)
12104 return true;
12105 }
12106 else
12107 {
12108 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12109 && GET_MODE_SIZE (mode) <= 4)
12110 return true;
12111 }
12112 }
12113
12114 /* Otherwise, use default padding. */
12115 return !BYTES_BIG_ENDIAN;
12116 }
12117
12118 \f
12119 /* Print a symbolic form of X to the debug file, F. */
12120 static void
12121 arm_print_value (FILE *f, rtx x)
12122 {
12123 switch (GET_CODE (x))
12124 {
12125 case CONST_INT:
12126 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12127 return;
12128
12129 case CONST_DOUBLE:
12130 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12131 return;
12132
12133 case CONST_VECTOR:
12134 {
12135 int i;
12136
12137 fprintf (f, "<");
12138 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12139 {
12140 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12141 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12142 fputc (',', f);
12143 }
12144 fprintf (f, ">");
12145 }
12146 return;
12147
12148 case CONST_STRING:
12149 fprintf (f, "\"%s\"", XSTR (x, 0));
12150 return;
12151
12152 case SYMBOL_REF:
12153 fprintf (f, "`%s'", XSTR (x, 0));
12154 return;
12155
12156 case LABEL_REF:
12157 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12158 return;
12159
12160 case CONST:
12161 arm_print_value (f, XEXP (x, 0));
12162 return;
12163
12164 case PLUS:
12165 arm_print_value (f, XEXP (x, 0));
12166 fprintf (f, "+");
12167 arm_print_value (f, XEXP (x, 1));
12168 return;
12169
12170 case PC:
12171 fprintf (f, "pc");
12172 return;
12173
12174 default:
12175 fprintf (f, "????");
12176 return;
12177 }
12178 }
12179 \f
12180 /* Routines for manipulation of the constant pool. */
12181
12182 /* Arm instructions cannot load a large constant directly into a
12183 register; they have to come from a pc relative load. The constant
12184 must therefore be placed in the addressable range of the pc
12185 relative load. Depending on the precise pc relative load
12186 instruction the range is somewhere between 256 bytes and 4k. This
12187 means that we often have to dump a constant inside a function, and
12188 generate code to branch around it.
12189
12190 It is important to minimize this, since the branches will slow
12191 things down and make the code larger.
12192
12193 Normally we can hide the table after an existing unconditional
12194 branch so that there is no interruption of the flow, but in the
12195 worst case the code looks like this:
12196
12197 ldr rn, L1
12198 ...
12199 b L2
12200 align
12201 L1: .long value
12202 L2:
12203 ...
12204
12205 ldr rn, L3
12206 ...
12207 b L4
12208 align
12209 L3: .long value
12210 L4:
12211 ...
12212
12213 We fix this by performing a scan after scheduling, which notices
12214 which instructions need to have their operands fetched from the
12215 constant table and builds the table.
12216
12217 The algorithm starts by building a table of all the constants that
12218 need fixing up and all the natural barriers in the function (places
12219 where a constant table can be dropped without breaking the flow).
12220 For each fixup we note how far the pc-relative replacement will be
12221 able to reach and the offset of the instruction into the function.
12222
12223 Having built the table we then group the fixes together to form
12224 tables that are as large as possible (subject to addressing
12225 constraints) and emit each table of constants after the last
12226 barrier that is within range of all the instructions in the group.
12227 If a group does not contain a barrier, then we forcibly create one
12228 by inserting a jump instruction into the flow. Once the table has
12229 been inserted, the insns are then modified to reference the
12230 relevant entry in the pool.
12231
12232 Possible enhancements to the algorithm (not implemented) are:
12233
12234 1) For some processors and object formats, there may be benefit in
12235 aligning the pools to the start of cache lines; this alignment
12236 would need to be taken into account when calculating addressability
12237 of a pool. */
12238
12239 /* These typedefs are located at the start of this file, so that
12240 they can be used in the prototypes there. This comment is to
12241 remind readers of that fact so that the following structures
12242 can be understood more easily.
12243
12244 typedef struct minipool_node Mnode;
12245 typedef struct minipool_fixup Mfix; */
12246
12247 struct minipool_node
12248 {
12249 /* Doubly linked chain of entries. */
12250 Mnode * next;
12251 Mnode * prev;
12252 /* The maximum offset into the code that this entry can be placed. While
12253 pushing fixes for forward references, all entries are sorted in order
12254 of increasing max_address. */
12255 HOST_WIDE_INT max_address;
12256 /* Similarly for an entry inserted for a backwards ref. */
12257 HOST_WIDE_INT min_address;
12258 /* The number of fixes referencing this entry. This can become zero
12259 if we "unpush" an entry. In this case we ignore the entry when we
12260 come to emit the code. */
12261 int refcount;
12262 /* The offset from the start of the minipool. */
12263 HOST_WIDE_INT offset;
12264 /* The value in table. */
12265 rtx value;
12266 /* The mode of value. */
12267 enum machine_mode mode;
12268 /* The size of the value. With iWMMXt enabled
12269 sizes > 4 also imply an alignment of 8 bytes. */
12270 int fix_size;
12271 };
12272
12273 struct minipool_fixup
12274 {
12275 Mfix * next;
12276 rtx insn;
12277 HOST_WIDE_INT address;
12278 rtx * loc;
12279 enum machine_mode mode;
12280 int fix_size;
12281 rtx value;
12282 Mnode * minipool;
12283 HOST_WIDE_INT forwards;
12284 HOST_WIDE_INT backwards;
12285 };
12286
12287 /* Fixes less than a word need padding out to a word boundary. */
12288 #define MINIPOOL_FIX_SIZE(mode) \
12289 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
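
/* For example, MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SFmode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */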
12290
12291 static Mnode * minipool_vector_head;
12292 static Mnode * minipool_vector_tail;
12293 static rtx minipool_vector_label;
12294 static int minipool_pad;
12295
12296 /* The linked list of all minipool fixes required for this function. */
12297 Mfix * minipool_fix_head;
12298 Mfix * minipool_fix_tail;
12299 /* The fix entry for the current minipool, once it has been placed. */
12300 Mfix * minipool_barrier;
12301
12302 /* Determines if INSN is the start of a jump table. Returns the end
12303 of the TABLE or NULL_RTX. */
12304 static rtx
12305 is_jump_table (rtx insn)
12306 {
12307 rtx table;
12308
12309 if (jump_to_label_p (insn)
12310 && ((table = next_real_insn (JUMP_LABEL (insn)))
12311 == next_real_insn (insn))
12312 && table != NULL
12313 && GET_CODE (table) == JUMP_INSN
12314 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12315 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12316 return table;
12317
12318 return NULL_RTX;
12319 }
12320
12321 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12322 #define JUMP_TABLES_IN_TEXT_SECTION 0
12323 #endif
12324
12325 static HOST_WIDE_INT
12326 get_jump_table_size (rtx insn)
12327 {
12328 /* ADDR_VECs only take room if read-only data goes into the text
12329 section. */
12330 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12331 {
12332 rtx body = PATTERN (insn);
12333 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12334 HOST_WIDE_INT size;
12335 HOST_WIDE_INT modesize;
12336
12337 modesize = GET_MODE_SIZE (GET_MODE (body));
12338 size = modesize * XVECLEN (body, elt);
12339 switch (modesize)
12340 {
12341 case 1:
12342 /* Round up size of TBB table to a halfword boundary. */
12343 size = (size + 1) & ~(HOST_WIDE_INT)1;
12344 break;
12345 case 2:
12346 /* No padding necessary for TBH. */
12347 break;
12348 case 4:
12349 /* Add two bytes for alignment on Thumb. */
12350 if (TARGET_THUMB)
12351 size += 2;
12352 break;
12353 default:
12354 gcc_unreachable ();
12355 }
12356 return size;
12357 }
12358
12359 return 0;
12360 }
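
/* Illustrative figures: a Thumb-2 TBB table (QImode ADDR_DIFF_VEC) with 9
   entries is rounded up from 9 to 10 bytes so that the following code stays
   halfword aligned; an SImode ADDR_VEC of 5 entries occupies 20 bytes on
   ARM, with 2 further bytes of alignment added on Thumb.  */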
12361
12362 /* Return the maximum amount of padding that will be inserted before
12363 label LABEL. */
12364
12365 static HOST_WIDE_INT
12366 get_label_padding (rtx label)
12367 {
12368 HOST_WIDE_INT align, min_insn_size;
12369
12370 align = 1 << label_to_alignment (label);
12371 min_insn_size = TARGET_THUMB ? 2 : 4;
12372 return align > min_insn_size ? align - min_insn_size : 0;
12373 }
12374
12375 /* Move a minipool fix MP from its current location to before MAX_MP.
12376 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12377 constraints may need updating. */
12378 static Mnode *
12379 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12380 HOST_WIDE_INT max_address)
12381 {
12382 /* The code below assumes these are different. */
12383 gcc_assert (mp != max_mp);
12384
12385 if (max_mp == NULL)
12386 {
12387 if (max_address < mp->max_address)
12388 mp->max_address = max_address;
12389 }
12390 else
12391 {
12392 if (max_address > max_mp->max_address - mp->fix_size)
12393 mp->max_address = max_mp->max_address - mp->fix_size;
12394 else
12395 mp->max_address = max_address;
12396
12397 /* Unlink MP from its current position. Since max_mp is non-null,
12398 mp->prev must be non-null. */
12399 mp->prev->next = mp->next;
12400 if (mp->next != NULL)
12401 mp->next->prev = mp->prev;
12402 else
12403 minipool_vector_tail = mp->prev;
12404
12405 /* Re-insert it before MAX_MP. */
12406 mp->next = max_mp;
12407 mp->prev = max_mp->prev;
12408 max_mp->prev = mp;
12409
12410 if (mp->prev != NULL)
12411 mp->prev->next = mp;
12412 else
12413 minipool_vector_head = mp;
12414 }
12415
12416 /* Save the new entry. */
12417 max_mp = mp;
12418
12419 /* Scan over the preceding entries and adjust their addresses as
12420 required. */
12421 while (mp->prev != NULL
12422 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12423 {
12424 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12425 mp = mp->prev;
12426 }
12427
12428 return max_mp;
12429 }
12430
12431 /* Add a constant to the minipool for a forward reference. Returns the
12432 node added or NULL if the constant will not fit in this pool. */
12433 static Mnode *
12434 add_minipool_forward_ref (Mfix *fix)
12435 {
12436 /* If set, max_mp is the first pool_entry that has a lower
12437 constraint than the one we are trying to add. */
12438 Mnode * max_mp = NULL;
12439 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12440 Mnode * mp;
12441
12442 /* If the minipool starts before the end of FIX->INSN then this FIX
12443 cannot be placed into the current pool. Furthermore, adding the
12444 new constant pool entry may cause the pool to start FIX_SIZE bytes
12445 earlier. */
12446 if (minipool_vector_head &&
12447 (fix->address + get_attr_length (fix->insn)
12448 >= minipool_vector_head->max_address - fix->fix_size))
12449 return NULL;
12450
12451 /* Scan the pool to see if a constant with the same value has
12452 already been added. While we are doing this, also note the
12453 location where we must insert the constant if it doesn't already
12454 exist. */
12455 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12456 {
12457 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12458 && fix->mode == mp->mode
12459 && (GET_CODE (fix->value) != CODE_LABEL
12460 || (CODE_LABEL_NUMBER (fix->value)
12461 == CODE_LABEL_NUMBER (mp->value)))
12462 && rtx_equal_p (fix->value, mp->value))
12463 {
12464 /* More than one fix references this entry. */
12465 mp->refcount++;
12466 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12467 }
12468
12469 /* Note the insertion point if necessary. */
12470 if (max_mp == NULL
12471 && mp->max_address > max_address)
12472 max_mp = mp;
12473
12474 /* If we are inserting an 8-byte aligned quantity and
12475 we have not already found an insertion point, then
12476 make sure that all such 8-byte aligned quantities are
12477 placed at the start of the pool. */
12478 if (ARM_DOUBLEWORD_ALIGN
12479 && max_mp == NULL
12480 && fix->fix_size >= 8
12481 && mp->fix_size < 8)
12482 {
12483 max_mp = mp;
12484 max_address = mp->max_address;
12485 }
12486 }
12487
12488 /* The value is not currently in the minipool, so we need to create
12489 a new entry for it. If MAX_MP is NULL, the entry will be put on
12490 the end of the list since the placement is less constrained than
12491 any existing entry. Otherwise, we insert the new fix before
12492 MAX_MP and, if necessary, adjust the constraints on the other
12493 entries. */
12494 mp = XNEW (Mnode);
12495 mp->fix_size = fix->fix_size;
12496 mp->mode = fix->mode;
12497 mp->value = fix->value;
12498 mp->refcount = 1;
12499 /* Not yet required for a backwards ref. */
12500 mp->min_address = -65536;
12501
12502 if (max_mp == NULL)
12503 {
12504 mp->max_address = max_address;
12505 mp->next = NULL;
12506 mp->prev = minipool_vector_tail;
12507
12508 if (mp->prev == NULL)
12509 {
12510 minipool_vector_head = mp;
12511 minipool_vector_label = gen_label_rtx ();
12512 }
12513 else
12514 mp->prev->next = mp;
12515
12516 minipool_vector_tail = mp;
12517 }
12518 else
12519 {
12520 if (max_address > max_mp->max_address - mp->fix_size)
12521 mp->max_address = max_mp->max_address - mp->fix_size;
12522 else
12523 mp->max_address = max_address;
12524
12525 mp->next = max_mp;
12526 mp->prev = max_mp->prev;
12527 max_mp->prev = mp;
12528 if (mp->prev != NULL)
12529 mp->prev->next = mp;
12530 else
12531 minipool_vector_head = mp;
12532 }
12533
12534 /* Save the new entry. */
12535 max_mp = mp;
12536
12537 /* Scan over the preceding entries and adjust their addresses as
12538 required. */
12539 while (mp->prev != NULL
12540 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12541 {
12542 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12543 mp = mp->prev;
12544 }
12545
12546 return max_mp;
12547 }
12548
12549 static Mnode *
12550 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12551 HOST_WIDE_INT min_address)
12552 {
12553 HOST_WIDE_INT offset;
12554
12555 /* The code below assumes these are different. */
12556 gcc_assert (mp != min_mp);
12557
12558 if (min_mp == NULL)
12559 {
12560 if (min_address > mp->min_address)
12561 mp->min_address = min_address;
12562 }
12563 else
12564 {
12565 /* We will adjust this below if it is too loose. */
12566 mp->min_address = min_address;
12567
12568 /* Unlink MP from its current position. Since min_mp is non-null,
12569 mp->next must be non-null. */
12570 mp->next->prev = mp->prev;
12571 if (mp->prev != NULL)
12572 mp->prev->next = mp->next;
12573 else
12574 minipool_vector_head = mp->next;
12575
12576 /* Reinsert it after MIN_MP. */
12577 mp->prev = min_mp;
12578 mp->next = min_mp->next;
12579 min_mp->next = mp;
12580 if (mp->next != NULL)
12581 mp->next->prev = mp;
12582 else
12583 minipool_vector_tail = mp;
12584 }
12585
12586 min_mp = mp;
12587
12588 offset = 0;
12589 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12590 {
12591 mp->offset = offset;
12592 if (mp->refcount > 0)
12593 offset += mp->fix_size;
12594
12595 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12596 mp->next->min_address = mp->min_address + mp->fix_size;
12597 }
12598
12599 return min_mp;
12600 }
12601
12602 /* Add a constant to the minipool for a backward reference. Returns the
12603 node added or NULL if the constant will not fit in this pool.
12604
12605 Note that the code for inserting a backwards reference can be
12606 somewhat confusing because the calculated offsets for each fix do
12607 not take into account the size of the pool (which is still under
12608 construction). */
12609 static Mnode *
12610 add_minipool_backward_ref (Mfix *fix)
12611 {
12612 /* If set, min_mp is the last pool_entry that has a lower constraint
12613 than the one we are trying to add. */
12614 Mnode *min_mp = NULL;
12615 /* This can be negative, since it is only a constraint. */
12616 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12617 Mnode *mp;
12618
12619 /* If we can't reach the current pool from this insn, or if we can't
12620 insert this entry at the end of the pool without pushing other
12621 fixes out of range, then we don't try. This ensures that we
12622 can't fail later on. */
12623 if (min_address >= minipool_barrier->address
12624 || (minipool_vector_tail->min_address + fix->fix_size
12625 >= minipool_barrier->address))
12626 return NULL;
12627
12628 /* Scan the pool to see if a constant with the same value has
12629 already been added. While we are doing this, also note the
12630 location where we must insert the constant if it doesn't already
12631 exist. */
12632 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12633 {
12634 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12635 && fix->mode == mp->mode
12636 && (GET_CODE (fix->value) != CODE_LABEL
12637 || (CODE_LABEL_NUMBER (fix->value)
12638 == CODE_LABEL_NUMBER (mp->value)))
12639 && rtx_equal_p (fix->value, mp->value)
12640 /* Check that there is enough slack to move this entry to the
12641 end of the table (this is conservative). */
12642 && (mp->max_address
12643 > (minipool_barrier->address
12644 + minipool_vector_tail->offset
12645 + minipool_vector_tail->fix_size)))
12646 {
12647 mp->refcount++;
12648 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12649 }
12650
12651 if (min_mp != NULL)
12652 mp->min_address += fix->fix_size;
12653 else
12654 {
12655 /* Note the insertion point if necessary. */
12656 if (mp->min_address < min_address)
12657 {
12658 /* For now, we do not allow the insertion of nodes requiring
12659 8-byte alignment anywhere but at the start of the pool. */
12660 if (ARM_DOUBLEWORD_ALIGN
12661 && fix->fix_size >= 8 && mp->fix_size < 8)
12662 return NULL;
12663 else
12664 min_mp = mp;
12665 }
12666 else if (mp->max_address
12667 < minipool_barrier->address + mp->offset + fix->fix_size)
12668 {
12669 /* Inserting before this entry would push the fix beyond
12670 its maximum address (which can happen if we have
12671 re-located a forwards fix); force the new fix to come
12672 after it. */
12673 if (ARM_DOUBLEWORD_ALIGN
12674 && fix->fix_size >= 8 && mp->fix_size < 8)
12675 return NULL;
12676 else
12677 {
12678 min_mp = mp;
12679 min_address = mp->min_address + fix->fix_size;
12680 }
12681 }
12682 /* Do not insert a non-8-byte aligned quantity before 8-byte
12683 aligned quantities. */
12684 else if (ARM_DOUBLEWORD_ALIGN
12685 && fix->fix_size < 8
12686 && mp->fix_size >= 8)
12687 {
12688 min_mp = mp;
12689 min_address = mp->min_address + fix->fix_size;
12690 }
12691 }
12692 }
12693
12694 /* We need to create a new entry. */
12695 mp = XNEW (Mnode);
12696 mp->fix_size = fix->fix_size;
12697 mp->mode = fix->mode;
12698 mp->value = fix->value;
12699 mp->refcount = 1;
12700 mp->max_address = minipool_barrier->address + 65536;
12701
12702 mp->min_address = min_address;
12703
12704 if (min_mp == NULL)
12705 {
12706 mp->prev = NULL;
12707 mp->next = minipool_vector_head;
12708
12709 if (mp->next == NULL)
12710 {
12711 minipool_vector_tail = mp;
12712 minipool_vector_label = gen_label_rtx ();
12713 }
12714 else
12715 mp->next->prev = mp;
12716
12717 minipool_vector_head = mp;
12718 }
12719 else
12720 {
12721 mp->next = min_mp->next;
12722 mp->prev = min_mp;
12723 min_mp->next = mp;
12724
12725 if (mp->next != NULL)
12726 mp->next->prev = mp;
12727 else
12728 minipool_vector_tail = mp;
12729 }
12730
12731 /* Save the new entry. */
12732 min_mp = mp;
12733
12734 if (mp->prev)
12735 mp = mp->prev;
12736 else
12737 mp->offset = 0;
12738
12739 /* Scan over the following entries and adjust their offsets. */
12740 while (mp->next != NULL)
12741 {
12742 if (mp->next->min_address < mp->min_address + mp->fix_size)
12743 mp->next->min_address = mp->min_address + mp->fix_size;
12744
12745 if (mp->refcount)
12746 mp->next->offset = mp->offset + mp->fix_size;
12747 else
12748 mp->next->offset = mp->offset;
12749
12750 mp = mp->next;
12751 }
12752
12753 return min_mp;
12754 }
12755
12756 static void
12757 assign_minipool_offsets (Mfix *barrier)
12758 {
12759 HOST_WIDE_INT offset = 0;
12760 Mnode *mp;
12761
12762 minipool_barrier = barrier;
12763
12764 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12765 {
12766 mp->offset = offset;
12767
12768 if (mp->refcount > 0)
12769 offset += mp->fix_size;
12770 }
12771 }
12772
12773 /* Output the literal table.  */
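/* An illustrative sketch of the emitted pool (register numbers, label
   names and exact directives are only examples; the real output comes
   from the consttable_* patterns in the machine description):

	.align	2		@ or 3 when an 8-byte entry is present
   .LCP0:			@ minipool_vector_label
	.word	0x12345678	@ a consttable_4 entry
	.word	0xdeadbeef	@ another 4-byte entry

   Insns that were fixed up load PC-relative from .LCP0 plus the
   per-entry offset assigned above.  */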
12774 static void
12775 dump_minipool (rtx scan)
12776 {
12777 Mnode * mp;
12778 Mnode * nmp;
12779 int align64 = 0;
12780
12781 if (ARM_DOUBLEWORD_ALIGN)
12782 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12783 if (mp->refcount > 0 && mp->fix_size >= 8)
12784 {
12785 align64 = 1;
12786 break;
12787 }
12788
12789 if (dump_file)
12790 fprintf (dump_file,
12791 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12792 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12793
12794 scan = emit_label_after (gen_label_rtx (), scan);
12795 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12796 scan = emit_label_after (minipool_vector_label, scan);
12797
12798 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12799 {
12800 if (mp->refcount > 0)
12801 {
12802 if (dump_file)
12803 {
12804 fprintf (dump_file,
12805 ";; Offset %u, min %ld, max %ld ",
12806 (unsigned) mp->offset, (unsigned long) mp->min_address,
12807 (unsigned long) mp->max_address);
12808 arm_print_value (dump_file, mp->value);
12809 fputc ('\n', dump_file);
12810 }
12811
12812 switch (mp->fix_size)
12813 {
12814 #ifdef HAVE_consttable_1
12815 case 1:
12816 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12817 break;
12818
12819 #endif
12820 #ifdef HAVE_consttable_2
12821 case 2:
12822 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12823 break;
12824
12825 #endif
12826 #ifdef HAVE_consttable_4
12827 case 4:
12828 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12829 break;
12830
12831 #endif
12832 #ifdef HAVE_consttable_8
12833 case 8:
12834 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12835 break;
12836
12837 #endif
12838 #ifdef HAVE_consttable_16
12839 case 16:
12840 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12841 break;
12842
12843 #endif
12844 default:
12845 gcc_unreachable ();
12846 }
12847 }
12848
12849 nmp = mp->next;
12850 free (mp);
12851 }
12852
12853 minipool_vector_head = minipool_vector_tail = NULL;
12854 scan = emit_insn_after (gen_consttable_end (), scan);
12855 scan = emit_barrier_after (scan);
12856 }
12857
12858 /* Return the cost of forcibly inserting a barrier after INSN. */
12859 static int
12860 arm_barrier_cost (rtx insn)
12861 {
12862 /* Basing the location of the pool on the loop depth is preferable,
12863 but at the moment, the basic block information seems to be
12864 corrupt by this stage of the compilation. */
12865 int base_cost = 50;
12866 rtx next = next_nonnote_insn (insn);
12867
12868 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12869 base_cost -= 20;
12870
12871 switch (GET_CODE (insn))
12872 {
12873 case CODE_LABEL:
12874 /* It will always be better to place the table before the label, rather
12875 than after it. */
12876 return 50;
12877
12878 case INSN:
12879 case CALL_INSN:
12880 return base_cost;
12881
12882 case JUMP_INSN:
12883 return base_cost - 10;
12884
12885 default:
12886 return base_cost + 10;
12887 }
12888 }
12889
12890 /* Find the best place in the insn stream in the range
12891 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12892 Create the barrier by inserting a jump and add a new fix entry for
12893 it. */
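/* An illustrative sketch of the transformation (the label name is only
   an example): after the selected insn we emit

	b	.Lskip		@ new JUMP_INSN branching over the gap
	@ BARRIER -- a minipool may later be dumped here
   .Lskip:

   so that a constant pool placed at the barrier is never reached by
   falling through.  */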
12894 static Mfix *
12895 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12896 {
12897 HOST_WIDE_INT count = 0;
12898 rtx barrier;
12899 rtx from = fix->insn;
12900 /* The instruction after which we will insert the jump. */
12901 rtx selected = NULL;
12902 int selected_cost;
12903 /* The address at which the jump instruction will be placed. */
12904 HOST_WIDE_INT selected_address;
12905 Mfix * new_fix;
12906 HOST_WIDE_INT max_count = max_address - fix->address;
12907 rtx label = gen_label_rtx ();
12908
12909 selected_cost = arm_barrier_cost (from);
12910 selected_address = fix->address;
12911
12912 while (from && count < max_count)
12913 {
12914 rtx tmp;
12915 int new_cost;
12916
12917 /* This code shouldn't have been called if there was a natural barrier
12918 within range. */
12919 gcc_assert (GET_CODE (from) != BARRIER);
12920
12921 /* Count the length of this insn. This must stay in sync with the
12922 code that pushes minipool fixes. */
12923 if (LABEL_P (from))
12924 count += get_label_padding (from);
12925 else
12926 count += get_attr_length (from);
12927
12928 /* If there is a jump table, add its length. */
12929 tmp = is_jump_table (from);
12930 if (tmp != NULL)
12931 {
12932 count += get_jump_table_size (tmp);
12933
12934 /* Jump tables aren't in a basic block, so base the cost on
12935 the dispatch insn. If we select this location, we will
12936 still put the pool after the table. */
12937 new_cost = arm_barrier_cost (from);
12938
12939 if (count < max_count
12940 && (!selected || new_cost <= selected_cost))
12941 {
12942 selected = tmp;
12943 selected_cost = new_cost;
12944 selected_address = fix->address + count;
12945 }
12946
12947 /* Continue after the dispatch table. */
12948 from = NEXT_INSN (tmp);
12949 continue;
12950 }
12951
12952 new_cost = arm_barrier_cost (from);
12953
12954 if (count < max_count
12955 && (!selected || new_cost <= selected_cost))
12956 {
12957 selected = from;
12958 selected_cost = new_cost;
12959 selected_address = fix->address + count;
12960 }
12961
12962 from = NEXT_INSN (from);
12963 }
12964
12965 /* Make sure that we found a place to insert the jump. */
12966 gcc_assert (selected);
12967
12968 /* Make sure we do not split a call and its corresponding
12969 CALL_ARG_LOCATION note. */
12970 if (CALL_P (selected))
12971 {
12972 rtx next = NEXT_INSN (selected);
12973 if (next && NOTE_P (next)
12974 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12975 selected = next;
12976 }
12977
12978 /* Create a new JUMP_INSN that branches around a barrier. */
12979 from = emit_jump_insn_after (gen_jump (label), selected);
12980 JUMP_LABEL (from) = label;
12981 barrier = emit_barrier_after (from);
12982 emit_label_after (label, barrier);
12983
12984 /* Create a minipool barrier entry for the new barrier. */
12985 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12986 new_fix->insn = barrier;
12987 new_fix->address = selected_address;
12988 new_fix->next = fix->next;
12989 fix->next = new_fix;
12990
12991 return new_fix;
12992 }
12993
12994 /* Record that there is a natural barrier in the insn stream at
12995 ADDRESS. */
12996 static void
12997 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12998 {
12999 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13000
13001 fix->insn = insn;
13002 fix->address = address;
13003
13004 fix->next = NULL;
13005 if (minipool_fix_head != NULL)
13006 minipool_fix_tail->next = fix;
13007 else
13008 minipool_fix_head = fix;
13009
13010 minipool_fix_tail = fix;
13011 }
13012
13013 /* Record INSN, which will need fixing up to load a value from the
13014 minipool. ADDRESS is the offset of the insn since the start of the
13015 function; LOC is a pointer to the part of the insn which requires
13016 fixing; VALUE is the constant that must be loaded, which is of type
13017 MODE. */
13018 static void
13019 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13020 enum machine_mode mode, rtx value)
13021 {
13022 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13023
13024 fix->insn = insn;
13025 fix->address = address;
13026 fix->loc = loc;
13027 fix->mode = mode;
13028 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13029 fix->value = value;
13030 fix->forwards = get_attr_pool_range (insn);
13031 fix->backwards = get_attr_neg_pool_range (insn);
13032 fix->minipool = NULL;
13033
13034 /* If an insn doesn't have a range defined for it, then it isn't
13035 expecting to be reworked by this code. Better to stop now than
13036 to generate duff assembly code. */
13037 gcc_assert (fix->forwards || fix->backwards);
13038
13039 /* If an entry requires 8-byte alignment then assume all constant pools
13040 require 4 bytes of padding. Trying to do this later on a per-pool
13041 basis is awkward because existing pool entries have to be modified. */
13042 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13043 minipool_pad = 4;
13044
13045 if (dump_file)
13046 {
13047 fprintf (dump_file,
13048 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13049 GET_MODE_NAME (mode),
13050 INSN_UID (insn), (unsigned long) address,
13051 -1 * (long)fix->backwards, (long)fix->forwards);
13052 arm_print_value (dump_file, fix->value);
13053 fprintf (dump_file, "\n");
13054 }
13055
13056 /* Add it to the chain of fixes. */
13057 fix->next = NULL;
13058
13059 if (minipool_fix_head != NULL)
13060 minipool_fix_tail->next = fix;
13061 else
13062 minipool_fix_head = fix;
13063
13064 minipool_fix_tail = fix;
13065 }
13066
13067 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13068 Returns the number of insns needed, or 99 if we don't know how to
13069 do it. */
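/* For example (a hedged sketch, assuming both halves are valid 8-bit
   rotated immediates): for the value 0x0000001100000022 the low part
   is 0x22 and the high part is 0x11, each needing one instruction, so
   the returned cost would be 2.  */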
13070 int
13071 arm_const_double_inline_cost (rtx val)
13072 {
13073 rtx lowpart, highpart;
13074 enum machine_mode mode;
13075
13076 mode = GET_MODE (val);
13077
13078 if (mode == VOIDmode)
13079 mode = DImode;
13080
13081 gcc_assert (GET_MODE_SIZE (mode) == 8);
13082
13083 lowpart = gen_lowpart (SImode, val);
13084 highpart = gen_highpart_mode (SImode, mode, val);
13085
13086 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13087 gcc_assert (GET_CODE (highpart) == CONST_INT);
13088
13089 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13090 NULL_RTX, NULL_RTX, 0, 0)
13091 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13092 NULL_RTX, NULL_RTX, 0, 0));
13093 }
13094
13095 /* Return true if it is worthwhile to split a 64-bit constant into two
13096 32-bit operations. This is the case if optimizing for size, or
13097 if we have load delay slots, or if one 32-bit part can be done with
13098 a single data operation. */
13099 bool
13100 arm_const_double_by_parts (rtx val)
13101 {
13102 enum machine_mode mode = GET_MODE (val);
13103 rtx part;
13104
13105 if (optimize_size || arm_ld_sched)
13106 return true;
13107
13108 if (mode == VOIDmode)
13109 mode = DImode;
13110
13111 part = gen_highpart_mode (SImode, mode, val);
13112
13113 gcc_assert (GET_CODE (part) == CONST_INT);
13114
13115 if (const_ok_for_arm (INTVAL (part))
13116 || const_ok_for_arm (~INTVAL (part)))
13117 return true;
13118
13119 part = gen_lowpart (SImode, val);
13120
13121 gcc_assert (GET_CODE (part) == CONST_INT);
13122
13123 if (const_ok_for_arm (INTVAL (part))
13124 || const_ok_for_arm (~INTVAL (part)))
13125 return true;
13126
13127 return false;
13128 }
13129
13130 /* Return true if it is possible to inline both the high and low parts
13131 of a 64-bit constant into 32-bit data processing instructions. */
13132 bool
13133 arm_const_double_by_immediates (rtx val)
13134 {
13135 enum machine_mode mode = GET_MODE (val);
13136 rtx part;
13137
13138 if (mode == VOIDmode)
13139 mode = DImode;
13140
13141 part = gen_highpart_mode (SImode, mode, val);
13142
13143 gcc_assert (GET_CODE (part) == CONST_INT);
13144
13145 if (!const_ok_for_arm (INTVAL (part)))
13146 return false;
13147
13148 part = gen_lowpart (SImode, val);
13149
13150 gcc_assert (GET_CODE (part) == CONST_INT);
13151
13152 if (!const_ok_for_arm (INTVAL (part)))
13153 return false;
13154
13155 return true;
13156 }
13157
13158 /* Scan INSN and note any of its operands that need fixing.
13159 If DO_PUSHES is false we do not actually push any of the fixups
13160 needed. */
13161 static void
13162 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13163 {
13164 int opno;
13165
13166 extract_insn (insn);
13167
13168 if (!constrain_operands (1))
13169 fatal_insn_not_found (insn);
13170
13171 if (recog_data.n_alternatives == 0)
13172 return;
13173
13174 /* Fill in recog_op_alt with information about the constraints of
13175 this insn. */
13176 preprocess_constraints ();
13177
13178 for (opno = 0; opno < recog_data.n_operands; opno++)
13179 {
13180 /* Things we need to fix can only occur in inputs. */
13181 if (recog_data.operand_type[opno] != OP_IN)
13182 continue;
13183
13184 /* If this alternative is a memory reference, then any mention
13185 of constants in this alternative is really to fool reload
13186 into allowing us to accept one there. We need to fix them up
13187 now so that we output the right code. */
13188 if (recog_op_alt[opno][which_alternative].memory_ok)
13189 {
13190 rtx op = recog_data.operand[opno];
13191
13192 if (CONSTANT_P (op))
13193 {
13194 if (do_pushes)
13195 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13196 recog_data.operand_mode[opno], op);
13197 }
13198 else if (GET_CODE (op) == MEM
13199 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13200 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13201 {
13202 if (do_pushes)
13203 {
13204 rtx cop = avoid_constant_pool_reference (op);
13205
13206 /* Casting the address of something to a mode narrower
13207 than a word can cause avoid_constant_pool_reference()
13208 to return the pool reference itself. That's no good to
13209 us here.  Let's just hope that we can use the
13210 constant pool value directly. */
13211 if (op == cop)
13212 cop = get_pool_constant (XEXP (op, 0));
13213
13214 push_minipool_fix (insn, address,
13215 recog_data.operand_loc[opno],
13216 recog_data.operand_mode[opno], cop);
13217 }
13218
13219 }
13220 }
13221 }
13222
13223 return;
13224 }
13225
13226 /* Convert instructions to their cc-clobbering variant if possible, since
13227 that allows us to use smaller encodings. */
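/* A hedged sketch of the intent (register numbers are only examples):
   when the condition codes are dead after the insn,

	add	r0, r1, r2	@ 32-bit Thumb-2 encoding

   can become

	adds	r0, r1, r2	@ 16-bit encoding, clobbers the flags

   which is done below by wrapping the SET in a PARALLEL with a
   clobber of CC_REGNUM.  */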
13228
13229 static void
13230 thumb2_reorg (void)
13231 {
13232 basic_block bb;
13233 regset_head live;
13234
13235 INIT_REG_SET (&live);
13236
13237 /* We are freeing block_for_insn in the toplev to keep compatibility
13238 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13239 compute_bb_for_insn ();
13240 df_analyze ();
13241
13242 FOR_EACH_BB (bb)
13243 {
13244 rtx insn;
13245
13246 COPY_REG_SET (&live, DF_LR_OUT (bb));
13247 df_simulate_initialize_backwards (bb, &live);
13248 FOR_BB_INSNS_REVERSE (bb, insn)
13249 {
13250 if (NONJUMP_INSN_P (insn)
13251 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13252 && GET_CODE (PATTERN (insn)) == SET)
13253 {
13254 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13255 rtx pat = PATTERN (insn);
13256 rtx dst = XEXP (pat, 0);
13257 rtx src = XEXP (pat, 1);
13258 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13259
13260 if (!OBJECT_P (src))
13261 op0 = XEXP (src, 0);
13262
13263 if (BINARY_P (src))
13264 op1 = XEXP (src, 1);
13265
13266 if (low_register_operand (dst, SImode))
13267 {
13268 switch (GET_CODE (src))
13269 {
13270 case PLUS:
13271 if (low_register_operand (op0, SImode))
13272 {
13273 /* ADDS <Rd>,<Rn>,<Rm> */
13274 if (low_register_operand (op1, SImode))
13275 action = CONV;
13276 /* ADDS <Rdn>,#<imm8> */
13277 /* SUBS <Rdn>,#<imm8> */
13278 else if (rtx_equal_p (dst, op0)
13279 && CONST_INT_P (op1)
13280 && IN_RANGE (INTVAL (op1), -255, 255))
13281 action = CONV;
13282 /* ADDS <Rd>,<Rn>,#<imm3> */
13283 /* SUBS <Rd>,<Rn>,#<imm3> */
13284 else if (CONST_INT_P (op1)
13285 && IN_RANGE (INTVAL (op1), -7, 7))
13286 action = CONV;
13287 }
13288 break;
13289
13290 case MINUS:
13291 /* RSBS <Rd>,<Rn>,#0
13292 Not handled here: see NEG below. */
13293 /* SUBS <Rd>,<Rn>,#<imm3>
13294 SUBS <Rdn>,#<imm8>
13295 Not handled here: see PLUS above. */
13296 /* SUBS <Rd>,<Rn>,<Rm> */
13297 if (low_register_operand (op0, SImode)
13298 && low_register_operand (op1, SImode))
13299 action = CONV;
13300 break;
13301
13302 case MULT:
13303 /* MULS <Rdm>,<Rn>,<Rdm>
13304 As an exception to the rule, this is only used
13305 when optimizing for size since MULS is slow on all
13306 known implementations. We do not even want to use
13307 MULS in cold code, if optimizing for speed, so we
13308 test the global flag here. */
13309 if (!optimize_size)
13310 break;
13311 /* else fall through. */
13312 case AND:
13313 case IOR:
13314 case XOR:
13315 /* ANDS <Rdn>,<Rm> */
13316 if (rtx_equal_p (dst, op0)
13317 && low_register_operand (op1, SImode))
13318 action = CONV;
13319 else if (rtx_equal_p (dst, op1)
13320 && low_register_operand (op0, SImode))
13321 action = SWAP_CONV;
13322 break;
13323
13324 case ASHIFTRT:
13325 case ASHIFT:
13326 case LSHIFTRT:
13327 /* ASRS <Rdn>,<Rm> */
13328 /* LSRS <Rdn>,<Rm> */
13329 /* LSLS <Rdn>,<Rm> */
13330 if (rtx_equal_p (dst, op0)
13331 && low_register_operand (op1, SImode))
13332 action = CONV;
13333 /* ASRS <Rd>,<Rm>,#<imm5> */
13334 /* LSRS <Rd>,<Rm>,#<imm5> */
13335 /* LSLS <Rd>,<Rm>,#<imm5> */
13336 else if (low_register_operand (op0, SImode)
13337 && CONST_INT_P (op1)
13338 && IN_RANGE (INTVAL (op1), 0, 31))
13339 action = CONV;
13340 break;
13341
13342 case ROTATERT:
13343 /* RORS <Rdn>,<Rm> */
13344 if (rtx_equal_p (dst, op0)
13345 && low_register_operand (op1, SImode))
13346 action = CONV;
13347 break;
13348
13349 case NOT:
13350 case NEG:
13351 /* MVNS <Rd>,<Rm> */
13352 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13353 if (low_register_operand (op0, SImode))
13354 action = CONV;
13355 break;
13356
13357 case CONST_INT:
13358 /* MOVS <Rd>,#<imm8> */
13359 if (CONST_INT_P (src)
13360 && IN_RANGE (INTVAL (src), 0, 255))
13361 action = CONV;
13362 break;
13363
13364 case REG:
13365 /* MOVS and MOV<c> with registers have different
13366 encodings, so are not relevant here. */
13367 break;
13368
13369 default:
13370 break;
13371 }
13372 }
13373
13374 if (action != SKIP)
13375 {
13376 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13377 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13378 rtvec vec;
13379
13380 if (action == SWAP_CONV)
13381 {
13382 src = copy_rtx (src);
13383 XEXP (src, 0) = op1;
13384 XEXP (src, 1) = op0;
13385 pat = gen_rtx_SET (VOIDmode, dst, src);
13386 vec = gen_rtvec (2, pat, clobber);
13387 }
13388 else /* action == CONV */
13389 vec = gen_rtvec (2, pat, clobber);
13390
13391 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13392 INSN_CODE (insn) = -1;
13393 }
13394 }
13395
13396 if (NONDEBUG_INSN_P (insn))
13397 df_simulate_one_insn_backwards (bb, insn, &live);
13398 }
13399 }
13400
13401 CLEAR_REG_SET (&live);
13402 }
13403
13404 /* GCC puts the pool in the wrong place for ARM, since we can only
13405 load addresses a limited distance around the pc. We do some
13406 special munging to move the constant pool values to the correct
13407 point in the code. */
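/* A hedged sketch of the effect (register number, label name and
   constant are only examples): an insn that needs a constant which
   cannot be encoded as an immediate ends up loading it PC-relative
   from a nearby minipool,

	ldr	r0, .LCP0	@ must lie within the insn's pool_range
	...
   .LCP0:
	.word	0x12345678

   The code below records the fixups, chooses pool locations, and
   emits the pools via dump_minipool.  */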
13408 static void
13409 arm_reorg (void)
13410 {
13411 rtx insn;
13412 HOST_WIDE_INT address = 0;
13413 Mfix * fix;
13414
13415 if (TARGET_THUMB2)
13416 thumb2_reorg ();
13417
13418 /* Ensure all insns that must be split have been split at this point.
13419 Otherwise, the pool placement code below may compute incorrect
13420 insn lengths. Note that when optimizing, all insns have already
13421 been split at this point. */
13422 if (!optimize)
13423 split_all_insns_noflow ();
13424
13425 minipool_fix_head = minipool_fix_tail = NULL;
13426
13427 /* The first insn must always be a note, or the code below won't
13428 scan it properly. */
13429 insn = get_insns ();
13430 gcc_assert (GET_CODE (insn) == NOTE);
13431 minipool_pad = 0;
13432
13433 /* Scan all the insns and record the operands that will need fixing. */
13434 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13435 {
13436 if (GET_CODE (insn) == BARRIER)
13437 push_minipool_barrier (insn, address);
13438 else if (INSN_P (insn))
13439 {
13440 rtx table;
13441
13442 note_invalid_constants (insn, address, true);
13443 address += get_attr_length (insn);
13444
13445 /* If the insn is a vector jump, add the size of the table
13446 and skip the table. */
13447 if ((table = is_jump_table (insn)) != NULL)
13448 {
13449 address += get_jump_table_size (table);
13450 insn = table;
13451 }
13452 }
13453 else if (LABEL_P (insn))
13454 /* Add the worst-case padding due to alignment. We don't add
13455 the _current_ padding because the minipool insertions
13456 themselves might change it. */
13457 address += get_label_padding (insn);
13458 }
13459
13460 fix = minipool_fix_head;
13461
13462 /* Now scan the fixups and perform the required changes. */
13463 while (fix)
13464 {
13465 Mfix * ftmp;
13466 Mfix * fdel;
13467 Mfix * last_added_fix;
13468 Mfix * last_barrier = NULL;
13469 Mfix * this_fix;
13470
13471 /* Skip any further barriers before the next fix. */
13472 while (fix && GET_CODE (fix->insn) == BARRIER)
13473 fix = fix->next;
13474
13475 /* No more fixes. */
13476 if (fix == NULL)
13477 break;
13478
13479 last_added_fix = NULL;
13480
13481 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13482 {
13483 if (GET_CODE (ftmp->insn) == BARRIER)
13484 {
13485 if (ftmp->address >= minipool_vector_head->max_address)
13486 break;
13487
13488 last_barrier = ftmp;
13489 }
13490 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13491 break;
13492
13493 last_added_fix = ftmp; /* Keep track of the last fix added. */
13494 }
13495
13496 /* If we found a barrier, drop back to that; any fixes that we
13497 could have reached but come after the barrier will now go in
13498 the next mini-pool. */
13499 if (last_barrier != NULL)
13500 {
13501 /* Reduce the refcount for those fixes that won't go into this
13502 pool after all. */
13503 for (fdel = last_barrier->next;
13504 fdel && fdel != ftmp;
13505 fdel = fdel->next)
13506 {
13507 fdel->minipool->refcount--;
13508 fdel->minipool = NULL;
13509 }
13510
13511 ftmp = last_barrier;
13512 }
13513 else
13514 {
13515 /* ftmp is the first fix that we can't fit into this pool and
13516 there are no natural barriers that we could use.  Insert a
13517 new barrier in the code somewhere between the previous
13518 fix and this one, and arrange to jump around it. */
13519 HOST_WIDE_INT max_address;
13520
13521 /* The last item on the list of fixes must be a barrier, so
13522 we can never run off the end of the list of fixes without
13523 last_barrier being set. */
13524 gcc_assert (ftmp);
13525
13526 max_address = minipool_vector_head->max_address;
13527 /* Check that there isn't another fix that is in range that
13528 we couldn't fit into this pool because the pool was
13529 already too large: we need to put the pool before such an
13530 instruction. The pool itself may come just after the
13531 fix because create_fix_barrier also allows space for a
13532 jump instruction. */
13533 if (ftmp->address < max_address)
13534 max_address = ftmp->address + 1;
13535
13536 last_barrier = create_fix_barrier (last_added_fix, max_address);
13537 }
13538
13539 assign_minipool_offsets (last_barrier);
13540
13541 while (ftmp)
13542 {
13543 if (GET_CODE (ftmp->insn) != BARRIER
13544 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13545 == NULL))
13546 break;
13547
13548 ftmp = ftmp->next;
13549 }
13550
13551 /* Scan over the fixes we have identified for this pool, fixing them
13552 up and adding the constants to the pool itself. */
13553 for (this_fix = fix; this_fix && ftmp != this_fix;
13554 this_fix = this_fix->next)
13555 if (GET_CODE (this_fix->insn) != BARRIER)
13556 {
13557 rtx addr
13558 = plus_constant (Pmode,
13559 gen_rtx_LABEL_REF (VOIDmode,
13560 minipool_vector_label),
13561 this_fix->minipool->offset);
13562 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13563 }
13564
13565 dump_minipool (last_barrier->insn);
13566 fix = ftmp;
13567 }
13568
13569 /* From now on we must synthesize any constants that we can't handle
13570 directly. This can happen if the RTL gets split during final
13571 instruction generation. */
13572 after_arm_reorg = 1;
13573
13574 /* Free the minipool memory. */
13575 obstack_free (&minipool_obstack, minipool_startobj);
13576 }
13577 \f
13578 /* Routines to output assembly language. */
13579
13580 /* If the rtx is the correct value then return the string of the number.
13581 In this way we can ensure that valid double constants are generated even
13582 when cross compiling. */
13583 const char *
13584 fp_immediate_constant (rtx x)
13585 {
13586 REAL_VALUE_TYPE r;
13587
13588 if (!fp_consts_inited)
13589 init_fp_table ();
13590
13591 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13592
13593 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13594 return "0";
13595 }
13596
13597 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13598 static const char *
13599 fp_const_from_val (REAL_VALUE_TYPE *r)
13600 {
13601 if (!fp_consts_inited)
13602 init_fp_table ();
13603
13604 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13605 return "0";
13606 }
13607
13608 /* OPERANDS[0] is the entire list of insns that constitute pop,
13609 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13610 is in the list, UPDATE is true iff the list contains explicit
13611 update of base register. */
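/* For example (illustrative only): with SP as the base register and
   an explicit update, the unified-syntax output would look like

	pop	{r4, r5, pc}

   whereas a non-SP base without writeback would use ldmia/ldmfd, e.g.

	ldmia	r7, {r4, r5, pc}  */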
13612 void
13613 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13614 bool update)
13615 {
13616 int i;
13617 char pattern[100];
13618 int offset;
13619 const char *conditional;
13620 int num_saves = XVECLEN (operands[0], 0);
13621 unsigned int regno;
13622 unsigned int regno_base = REGNO (operands[1]);
13623
13624 offset = 0;
13625 offset += update ? 1 : 0;
13626 offset += return_pc ? 1 : 0;
13627
13628 /* Is the base register in the list? */
13629 for (i = offset; i < num_saves; i++)
13630 {
13631 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13632 /* If SP is in the list, then the base register must be SP. */
13633 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13634 /* If base register is in the list, there must be no explicit update. */
13635 if (regno == regno_base)
13636 gcc_assert (!update);
13637 }
13638
13639 conditional = reverse ? "%?%D0" : "%?%d0";
13640 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13641 {
13642 /* Output pop (not stmfd) because it has a shorter encoding. */
13643 gcc_assert (update);
13644 sprintf (pattern, "pop%s\t{", conditional);
13645 }
13646 else
13647 {
13648 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13649 It's just a convention; their semantics are identical. */
13650 if (regno_base == SP_REGNUM)
13651 sprintf (pattern, "ldm%sfd\t", conditional);
13652 else if (TARGET_UNIFIED_ASM)
13653 sprintf (pattern, "ldmia%s\t", conditional);
13654 else
13655 sprintf (pattern, "ldm%sia\t", conditional);
13656
13657 strcat (pattern, reg_names[regno_base]);
13658 if (update)
13659 strcat (pattern, "!, {");
13660 else
13661 strcat (pattern, ", {");
13662 }
13663
13664 /* Output the first destination register. */
13665 strcat (pattern,
13666 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13667
13668 /* Output the rest of the destination registers. */
13669 for (i = offset + 1; i < num_saves; i++)
13670 {
13671 strcat (pattern, ", ");
13672 strcat (pattern,
13673 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13674 }
13675
13676 strcat (pattern, "}");
13677
13678 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13679 strcat (pattern, "^");
13680
13681 output_asm_insn (pattern, &cond);
13682 }
13683
13684
13685 /* Output the assembly for a store multiple. */
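/* For example (illustrative only), pushing the two register pairs
   starting at d8 would be printed as

	fstmfdd	sp!, {d8, d9}  */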
13686
13687 const char *
13688 vfp_output_fstmd (rtx * operands)
13689 {
13690 char pattern[100];
13691 int p;
13692 int base;
13693 int i;
13694
13695 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13696 p = strlen (pattern);
13697
13698 gcc_assert (GET_CODE (operands[1]) == REG);
13699
13700 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13701 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13702 {
13703 p += sprintf (&pattern[p], ", d%d", base + i);
13704 }
13705 strcpy (&pattern[p], "}");
13706
13707 output_asm_insn (pattern, operands);
13708 return "";
13709 }
13710
13711
13712 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13713 number of bytes pushed. */
13714
13715 static int
13716 vfp_emit_fstmd (int base_reg, int count)
13717 {
13718 rtx par;
13719 rtx dwarf;
13720 rtx tmp, reg;
13721 int i;
13722
13723 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13724 register pairs are stored by a store multiple insn. We avoid this
13725 by pushing an extra pair. */
13726 if (count == 2 && !arm_arch6)
13727 {
13728 if (base_reg == LAST_VFP_REGNUM - 3)
13729 base_reg -= 2;
13730 count++;
13731 }
13732
13733 /* FSTMD may not store more than 16 doubleword registers at once. Split
13734 larger stores into multiple parts (up to a maximum of two, in
13735 practice). */
13736 if (count > 16)
13737 {
13738 int saved;
13739 /* NOTE: base_reg is an internal register number, so each D register
13740 counts as 2. */
13741 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13742 saved += vfp_emit_fstmd (base_reg, 16);
13743 return saved;
13744 }
13745
13746 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13747 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13748
13749 reg = gen_rtx_REG (DFmode, base_reg);
13750 base_reg += 2;
13751
13752 XVECEXP (par, 0, 0)
13753 = gen_rtx_SET (VOIDmode,
13754 gen_frame_mem
13755 (BLKmode,
13756 gen_rtx_PRE_MODIFY (Pmode,
13757 stack_pointer_rtx,
13758 plus_constant
13759 (Pmode, stack_pointer_rtx,
13760 - (count * 8)))
13761 ),
13762 gen_rtx_UNSPEC (BLKmode,
13763 gen_rtvec (1, reg),
13764 UNSPEC_PUSH_MULT));
13765
13766 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13767 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13768 RTX_FRAME_RELATED_P (tmp) = 1;
13769 XVECEXP (dwarf, 0, 0) = tmp;
13770
13771 tmp = gen_rtx_SET (VOIDmode,
13772 gen_frame_mem (DFmode, stack_pointer_rtx),
13773 reg);
13774 RTX_FRAME_RELATED_P (tmp) = 1;
13775 XVECEXP (dwarf, 0, 1) = tmp;
13776
13777 for (i = 1; i < count; i++)
13778 {
13779 reg = gen_rtx_REG (DFmode, base_reg);
13780 base_reg += 2;
13781 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13782
13783 tmp = gen_rtx_SET (VOIDmode,
13784 gen_frame_mem (DFmode,
13785 plus_constant (Pmode,
13786 stack_pointer_rtx,
13787 i * 8)),
13788 reg);
13789 RTX_FRAME_RELATED_P (tmp) = 1;
13790 XVECEXP (dwarf, 0, i + 1) = tmp;
13791 }
13792
13793 par = emit_insn (par);
13794 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13795 RTX_FRAME_RELATED_P (par) = 1;
13796
13797 return count * 8;
13798 }
13799
13800 /* Emit a call instruction with pattern PAT. ADDR is the address of
13801 the call target. */
13802
13803 void
13804 arm_emit_call_insn (rtx pat, rtx addr)
13805 {
13806 rtx insn;
13807
13808 insn = emit_call_insn (pat);
13809
13810 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13811 If the call might use such an entry, add a use of the PIC register
13812 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13813 if (TARGET_VXWORKS_RTP
13814 && flag_pic
13815 && GET_CODE (addr) == SYMBOL_REF
13816 && (SYMBOL_REF_DECL (addr)
13817 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13818 : !SYMBOL_REF_LOCAL_P (addr)))
13819 {
13820 require_pic_register ();
13821 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13822 }
13823 }
13824
13825 /* Output a 'call' insn. */
13826 const char *
13827 output_call (rtx *operands)
13828 {
13829 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13830
13831 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13832 if (REGNO (operands[0]) == LR_REGNUM)
13833 {
13834 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13835 output_asm_insn ("mov%?\t%0, %|lr", operands);
13836 }
13837
13838 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13839
13840 if (TARGET_INTERWORK || arm_arch4t)
13841 output_asm_insn ("bx%?\t%0", operands);
13842 else
13843 output_asm_insn ("mov%?\t%|pc, %0", operands);
13844
13845 return "";
13846 }
13847
13848 /* Output a 'call' insn that is a reference in memory.  This is
13849 disabled for ARMv5 because we prefer to use blx instead; otherwise
13850 there's a significant performance overhead. */
13851 const char *
13852 output_call_mem (rtx *operands)
13853 {
13854 gcc_assert (!arm_arch5);
13855 if (TARGET_INTERWORK)
13856 {
13857 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13858 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13859 output_asm_insn ("bx%?\t%|ip", operands);
13860 }
13861 else if (regno_use_in (LR_REGNUM, operands[0]))
13862 {
13863 /* LR is used in the memory address. We load the address in the
13864 first instruction. It's safe to use IP as the target of the
13865 load since the call will kill it anyway. */
13866 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13867 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13868 if (arm_arch4t)
13869 output_asm_insn ("bx%?\t%|ip", operands);
13870 else
13871 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13872 }
13873 else
13874 {
13875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13876 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13877 }
13878
13879 return "";
13880 }
13881
13882
13883 /* Output a move from arm registers to arm registers of a long double
13884 OPERANDS[0] is the destination.
13885 OPERANDS[1] is the source. */
13886 const char *
13887 output_mov_long_double_arm_from_arm (rtx *operands)
13888 {
13889 /* We have to be careful here because the two might overlap. */
13890 int dest_start = REGNO (operands[0]);
13891 int src_start = REGNO (operands[1]);
13892 rtx ops[2];
13893 int i;
13894
13895 if (dest_start < src_start)
13896 {
13897 for (i = 0; i < 3; i++)
13898 {
13899 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13900 ops[1] = gen_rtx_REG (SImode, src_start + i);
13901 output_asm_insn ("mov%?\t%0, %1", ops);
13902 }
13903 }
13904 else
13905 {
13906 for (i = 2; i >= 0; i--)
13907 {
13908 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13909 ops[1] = gen_rtx_REG (SImode, src_start + i);
13910 output_asm_insn ("mov%?\t%0, %1", ops);
13911 }
13912 }
13913
13914 return "";
13915 }
13916
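/* Emit RTL to load SRC into DEST as a pair of 16-bit moves (or as a
   HIGH/LO_SUM pair for symbolic values).  For a constant this will
   typically assemble to a movw/movt sequence, e.g. (illustrative
   only) for 0x12345678:

	movw	r0, #0x5678
	movt	r0, #0x1234

   The movt is omitted when the upper half is zero.  */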
13917 void
13918 arm_emit_movpair (rtx dest, rtx src)
13919 {
13920 /* If the src is an immediate, simplify it. */
13921 if (CONST_INT_P (src))
13922 {
13923 HOST_WIDE_INT val = INTVAL (src);
13924 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13925 if ((val >> 16) & 0x0000ffff)
13926 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13927 GEN_INT (16)),
13928 GEN_INT ((val >> 16) & 0x0000ffff));
13929 return;
13930 }
13931 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13932 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
13933 }
13934
13935 /* Output a move between double words. It must be REG<-MEM
13936 or MEM<-REG. */
13937 const char *
13938 output_move_double (rtx *operands, bool emit, int *count)
13939 {
13940 enum rtx_code code0 = GET_CODE (operands[0]);
13941 enum rtx_code code1 = GET_CODE (operands[1]);
13942 rtx otherops[3];
13943 if (count)
13944 *count = 1;
13945
13946 /* The only case when this might happen is when
13947 you are looking at the length of a DImode instruction
13948 that has an invalid constant in it. */
13949 if (code0 == REG && code1 != MEM)
13950 {
13951 gcc_assert (!emit);
13952 *count = 2;
13953 return "";
13954 }
13955
13956 if (code0 == REG)
13957 {
13958 unsigned int reg0 = REGNO (operands[0]);
13959
13960 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13961
13962 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13963
13964 switch (GET_CODE (XEXP (operands[1], 0)))
13965 {
13966 case REG:
13967
13968 if (emit)
13969 {
13970 if (TARGET_LDRD
13971 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13972 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13973 else
13974 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13975 }
13976 break;
13977
13978 case PRE_INC:
13979 gcc_assert (TARGET_LDRD);
13980 if (emit)
13981 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13982 break;
13983
13984 case PRE_DEC:
13985 if (emit)
13986 {
13987 if (TARGET_LDRD)
13988 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13989 else
13990 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13991 }
13992 break;
13993
13994 case POST_INC:
13995 if (emit)
13996 {
13997 if (TARGET_LDRD)
13998 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13999 else
14000 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14001 }
14002 break;
14003
14004 case POST_DEC:
14005 gcc_assert (TARGET_LDRD);
14006 if (emit)
14007 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14008 break;
14009
14010 case PRE_MODIFY:
14011 case POST_MODIFY:
14012 /* Autoincrement addressing modes should never have overlapping
14013 base and destination registers, and overlapping index registers
14014 are already prohibited, so this doesn't need to worry about
14015 fix_cm3_ldrd. */
14016 otherops[0] = operands[0];
14017 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14018 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14019
14020 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14021 {
14022 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14023 {
14024 /* Registers overlap so split out the increment. */
14025 if (emit)
14026 {
14027 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14028 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14029 }
14030 if (count)
14031 *count = 2;
14032 }
14033 else
14034 {
14035 /* Use a single insn if we can.
14036 FIXME: IWMMXT allows offsets larger than ldrd can
14037 handle, fix these up with a pair of ldr. */
14038 if (TARGET_THUMB2
14039 || GET_CODE (otherops[2]) != CONST_INT
14040 || (INTVAL (otherops[2]) > -256
14041 && INTVAL (otherops[2]) < 256))
14042 {
14043 if (emit)
14044 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14045 }
14046 else
14047 {
14048 if (emit)
14049 {
14050 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14051 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14052 }
14053 if (count)
14054 *count = 2;
14055
14056 }
14057 }
14058 }
14059 else
14060 {
14061 /* Use a single insn if we can.
14062 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14063 fix these up with a pair of ldr. */
14064 if (TARGET_THUMB2
14065 || GET_CODE (otherops[2]) != CONST_INT
14066 || (INTVAL (otherops[2]) > -256
14067 && INTVAL (otherops[2]) < 256))
14068 {
14069 if (emit)
14070 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14071 }
14072 else
14073 {
14074 if (emit)
14075 {
14076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14077 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14078 }
14079 if (count)
14080 *count = 2;
14081 }
14082 }
14083 break;
14084
14085 case LABEL_REF:
14086 case CONST:
14087 /* We might be able to use ldrd %0, %1 here.  However, the range is
14088 different from that of ldr/adr, and it is broken on some ARMv7-M
14089 implementations. */
14090 /* Use the second register of the pair to avoid problematic
14091 overlap. */
14092 otherops[1] = operands[1];
14093 if (emit)
14094 output_asm_insn ("adr%?\t%0, %1", otherops);
14095 operands[1] = otherops[0];
14096 if (emit)
14097 {
14098 if (TARGET_LDRD)
14099 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14100 else
14101 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14102 }
14103
14104 if (count)
14105 *count = 2;
14106 break;
14107
14108 /* ??? This needs checking for thumb2. */
14109 default:
14110 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14111 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14112 {
14113 otherops[0] = operands[0];
14114 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14115 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14116
14117 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14118 {
14119 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14120 {
14121 switch ((int) INTVAL (otherops[2]))
14122 {
14123 case -8:
14124 if (emit)
14125 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14126 return "";
14127 case -4:
14128 if (TARGET_THUMB2)
14129 break;
14130 if (emit)
14131 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14132 return "";
14133 case 4:
14134 if (TARGET_THUMB2)
14135 break;
14136 if (emit)
14137 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14138 return "";
14139 }
14140 }
14141 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14142 operands[1] = otherops[0];
14143 if (TARGET_LDRD
14144 && (GET_CODE (otherops[2]) == REG
14145 || TARGET_THUMB2
14146 || (GET_CODE (otherops[2]) == CONST_INT
14147 && INTVAL (otherops[2]) > -256
14148 && INTVAL (otherops[2]) < 256)))
14149 {
14150 if (reg_overlap_mentioned_p (operands[0],
14151 otherops[2]))
14152 {
14153 rtx tmp;
14154 /* Swap base and index registers over to
14155 avoid a conflict. */
14156 tmp = otherops[1];
14157 otherops[1] = otherops[2];
14158 otherops[2] = tmp;
14159 }
14160 /* If both registers conflict, it will usually
14161 have been fixed by a splitter. */
14162 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14163 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14164 {
14165 if (emit)
14166 {
14167 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14168 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14169 }
14170 if (count)
14171 *count = 2;
14172 }
14173 else
14174 {
14175 otherops[0] = operands[0];
14176 if (emit)
14177 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14178 }
14179 return "";
14180 }
14181
14182 if (GET_CODE (otherops[2]) == CONST_INT)
14183 {
14184 if (emit)
14185 {
14186 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14187 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14188 else
14189 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14190 }
14191 }
14192 else
14193 {
14194 if (emit)
14195 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14196 }
14197 }
14198 else
14199 {
14200 if (emit)
14201 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14202 }
14203
14204 if (count)
14205 *count = 2;
14206
14207 if (TARGET_LDRD)
14208 return "ldr%(d%)\t%0, [%1]";
14209
14210 return "ldm%(ia%)\t%1, %M0";
14211 }
14212 else
14213 {
14214 otherops[1] = adjust_address (operands[1], SImode, 4);
14215 /* Take care of overlapping base/data reg. */
14216 if (reg_mentioned_p (operands[0], operands[1]))
14217 {
14218 if (emit)
14219 {
14220 output_asm_insn ("ldr%?\t%0, %1", otherops);
14221 output_asm_insn ("ldr%?\t%0, %1", operands);
14222 }
14223 if (count)
14224 *count = 2;
14225
14226 }
14227 else
14228 {
14229 if (emit)
14230 {
14231 output_asm_insn ("ldr%?\t%0, %1", operands);
14232 output_asm_insn ("ldr%?\t%0, %1", otherops);
14233 }
14234 if (count)
14235 *count = 2;
14236 }
14237 }
14238 }
14239 }
14240 else
14241 {
14242 /* Constraints should ensure this. */
14243 gcc_assert (code0 == MEM && code1 == REG);
14244 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14245
14246 switch (GET_CODE (XEXP (operands[0], 0)))
14247 {
14248 case REG:
14249 if (emit)
14250 {
14251 if (TARGET_LDRD)
14252 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14253 else
14254 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14255 }
14256 break;
14257
14258 case PRE_INC:
14259 gcc_assert (TARGET_LDRD);
14260 if (emit)
14261 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14262 break;
14263
14264 case PRE_DEC:
14265 if (emit)
14266 {
14267 if (TARGET_LDRD)
14268 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14269 else
14270 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14271 }
14272 break;
14273
14274 case POST_INC:
14275 if (emit)
14276 {
14277 if (TARGET_LDRD)
14278 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14279 else
14280 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14281 }
14282 break;
14283
14284 case POST_DEC:
14285 gcc_assert (TARGET_LDRD);
14286 if (emit)
14287 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14288 break;
14289
14290 case PRE_MODIFY:
14291 case POST_MODIFY:
14292 otherops[0] = operands[1];
14293 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14294 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14295
14296 /* IWMMXT allows offsets larger than strd can handle,
14297 fix these up with a pair of str. */
14298 if (!TARGET_THUMB2
14299 && GET_CODE (otherops[2]) == CONST_INT
14300 && (INTVAL(otherops[2]) <= -256
14301 || INTVAL(otherops[2]) >= 256))
14302 {
14303 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14304 {
14305 if (emit)
14306 {
14307 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14308 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14309 }
14310 if (count)
14311 *count = 2;
14312 }
14313 else
14314 {
14315 if (emit)
14316 {
14317 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14318 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14319 }
14320 if (count)
14321 *count = 2;
14322 }
14323 }
14324 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14325 {
14326 if (emit)
14327 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14328 }
14329 else
14330 {
14331 if (emit)
14332 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14333 }
14334 break;
14335
14336 case PLUS:
14337 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14338 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14339 {
14340 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14341 {
14342 case -8:
14343 if (emit)
14344 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14345 return "";
14346
14347 case -4:
14348 if (TARGET_THUMB2)
14349 break;
14350 if (emit)
14351 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14352 return "";
14353
14354 case 4:
14355 if (TARGET_THUMB2)
14356 break;
14357 if (emit)
14358 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14359 return "";
14360 }
14361 }
14362 if (TARGET_LDRD
14363 && (GET_CODE (otherops[2]) == REG
14364 || TARGET_THUMB2
14365 || (GET_CODE (otherops[2]) == CONST_INT
14366 && INTVAL (otherops[2]) > -256
14367 && INTVAL (otherops[2]) < 256)))
14368 {
14369 otherops[0] = operands[1];
14370 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14371 if (emit)
14372 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14373 return "";
14374 }
14375 /* Fall through */
14376
14377 default:
14378 otherops[0] = adjust_address (operands[0], SImode, 4);
14379 otherops[1] = operands[1];
14380 if (emit)
14381 {
14382 output_asm_insn ("str%?\t%1, %0", operands);
14383 output_asm_insn ("str%?\t%H1, %0", otherops);
14384 }
14385 if (count)
14386 *count = 2;
14387 }
14388 }
14389
14390 return "";
14391 }
14392
14393 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14394 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14395
14396 const char *
14397 output_move_quad (rtx *operands)
14398 {
14399 if (REG_P (operands[0]))
14400 {
14401 /* Load, or reg->reg move. */
14402
14403 if (MEM_P (operands[1]))
14404 {
14405 switch (GET_CODE (XEXP (operands[1], 0)))
14406 {
14407 case REG:
14408 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14409 break;
14410
14411 case LABEL_REF:
14412 case CONST:
14413 output_asm_insn ("adr%?\t%0, %1", operands);
14414 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14415 break;
14416
14417 default:
14418 gcc_unreachable ();
14419 }
14420 }
14421 else
14422 {
14423 rtx ops[2];
14424 int dest, src, i;
14425
14426 gcc_assert (REG_P (operands[1]));
14427
14428 dest = REGNO (operands[0]);
14429 src = REGNO (operands[1]);
14430
14431 /* This seems pretty dumb, but hopefully GCC won't try to do it
14432 very often. */
14433 if (dest < src)
14434 for (i = 0; i < 4; i++)
14435 {
14436 ops[0] = gen_rtx_REG (SImode, dest + i);
14437 ops[1] = gen_rtx_REG (SImode, src + i);
14438 output_asm_insn ("mov%?\t%0, %1", ops);
14439 }
14440 else
14441 for (i = 3; i >= 0; i--)
14442 {
14443 ops[0] = gen_rtx_REG (SImode, dest + i);
14444 ops[1] = gen_rtx_REG (SImode, src + i);
14445 output_asm_insn ("mov%?\t%0, %1", ops);
14446 }
14447 }
14448 }
14449 else
14450 {
14451 gcc_assert (MEM_P (operands[0]));
14452 gcc_assert (REG_P (operands[1]));
14453 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14454
14455 switch (GET_CODE (XEXP (operands[0], 0)))
14456 {
14457 case REG:
14458 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14459 break;
14460
14461 default:
14462 gcc_unreachable ();
14463 }
14464 }
14465
14466 return "";
14467 }
14468
14469 /* Output a VFP load or store instruction. */
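/* For example (illustrative only), a double-precision load from a
   reg+offset address is printed as

	fldd	d0, [r0, #8]

   and a single-precision store with post-increment as

	fstmias	r0!, {s0}  */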
14470
14471 const char *
14472 output_move_vfp (rtx *operands)
14473 {
14474 rtx reg, mem, addr, ops[2];
14475 int load = REG_P (operands[0]);
14476 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14477 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14478 const char *templ;
14479 char buff[50];
14480 enum machine_mode mode;
14481
14482 reg = operands[!load];
14483 mem = operands[load];
14484
14485 mode = GET_MODE (reg);
14486
14487 gcc_assert (REG_P (reg));
14488 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14489 gcc_assert (mode == SFmode
14490 || mode == DFmode
14491 || mode == SImode
14492 || mode == DImode
14493 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14494 gcc_assert (MEM_P (mem));
14495
14496 addr = XEXP (mem, 0);
14497
14498 switch (GET_CODE (addr))
14499 {
14500 case PRE_DEC:
14501 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14502 ops[0] = XEXP (addr, 0);
14503 ops[1] = reg;
14504 break;
14505
14506 case POST_INC:
14507 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14508 ops[0] = XEXP (addr, 0);
14509 ops[1] = reg;
14510 break;
14511
14512 default:
14513 templ = "f%s%c%%?\t%%%s0, %%1%s";
14514 ops[0] = reg;
14515 ops[1] = mem;
14516 break;
14517 }
14518
14519 sprintf (buff, templ,
14520 load ? "ld" : "st",
14521 dp ? 'd' : 's',
14522 dp ? "P" : "",
14523 integer_p ? "\t%@ int" : "");
14524 output_asm_insn (buff, ops);
14525
14526 return "";
14527 }
14528
14529 /* Output a Neon quad-word load or store, or a load or store for
14530 larger structure modes.
14531
14532 WARNING: The ordering of elements is weird in big-endian mode,
14533 because we use VSTM, as required by the EABI. GCC RTL defines
14534 element ordering based on in-memory order.  This can differ
14535 from the architectural ordering of elements within a NEON register.
14536 The intrinsics defined in arm_neon.h use the NEON register element
14537 ordering, not the GCC RTL element ordering.
14538
14539 For example, the in-memory ordering of a big-endian quadword
14540 vector with 16-bit elements when stored from register pair {d0,d1}
14541 will be (lowest address first, d0[N] is NEON register element N):
14542
14543 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14544
14545 When necessary, quadword registers (dN, dN+1) are moved to ARM
14546 registers from rN in the order:
14547
14548 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14549
14550 So that STM/LDM can be used on vectors in ARM registers, and the
14551 same memory layout will result as if VSTM/VLDM were used. */
14552
14553 const char *
14554 output_move_neon (rtx *operands)
14555 {
14556 rtx reg, mem, addr, ops[2];
14557 int regno, load = REG_P (operands[0]);
14558 const char *templ;
14559 char buff[50];
14560 enum machine_mode mode;
14561
14562 reg = operands[!load];
14563 mem = operands[load];
14564
14565 mode = GET_MODE (reg);
14566
14567 gcc_assert (REG_P (reg));
14568 regno = REGNO (reg);
14569 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14570 || NEON_REGNO_OK_FOR_QUAD (regno));
14571 gcc_assert (VALID_NEON_DREG_MODE (mode)
14572 || VALID_NEON_QREG_MODE (mode)
14573 || VALID_NEON_STRUCT_MODE (mode));
14574 gcc_assert (MEM_P (mem));
14575
14576 addr = XEXP (mem, 0);
14577
14578 /* Strip off const from addresses like (const (plus (...))). */
14579 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14580 addr = XEXP (addr, 0);
14581
14582 switch (GET_CODE (addr))
14583 {
14584 case POST_INC:
14585 templ = "v%smia%%?\t%%0!, %%h1";
14586 ops[0] = XEXP (addr, 0);
14587 ops[1] = reg;
14588 break;
14589
14590 case PRE_DEC:
14591 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14592 templ = "v%smdb%%?\t%%0!, %%h1";
14593 ops[0] = XEXP (addr, 0);
14594 ops[1] = reg;
14595 break;
14596
14597 case POST_MODIFY:
14598 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14599 gcc_unreachable ();
14600
14601 case LABEL_REF:
14602 case PLUS:
14603 {
14604 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14605 int i;
14606 int overlap = -1;
14607 for (i = 0; i < nregs; i++)
14608 {
14609 /* We're only using DImode here because it's a convenient size. */
14610 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14611 ops[1] = adjust_address (mem, DImode, 8 * i);
14612 if (reg_overlap_mentioned_p (ops[0], mem))
14613 {
14614 gcc_assert (overlap == -1);
14615 overlap = i;
14616 }
14617 else
14618 {
14619 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14620 output_asm_insn (buff, ops);
14621 }
14622 }
14623 if (overlap != -1)
14624 {
14625 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14626 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14627 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14628 output_asm_insn (buff, ops);
14629 }
14630
14631 return "";
14632 }
14633
14634 default:
14635 templ = "v%smia%%?\t%%m0, %%h1";
14636 ops[0] = mem;
14637 ops[1] = reg;
14638 }
14639
14640 sprintf (buff, templ, load ? "ld" : "st");
14641 output_asm_insn (buff, ops);
14642
14643 return "";
14644 }
14645
14646 /* Compute and return the length of neon_mov<mode>, where <mode> is
14647 one of VSTRUCT modes: EI, OI, CI or XI. */
14648 int
14649 arm_attr_length_move_neon (rtx insn)
14650 {
14651 rtx reg, mem, addr;
14652 int load;
14653 enum machine_mode mode;
14654
14655 extract_insn_cached (insn);
14656
14657 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14658 {
14659 mode = GET_MODE (recog_data.operand[0]);
14660 switch (mode)
14661 {
14662 case EImode:
14663 case OImode:
14664 return 8;
14665 case CImode:
14666 return 12;
14667 case XImode:
14668 return 16;
14669 default:
14670 gcc_unreachable ();
14671 }
14672 }
14673
14674 load = REG_P (recog_data.operand[0]);
14675 reg = recog_data.operand[!load];
14676 mem = recog_data.operand[load];
14677
14678 gcc_assert (MEM_P (mem));
14679
14680 mode = GET_MODE (reg);
14681 addr = XEXP (mem, 0);
14682
14683 /* Strip off const from addresses like (const (plus (...))). */
14684 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14685 addr = XEXP (addr, 0);
14686
14687 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14688 {
14689 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14690 return insns * 4;
14691 }
14692 else
14693 return 4;
14694 }
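/* As a worked example of the above: an OImode value occupies four D
   registers, so a load from a PLUS address is counted as four vldr
   instructions (16 bytes), whereas a register-to-register OImode move
   is given a length of 8 bytes by the switch above.  */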
14695
14696 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14697 return zero. */
14698
14699 int
14700 arm_address_offset_is_imm (rtx insn)
14701 {
14702 rtx mem, addr;
14703
14704 extract_insn_cached (insn);
14705
14706 if (REG_P (recog_data.operand[0]))
14707 return 0;
14708
14709 mem = recog_data.operand[0];
14710
14711 gcc_assert (MEM_P (mem));
14712
14713 addr = XEXP (mem, 0);
14714
14715 if (GET_CODE (addr) == REG
14716 || (GET_CODE (addr) == PLUS
14717 && GET_CODE (XEXP (addr, 0)) == REG
14718 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14719 return 1;
14720 else
14721 return 0;
14722 }
14723
14724 /* Output an ADD r, s, #n where n may be too big for one instruction.
14725 If we are adding zero and the destination is the same register as the source, output nothing. */
14726 const char *
14727 output_add_immediate (rtx *operands)
14728 {
14729 HOST_WIDE_INT n = INTVAL (operands[2]);
14730
14731 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14732 {
14733 if (n < 0)
14734 output_multi_immediate (operands,
14735 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14736 -n);
14737 else
14738 output_multi_immediate (operands,
14739 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14740 n);
14741 }
14742
14743 return "";
14744 }
14745
14746 /* Output a multiple immediate operation.
14747 OPERANDS is the vector of operands referred to in the output patterns.
14748 INSTR1 is the output pattern to use for the first constant.
14749 INSTR2 is the output pattern to use for subsequent constants.
14750 IMMED_OP is the index of the constant slot in OPERANDS.
14751 N is the constant value. */
14752 static const char *
14753 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14754 int immed_op, HOST_WIDE_INT n)
14755 {
14756 #if HOST_BITS_PER_WIDE_INT > 32
14757 n &= 0xffffffff;
14758 #endif
14759
14760 if (n == 0)
14761 {
14762 /* Quick and easy output. */
14763 operands[immed_op] = const0_rtx;
14764 output_asm_insn (instr1, operands);
14765 }
14766 else
14767 {
14768 int i;
14769 const char * instr = instr1;
14770
14771 /* Note that n is never zero here (which would give no output). */
14772 for (i = 0; i < 32; i += 2)
14773 {
14774 if (n & (3 << i))
14775 {
14776 operands[immed_op] = GEN_INT (n & (255 << i));
14777 output_asm_insn (instr, operands);
14778 instr = instr2;
14779 i += 6;
14780 }
14781 }
14782 }
14783
14784 return "";
14785 }
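/* To illustrate the splitting above (register numbers are purely
   illustrative), adding the constant 0x12345678 is emitted as four
   instructions, least significant group first:

	add	r0, r1, #0x278
	add	r0, r0, #0x5400
	add	r0, r0, #0x2340000
	add	r0, r0, #0x10000000

   Each operand is an 8-bit value rotated by an even amount, and the four
   immediates sum to the original constant.  */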
14786
14787 /* Return the name of a shifter operation. */
14788 static const char *
14789 arm_shift_nmem (enum rtx_code code)
14790 {
14791 switch (code)
14792 {
14793 case ASHIFT:
14794 return ARM_LSL_NAME;
14795
14796 case ASHIFTRT:
14797 return "asr";
14798
14799 case LSHIFTRT:
14800 return "lsr";
14801
14802 case ROTATERT:
14803 return "ror";
14804
14805 default:
14806 abort ();
14807 }
14808 }
14809
14810 /* Return the appropriate ARM instruction for the operation code.
14811 The returned result should not be overwritten. OP is the rtx of the
14812 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14813 was shifted. */
14814 const char *
14815 arithmetic_instr (rtx op, int shift_first_arg)
14816 {
14817 switch (GET_CODE (op))
14818 {
14819 case PLUS:
14820 return "add";
14821
14822 case MINUS:
14823 return shift_first_arg ? "rsb" : "sub";
14824
14825 case IOR:
14826 return "orr";
14827
14828 case XOR:
14829 return "eor";
14830
14831 case AND:
14832 return "and";
14833
14834 case ASHIFT:
14835 case ASHIFTRT:
14836 case LSHIFTRT:
14837 case ROTATERT:
14838 return arm_shift_nmem (GET_CODE (op));
14839
14840 default:
14841 gcc_unreachable ();
14842 }
14843 }
14844
14845 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14846 for the operation code. The returned result should not be overwritten.
14847 OP is the shift rtx.
14848 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
14849 shift amount otherwise. */
14850 static const char *
14851 shift_op (rtx op, HOST_WIDE_INT *amountp)
14852 {
14853 const char * mnem;
14854 enum rtx_code code = GET_CODE (op);
14855
14856 switch (GET_CODE (XEXP (op, 1)))
14857 {
14858 case REG:
14859 case SUBREG:
14860 *amountp = -1;
14861 break;
14862
14863 case CONST_INT:
14864 *amountp = INTVAL (XEXP (op, 1));
14865 break;
14866
14867 default:
14868 gcc_unreachable ();
14869 }
14870
14871 switch (code)
14872 {
14873 case ROTATE:
14874 gcc_assert (*amountp != -1);
14875 *amountp = 32 - *amountp;
14876 code = ROTATERT;
14877
14878 /* Fall through. */
14879
14880 case ASHIFT:
14881 case ASHIFTRT:
14882 case LSHIFTRT:
14883 case ROTATERT:
14884 mnem = arm_shift_nmem (code);
14885 break;
14886
14887 case MULT:
14888 /* We never have to worry about the amount being other than a
14889 power of 2, since this case can never be reloaded from a reg. */
14890 gcc_assert (*amountp != -1);
14891 *amountp = int_log2 (*amountp);
14892 return ARM_LSL_NAME;
14893
14894 default:
14895 gcc_unreachable ();
14896 }
14897
14898 if (*amountp != -1)
14899 {
14900 /* This is not 100% correct, but follows from the desire to merge
14901 multiplication by a power of 2 with the recognizer for a
14902 shift. >=32 is not a valid shift for "lsl", so we must try and
14903 output a shift that produces the correct arithmetical result.
14904 Using lsr #32 is identical except for the fact that the carry bit
14905 is not set correctly if we set the flags; but we never use the
14906 carry bit from such an operation, so we can ignore that. */
14907 if (code == ROTATERT)
14908 /* Rotate is just modulo 32. */
14909 *amountp &= 31;
14910 else if (*amountp != (*amountp & 31))
14911 {
14912 if (code == ASHIFT)
14913 mnem = "lsr";
14914 *amountp = 32;
14915 }
14916
14917 /* Shifts of 0 are no-ops. */
14918 if (*amountp == 0)
14919 return NULL;
14920 }
14921
14922 return mnem;
14923 }
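/* For instance, (mult x 8) is returned as ARM_LSL_NAME with *AMOUNTP set
   to 3, (rotate x 8) is rewritten as "ror" with an amount of 24, and a
   left shift whose amount is outside 0..31 is emitted as "lsr" #32 so
   that the arithmetical result (zero) is still correct.  */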
14924
14925 /* Obtain the shift count (log2) for POWER, which must be a power of two. */
14926
14927 static HOST_WIDE_INT
14928 int_log2 (HOST_WIDE_INT power)
14929 {
14930 HOST_WIDE_INT shift = 0;
14931
14932 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14933 {
14934 gcc_assert (shift <= 31);
14935 shift++;
14936 }
14937
14938 return shift;
14939 }
14940
14941 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14942 because /bin/as is horribly restrictive. The judgement about
14943 whether or not each character is 'printable' (and can be output as
14944 is) or not (and must be printed with an octal escape) must be made
14945 with reference to the *host* character set -- the situation is
14946 similar to that discussed in the comments above pp_c_char in
14947 c-pretty-print.c. */
14948
14949 #define MAX_ASCII_LEN 51
14950
14951 void
14952 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14953 {
14954 int i;
14955 int len_so_far = 0;
14956
14957 fputs ("\t.ascii\t\"", stream);
14958
14959 for (i = 0; i < len; i++)
14960 {
14961 int c = p[i];
14962
14963 if (len_so_far >= MAX_ASCII_LEN)
14964 {
14965 fputs ("\"\n\t.ascii\t\"", stream);
14966 len_so_far = 0;
14967 }
14968
14969 if (ISPRINT (c))
14970 {
14971 if (c == '\\' || c == '\"')
14972 {
14973 putc ('\\', stream);
14974 len_so_far++;
14975 }
14976 putc (c, stream);
14977 len_so_far++;
14978 }
14979 else
14980 {
14981 fprintf (stream, "\\%03o", c);
14982 len_so_far += 4;
14983 }
14984 }
14985
14986 fputs ("\"\n", stream);
14987 }
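/* For instance, the three host characters 'A', '"' and newline would be
   emitted as

	.ascii	"A\"\012"

   with the directive split and a new .ascii started once MAX_ASCII_LEN
   characters have been written on the current line.  */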
14988 \f
14989 /* Compute the register save mask for registers 0 through 12
14990 inclusive. This code is used by arm_compute_save_reg_mask. */
14991
14992 static unsigned long
14993 arm_compute_save_reg0_reg12_mask (void)
14994 {
14995 unsigned long func_type = arm_current_func_type ();
14996 unsigned long save_reg_mask = 0;
14997 unsigned int reg;
14998
14999 if (IS_INTERRUPT (func_type))
15000 {
15001 unsigned int max_reg;
15002 /* Interrupt functions must not corrupt any registers,
15003 even call clobbered ones. If this is a leaf function
15004 we can just examine the registers used by the RTL, but
15005 otherwise we have to assume that whatever function is
15006 called might clobber anything, and so we have to save
15007 all the call-clobbered registers as well. */
15008 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15009 /* FIQ handlers have registers r8 - r12 banked, so
15010 we only need to check r0 - r7. Normal ISRs only
15011 bank r14 and r15, so we must check up to r12.
15012 r13 is the stack pointer which is always preserved,
15013 so we do not need to consider it here. */
15014 max_reg = 7;
15015 else
15016 max_reg = 12;
15017
15018 for (reg = 0; reg <= max_reg; reg++)
15019 if (df_regs_ever_live_p (reg)
15020 || (! crtl->is_leaf && call_used_regs[reg]))
15021 save_reg_mask |= (1 << reg);
15022
15023 /* Also save the pic base register if necessary. */
15024 if (flag_pic
15025 && !TARGET_SINGLE_PIC_BASE
15026 && arm_pic_register != INVALID_REGNUM
15027 && crtl->uses_pic_offset_table)
15028 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15029 }
15030 else if (IS_VOLATILE(func_type))
15031 {
15032 /* For noreturn functions we historically omitted register saves
15033 altogether. However this really messes up debugging. As a
15034 compromise save just the frame pointers. Combined with the link
15035 register saved elsewhere this should be sufficient to get
15036 a backtrace. */
15037 if (frame_pointer_needed)
15038 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15039 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15040 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15041 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15042 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15043 }
15044 else
15045 {
15046 /* In the normal case we only need to save those registers
15047 which are call saved and which are used by this function. */
15048 for (reg = 0; reg <= 11; reg++)
15049 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15050 save_reg_mask |= (1 << reg);
15051
15052 /* Handle the frame pointer as a special case. */
15053 if (frame_pointer_needed)
15054 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15055
15056 /* If we aren't loading the PIC register,
15057 don't stack it even though it may be live. */
15058 if (flag_pic
15059 && !TARGET_SINGLE_PIC_BASE
15060 && arm_pic_register != INVALID_REGNUM
15061 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15062 || crtl->uses_pic_offset_table))
15063 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15064
15065 /* The prologue will copy SP into R0, so save it. */
15066 if (IS_STACKALIGN (func_type))
15067 save_reg_mask |= 1;
15068 }
15069
15070 /* Save registers so the exception handler can modify them. */
15071 if (crtl->calls_eh_return)
15072 {
15073 unsigned int i;
15074
15075 for (i = 0; ; i++)
15076 {
15077 reg = EH_RETURN_DATA_REGNO (i);
15078 if (reg == INVALID_REGNUM)
15079 break;
15080 save_reg_mask |= 1 << reg;
15081 }
15082 }
15083
15084 return save_reg_mask;
15085 }
15086
15087
15088 /* Compute the number of bytes used to store the static chain register on the
15089 stack, above the stack frame. We need to know this accurately to get the
15090 alignment of the rest of the stack frame correct. */
15091
15092 static int arm_compute_static_chain_stack_bytes (void)
15093 {
15094 unsigned long func_type = arm_current_func_type ();
15095 int static_chain_stack_bytes = 0;
15096
15097 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15098 && IS_NESTED (func_type)
15099 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15100 static_chain_stack_bytes = 4;
15101
15102 return static_chain_stack_bytes;
15103 }
15104
15105
15106 /* Compute a bit mask of which registers need to be
15107 saved on the stack for the current function.
15108 This is used by arm_get_frame_offsets, which may add extra registers. */
15109
15110 static unsigned long
15111 arm_compute_save_reg_mask (void)
15112 {
15113 unsigned int save_reg_mask = 0;
15114 unsigned long func_type = arm_current_func_type ();
15115 unsigned int reg;
15116
15117 if (IS_NAKED (func_type))
15118 /* This should never really happen. */
15119 return 0;
15120
15121 /* If we are creating a stack frame, then we must save the frame pointer,
15122 IP (which will hold the old stack pointer), LR and the PC. */
15123 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15124 save_reg_mask |=
15125 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15126 | (1 << IP_REGNUM)
15127 | (1 << LR_REGNUM)
15128 | (1 << PC_REGNUM);
15129
15130 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15131
15132 /* Decide if we need to save the link register.
15133 Interrupt routines have their own banked link register,
15134 so they never need to save it.
15135 Otherwise if we do not use the link register we do not need to save
15136 it. If we are pushing other registers onto the stack however, we
15137 can save an instruction in the epilogue by pushing the link register
15138 now and then popping it back into the PC. This incurs extra memory
15139 accesses though, so we only do it when optimizing for size, and only
15140 if we know that we will not need a fancy return sequence. */
15141 if (df_regs_ever_live_p (LR_REGNUM)
15142 || (save_reg_mask
15143 && optimize_size
15144 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15145 && !crtl->calls_eh_return))
15146 save_reg_mask |= 1 << LR_REGNUM;
15147
15148 if (cfun->machine->lr_save_eliminated)
15149 save_reg_mask &= ~ (1 << LR_REGNUM);
15150
15151 if (TARGET_REALLY_IWMMXT
15152 && ((bit_count (save_reg_mask)
15153 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15154 arm_compute_static_chain_stack_bytes())
15155 ) % 2) != 0)
15156 {
15157 /* The total number of registers that are going to be pushed
15158 onto the stack is odd. We need to ensure that the stack
15159 is 64-bit aligned before we start to save iWMMXt registers,
15160 and also before we start to create locals. (A local variable
15161 might be a double or long long which we will load/store using
15162 an iWMMXt instruction). Therefore we need to push another
15163 ARM register, so that the stack will be 64-bit aligned. We
15164 try to avoid using the arg registers (r0 - r3) as they might be
15165 used to pass values in a tail call. */
15166 for (reg = 4; reg <= 12; reg++)
15167 if ((save_reg_mask & (1 << reg)) == 0)
15168 break;
15169
15170 if (reg <= 12)
15171 save_reg_mask |= (1 << reg);
15172 else
15173 {
15174 cfun->machine->sibcall_blocked = 1;
15175 save_reg_mask |= (1 << 3);
15176 }
15177 }
15178
15179 /* We may need to push an additional register for use initializing the
15180 PIC base register. */
15181 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15182 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15183 {
15184 reg = thumb_find_work_register (1 << 4);
15185 if (!call_used_regs[reg])
15186 save_reg_mask |= (1 << reg);
15187 }
15188
15189 return save_reg_mask;
15190 }
15191
15192
15193 /* Compute a bit mask of which registers need to be
15194 saved on the stack for the current function. */
15195 static unsigned long
15196 thumb1_compute_save_reg_mask (void)
15197 {
15198 unsigned long mask;
15199 unsigned reg;
15200
15201 mask = 0;
15202 for (reg = 0; reg < 12; reg ++)
15203 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15204 mask |= 1 << reg;
15205
15206 if (flag_pic
15207 && !TARGET_SINGLE_PIC_BASE
15208 && arm_pic_register != INVALID_REGNUM
15209 && crtl->uses_pic_offset_table)
15210 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15211
15212 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15213 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15214 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15215
15216 /* LR will also be pushed if any lo regs are pushed. */
15217 if (mask & 0xff || thumb_force_lr_save ())
15218 mask |= (1 << LR_REGNUM);
15219
15220 /* Make sure we have a low work register if we need one.
15221 We will need one if we are going to push a high register,
15222 but we are not currently intending to push a low register. */
15223 if ((mask & 0xff) == 0
15224 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15225 {
15226 /* Use thumb_find_work_register to choose which register
15227 we will use. If the register is live then we will
15228 have to push it. Use LAST_LO_REGNUM as our fallback
15229 choice for the register to select. */
15230 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15231 /* Make sure the register returned by thumb_find_work_register is
15232 not part of the return value. */
15233 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15234 reg = LAST_LO_REGNUM;
15235
15236 if (! call_used_regs[reg])
15237 mask |= 1 << reg;
15238 }
15239
15240 /* The 504 below is 8 bytes less than 512 because there are two possible
15241 alignment words. We can't tell here if they will be present or not so we
15242 have to play it safe and assume that they are. */
15243 if ((CALLER_INTERWORKING_SLOT_SIZE +
15244 ROUND_UP_WORD (get_frame_size ()) +
15245 crtl->outgoing_args_size) >= 504)
15246 {
15247 /* This is the same as the code in thumb1_expand_prologue() which
15248 determines which register to use for stack decrement. */
15249 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15250 if (mask & (1 << reg))
15251 break;
15252
15253 if (reg > LAST_LO_REGNUM)
15254 {
15255 /* Make sure we have a register available for stack decrement. */
15256 mask |= 1 << LAST_LO_REGNUM;
15257 }
15258 }
15259
15260 return mask;
15261 }
15262
15263
15264 /* Return the number of bytes required to save VFP registers. */
15265 static int
15266 arm_get_vfp_saved_size (void)
15267 {
15268 unsigned int regno;
15269 int count;
15270 int saved;
15271
15272 saved = 0;
15273 /* Space for saved VFP registers. */
15274 if (TARGET_HARD_FLOAT && TARGET_VFP)
15275 {
15276 count = 0;
15277 for (regno = FIRST_VFP_REGNUM;
15278 regno < LAST_VFP_REGNUM;
15279 regno += 2)
15280 {
15281 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15282 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15283 {
15284 if (count > 0)
15285 {
15286 /* Workaround ARM10 VFPr1 bug. */
15287 if (count == 2 && !arm_arch6)
15288 count++;
15289 saved += count * 8;
15290 }
15291 count = 0;
15292 }
15293 else
15294 count++;
15295 }
15296 if (count > 0)
15297 {
15298 if (count == 2 && !arm_arch6)
15299 count++;
15300 saved += count * 8;
15301 }
15302 }
15303 return saved;
15304 }
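/* As an example, if d8-d11 are live and call-saved that is a single run
   of four registers, i.e. 32 bytes; on a pre-v6 core a run of exactly two
   registers is padded to three (24 bytes) by the ARM10 VFPr1 workaround
   above.  */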
15305
15306
15307 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15308 everything bar the final return instruction. If SIMPLE_RETURN is true,
15309 then do not output the epilogue, because it has already been emitted in RTL. */
15310 const char *
15311 output_return_instruction (rtx operand, bool really_return, bool reverse,
15312 bool simple_return)
15313 {
15314 char conditional[10];
15315 char instr[100];
15316 unsigned reg;
15317 unsigned long live_regs_mask;
15318 unsigned long func_type;
15319 arm_stack_offsets *offsets;
15320
15321 func_type = arm_current_func_type ();
15322
15323 if (IS_NAKED (func_type))
15324 return "";
15325
15326 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15327 {
15328 /* If this function was declared non-returning, and we have
15329 found a tail call, then we have to trust that the called
15330 function won't return. */
15331 if (really_return)
15332 {
15333 rtx ops[2];
15334
15335 /* Otherwise, trap an attempted return by aborting. */
15336 ops[0] = operand;
15337 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15338 : "abort");
15339 assemble_external_libcall (ops[1]);
15340 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15341 }
15342
15343 return "";
15344 }
15345
15346 gcc_assert (!cfun->calls_alloca || really_return);
15347
15348 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15349
15350 cfun->machine->return_used_this_function = 1;
15351
15352 offsets = arm_get_frame_offsets ();
15353 live_regs_mask = offsets->saved_regs_mask;
15354
15355 if (!simple_return && live_regs_mask)
15356 {
15357 const char * return_reg;
15358
15359 /* If we do not have any special requirements for function exit
15360 (e.g. interworking) then we can load the return address
15361 directly into the PC. Otherwise we must load it into LR. */
15362 if (really_return
15363 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15364 return_reg = reg_names[PC_REGNUM];
15365 else
15366 return_reg = reg_names[LR_REGNUM];
15367
15368 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15369 {
15370 /* There are three possible reasons for the IP register
15371 being saved. 1) a stack frame was created, in which case
15372 IP contains the old stack pointer, or 2) an ISR routine
15373 corrupted it, or 3) it was saved to align the stack on
15374 iWMMXt. In case 1, restore IP into SP, otherwise just
15375 restore IP. */
15376 if (frame_pointer_needed)
15377 {
15378 live_regs_mask &= ~ (1 << IP_REGNUM);
15379 live_regs_mask |= (1 << SP_REGNUM);
15380 }
15381 else
15382 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15383 }
15384
15385 /* On some ARM architectures it is faster to use LDR rather than
15386 LDM to load a single register. On other architectures, the
15387 cost is the same. In 26 bit mode, or for exception handlers,
15388 we have to use LDM to load the PC so that the CPSR is also
15389 restored. */
15390 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15391 if (live_regs_mask == (1U << reg))
15392 break;
15393
15394 if (reg <= LAST_ARM_REGNUM
15395 && (reg != LR_REGNUM
15396 || ! really_return
15397 || ! IS_INTERRUPT (func_type)))
15398 {
15399 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15400 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15401 }
15402 else
15403 {
15404 char *p;
15405 int first = 1;
15406
15407 /* Generate the load multiple instruction to restore the
15408 registers. Note we can get here, even if
15409 frame_pointer_needed is true, but only if sp already
15410 points to the base of the saved core registers. */
15411 if (live_regs_mask & (1 << SP_REGNUM))
15412 {
15413 unsigned HOST_WIDE_INT stack_adjust;
15414
15415 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15416 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15417
15418 if (stack_adjust && arm_arch5 && TARGET_ARM)
15419 if (TARGET_UNIFIED_ASM)
15420 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15421 else
15422 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15423 else
15424 {
15425 /* If we can't use ldmib (SA110 bug),
15426 then try to pop r3 instead. */
15427 if (stack_adjust)
15428 live_regs_mask |= 1 << 3;
15429
15430 if (TARGET_UNIFIED_ASM)
15431 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15432 else
15433 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15434 }
15435 }
15436 else
15437 if (TARGET_UNIFIED_ASM)
15438 sprintf (instr, "pop%s\t{", conditional);
15439 else
15440 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15441
15442 p = instr + strlen (instr);
15443
15444 for (reg = 0; reg <= SP_REGNUM; reg++)
15445 if (live_regs_mask & (1 << reg))
15446 {
15447 int l = strlen (reg_names[reg]);
15448
15449 if (first)
15450 first = 0;
15451 else
15452 {
15453 memcpy (p, ", ", 2);
15454 p += 2;
15455 }
15456
15457 memcpy (p, "%|", 2);
15458 memcpy (p + 2, reg_names[reg], l);
15459 p += l + 2;
15460 }
15461
15462 if (live_regs_mask & (1 << LR_REGNUM))
15463 {
15464 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15465 /* If returning from an interrupt, restore the CPSR. */
15466 if (IS_INTERRUPT (func_type))
15467 strcat (p, "^");
15468 }
15469 else
15470 strcpy (p, "}");
15471 }
15472
15473 output_asm_insn (instr, & operand);
15474
15475 /* See if we need to generate an extra instruction to
15476 perform the actual function return. */
15477 if (really_return
15478 && func_type != ARM_FT_INTERWORKED
15479 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15480 {
15481 /* The return has already been handled
15482 by loading the LR into the PC. */
15483 return "";
15484 }
15485 }
15486
15487 if (really_return)
15488 {
15489 switch ((int) ARM_FUNC_TYPE (func_type))
15490 {
15491 case ARM_FT_ISR:
15492 case ARM_FT_FIQ:
15493 /* ??? This is wrong for unified assembly syntax. */
15494 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15495 break;
15496
15497 case ARM_FT_INTERWORKED:
15498 sprintf (instr, "bx%s\t%%|lr", conditional);
15499 break;
15500
15501 case ARM_FT_EXCEPTION:
15502 /* ??? This is wrong for unified assembly syntax. */
15503 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15504 break;
15505
15506 default:
15507 /* Use bx if it's available. */
15508 if (arm_arch5 || arm_arch4t)
15509 sprintf (instr, "bx%s\t%%|lr", conditional);
15510 else
15511 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15512 break;
15513 }
15514
15515 output_asm_insn (instr, & operand);
15516 }
15517
15518 return "";
15519 }
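/* As an illustration, a normal ARM function that pushed {r4, r5, lr} and
   needs no interworking can return with the single instruction

	ldmfd	sp!, {r4, r5, pc}

   (or the equivalent "pop" in unified syntax), whereas an interworked
   return restores into LR and then emits a separate "bx lr".  */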
15520
15521 /* Write the function name into the code section, directly preceding
15522 the function prologue.
15523
15524 Code will be output similar to this:
15525 t0
15526 .ascii "arm_poke_function_name", 0
15527 .align
15528 t1
15529 .word 0xff000000 + (t1 - t0)
15530 arm_poke_function_name
15531 mov ip, sp
15532 stmfd sp!, {fp, ip, lr, pc}
15533 sub fp, ip, #4
15534
15535 When performing a stack backtrace, code can inspect the value
15536 of 'pc' stored at 'fp' + 0. If the trace function then looks
15537 at location pc - 12 and the top 8 bits are set, then we know
15538 that there is a function name embedded immediately preceding this
15539 location, and that it has length ((pc[-3]) & 0xff000000).
15540
15541 We assume that pc is declared as a pointer to an unsigned long.
15542
15543 It is of no benefit to output the function name if we are assembling
15544 a leaf function. These function types will not contain a stack
15545 backtrace structure, therefore it is not possible to determine the
15546 function name. */
15547 void
15548 arm_poke_function_name (FILE *stream, const char *name)
15549 {
15550 unsigned long alignlength;
15551 unsigned long length;
15552 rtx x;
15553
15554 length = strlen (name) + 1;
15555 alignlength = ROUND_UP_WORD (length);
15556
15557 ASM_OUTPUT_ASCII (stream, name, length);
15558 ASM_OUTPUT_ALIGN (stream, 2);
15559 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15560 assemble_aligned_integer (UNITS_PER_WORD, x);
15561 }
15562
15563 /* Place some comments into the assembler stream
15564 describing the current function. */
15565 static void
15566 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15567 {
15568 unsigned long func_type;
15569
15570 /* ??? Do we want to print some of the below anyway? */
15571 if (TARGET_THUMB1)
15572 return;
15573
15574 /* Sanity check. */
15575 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15576
15577 func_type = arm_current_func_type ();
15578
15579 switch ((int) ARM_FUNC_TYPE (func_type))
15580 {
15581 default:
15582 case ARM_FT_NORMAL:
15583 break;
15584 case ARM_FT_INTERWORKED:
15585 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15586 break;
15587 case ARM_FT_ISR:
15588 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15589 break;
15590 case ARM_FT_FIQ:
15591 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15592 break;
15593 case ARM_FT_EXCEPTION:
15594 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15595 break;
15596 }
15597
15598 if (IS_NAKED (func_type))
15599 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15600
15601 if (IS_VOLATILE (func_type))
15602 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15603
15604 if (IS_NESTED (func_type))
15605 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15606 if (IS_STACKALIGN (func_type))
15607 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15608
15609 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15610 crtl->args.size,
15611 crtl->args.pretend_args_size, frame_size);
15612
15613 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15614 frame_pointer_needed,
15615 cfun->machine->uses_anonymous_args);
15616
15617 if (cfun->machine->lr_save_eliminated)
15618 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15619
15620 if (crtl->calls_eh_return)
15621 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15622
15623 }
15624
15625 static void
15626 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15627 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15628 {
15629 arm_stack_offsets *offsets;
15630
15631 if (TARGET_THUMB1)
15632 {
15633 int regno;
15634
15635 /* Emit any call-via-reg trampolines that are needed for v4t support
15636 of call_reg and call_value_reg type insns. */
15637 for (regno = 0; regno < LR_REGNUM; regno++)
15638 {
15639 rtx label = cfun->machine->call_via[regno];
15640
15641 if (label != NULL)
15642 {
15643 switch_to_section (function_section (current_function_decl));
15644 targetm.asm_out.internal_label (asm_out_file, "L",
15645 CODE_LABEL_NUMBER (label));
15646 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15647 }
15648 }
15649
15650 /* ??? Probably not safe to set this here, since it assumes that a
15651 function will be emitted as assembly immediately after we generate
15652 RTL for it. This does not happen for inline functions. */
15653 cfun->machine->return_used_this_function = 0;
15654 }
15655 else /* TARGET_32BIT */
15656 {
15657 /* We need to take into account any stack-frame rounding. */
15658 offsets = arm_get_frame_offsets ();
15659
15660 gcc_assert (!use_return_insn (FALSE, NULL)
15661 || (cfun->machine->return_used_this_function != 0)
15662 || offsets->saved_regs == offsets->outgoing_args
15663 || frame_pointer_needed);
15664
15665 /* Reset the ARM-specific per-function variables. */
15666 after_arm_reorg = 0;
15667 }
15668 }
15669
15670 /* Generate and emit an insn that we will recognize as a push_multi.
15671 Unfortunately, since this insn does not reflect very well the actual
15672 semantics of the operation, we need to annotate the insn for the benefit
15673 of DWARF2 frame unwind information. */
15674 static rtx
15675 emit_multi_reg_push (unsigned long mask)
15676 {
15677 int num_regs = 0;
15678 int num_dwarf_regs;
15679 int i, j;
15680 rtx par;
15681 rtx dwarf;
15682 int dwarf_par_index;
15683 rtx tmp, reg;
15684
15685 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15686 if (mask & (1 << i))
15687 num_regs++;
15688
15689 gcc_assert (num_regs && num_regs <= 16);
15690
15691 /* We don't record the PC in the dwarf frame information. */
15692 num_dwarf_regs = num_regs;
15693 if (mask & (1 << PC_REGNUM))
15694 num_dwarf_regs--;
15695
15696 /* For the body of the insn we are going to generate an UNSPEC in
15697 parallel with several USEs. This allows the insn to be recognized
15698 by the push_multi pattern in the arm.md file.
15699
15700 The body of the insn looks something like this:
15701
15702 (parallel [
15703 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15704 (const_int:SI <num>)))
15705 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15706 (use (reg:SI XX))
15707 (use (reg:SI YY))
15708 ...
15709 ])
15710
15711 For the frame note however, we try to be more explicit and actually
15712 show each register being stored into the stack frame, plus a (single)
15713 decrement of the stack pointer. We do it this way in order to be
15714 friendly to the stack unwinding code, which only wants to see a single
15715 stack decrement per instruction. The RTL we generate for the note looks
15716 something like this:
15717
15718 (sequence [
15719 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15720 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15721 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15722 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15723 ...
15724 ])
15725
15726 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15727 instead we'd have a parallel expression detailing all
15728 the stores to the various memory addresses so that debug
15729 information is more up-to-date. Remember however while writing
15730 this to take care of the constraints with the push instruction.
15731
15732 Note also that this has to be taken care of for the VFP registers.
15733
15734 For more see PR43399. */
15735
15736 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15737 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15738 dwarf_par_index = 1;
15739
15740 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15741 {
15742 if (mask & (1 << i))
15743 {
15744 reg = gen_rtx_REG (SImode, i);
15745
15746 XVECEXP (par, 0, 0)
15747 = gen_rtx_SET (VOIDmode,
15748 gen_frame_mem
15749 (BLKmode,
15750 gen_rtx_PRE_MODIFY (Pmode,
15751 stack_pointer_rtx,
15752 plus_constant
15753 (Pmode, stack_pointer_rtx,
15754 -4 * num_regs))
15755 ),
15756 gen_rtx_UNSPEC (BLKmode,
15757 gen_rtvec (1, reg),
15758 UNSPEC_PUSH_MULT));
15759
15760 if (i != PC_REGNUM)
15761 {
15762 tmp = gen_rtx_SET (VOIDmode,
15763 gen_frame_mem (SImode, stack_pointer_rtx),
15764 reg);
15765 RTX_FRAME_RELATED_P (tmp) = 1;
15766 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15767 dwarf_par_index++;
15768 }
15769
15770 break;
15771 }
15772 }
15773
15774 for (j = 1, i++; j < num_regs; i++)
15775 {
15776 if (mask & (1 << i))
15777 {
15778 reg = gen_rtx_REG (SImode, i);
15779
15780 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15781
15782 if (i != PC_REGNUM)
15783 {
15784 tmp
15785 = gen_rtx_SET (VOIDmode,
15786 gen_frame_mem
15787 (SImode,
15788 plus_constant (Pmode, stack_pointer_rtx,
15789 4 * j)),
15790 reg);
15791 RTX_FRAME_RELATED_P (tmp) = 1;
15792 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15793 }
15794
15795 j++;
15796 }
15797 }
15798
15799 par = emit_insn (par);
15800
15801 tmp = gen_rtx_SET (VOIDmode,
15802 stack_pointer_rtx,
15803 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15804 RTX_FRAME_RELATED_P (tmp) = 1;
15805 XVECEXP (dwarf, 0, 0) = tmp;
15806
15807 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15808
15809 return par;
15810 }
15811
15812 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15813 SAVED_REGS_MASK shows which registers need to be restored.
15814
15815 Unfortunately, since this insn does not reflect very well the actual
15816 semantics of the operation, we need to annotate the insn for the benefit
15817 of DWARF2 frame unwind information. */
15818 static void
15819 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
15820 {
15821 int num_regs = 0;
15822 int i, j;
15823 rtx par;
15824 rtx dwarf = NULL_RTX;
15825 rtx tmp, reg;
15826 bool return_in_pc;
15827 int offset_adj;
15828 int emit_update;
15829
15830 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
15831 offset_adj = return_in_pc ? 1 : 0;
15832 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15833 if (saved_regs_mask & (1 << i))
15834 num_regs++;
15835
15836 gcc_assert (num_regs && num_regs <= 16);
15837
15838 /* If SP is in the register list, then we don't emit the SP update insn. */
15839 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
15840
15841 /* The parallel needs to hold num_regs SETs
15842 and one SET for the stack update. */
15843 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
15844
15845 if (return_in_pc)
15846 {
15847 tmp = ret_rtx;
15848 XVECEXP (par, 0, 0) = tmp;
15849 }
15850
15851 if (emit_update)
15852 {
15853 /* Increment the stack pointer, based on there being
15854 num_regs 4-byte registers to restore. */
15855 tmp = gen_rtx_SET (VOIDmode,
15856 stack_pointer_rtx,
15857 plus_constant (Pmode,
15858 stack_pointer_rtx,
15859 4 * num_regs));
15860 RTX_FRAME_RELATED_P (tmp) = 1;
15861 XVECEXP (par, 0, offset_adj) = tmp;
15862 }
15863
15864 /* Now restore every reg, which may include PC. */
15865 for (j = 0, i = 0; j < num_regs; i++)
15866 if (saved_regs_mask & (1 << i))
15867 {
15868 reg = gen_rtx_REG (SImode, i);
15869 tmp = gen_rtx_SET (VOIDmode,
15870 reg,
15871 gen_frame_mem
15872 (SImode,
15873 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
15874 RTX_FRAME_RELATED_P (tmp) = 1;
15875 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
15876
15877 /* We need to maintain a sequence for DWARF info too. As the DWARF info
15878 should not include the PC, skip it. */
15879 if (i != PC_REGNUM)
15880 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15881
15882 j++;
15883 }
15884
15885 if (return_in_pc)
15886 par = emit_jump_insn (par);
15887 else
15888 par = emit_insn (par);
15889
15890 REG_NOTES (par) = dwarf;
15891 }
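/* For instance, with SAVED_REGS_MASK covering {r4, r5, pc} the PARALLEL
   built above looks roughly like:

     (parallel [(return)
		(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
		(set (reg:SI r4) (mem:SI (reg:SI sp)))
		(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
		(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   and is emitted as a jump insn because the PC is being restored.  */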
15892
15893 /* Generate and emit an insn pattern that we will recognize as a pop_multi
15894 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
15895
15896 Unfortunately, since this insn does not reflect very well the actual
15897 semantics of the operation, we need to annotate the insn for the benefit
15898 of DWARF2 frame unwind information. */
15899 static void
15900 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
15901 {
15902 int i, j;
15903 rtx par;
15904 rtx dwarf = NULL_RTX;
15905 rtx tmp, reg;
15906
15907 gcc_assert (num_regs && num_regs <= 32);
15908
15909 /* Workaround ARM10 VFPr1 bug. */
15910 if (num_regs == 2 && !arm_arch6)
15911 {
15912 if (first_reg == 15)
15913 first_reg--;
15914
15915 num_regs++;
15916 }
15917
15918 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
15919 there could be up to 32 D-registers to restore.
15920 If there are more than 16 D-registers, make two recursive calls,
15921 each of which emits one pop_multi instruction. */
15922 if (num_regs > 16)
15923 {
15924 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
15925 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
15926 return;
15927 }
15928
15929 /* The parallel needs to hold num_regs SETs
15930 and one SET for the stack update. */
15931 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
15932
15933 /* Increment the stack pointer, based on there being
15934 num_regs 8-byte registers to restore. */
15935 tmp = gen_rtx_SET (VOIDmode,
15936 base_reg,
15937 plus_constant (Pmode, base_reg, 8 * num_regs));
15938 RTX_FRAME_RELATED_P (tmp) = 1;
15939 XVECEXP (par, 0, 0) = tmp;
15940
15941 /* Now show every reg that will be restored, using a SET for each. */
15942 for (j = 0, i=first_reg; j < num_regs; i += 2)
15943 {
15944 reg = gen_rtx_REG (DFmode, i);
15945
15946 tmp = gen_rtx_SET (VOIDmode,
15947 reg,
15948 gen_frame_mem
15949 (DFmode,
15950 plus_constant (Pmode, base_reg, 8 * j)));
15951 RTX_FRAME_RELATED_P (tmp) = 1;
15952 XVECEXP (par, 0, j + 1) = tmp;
15953
15954 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15955
15956 j++;
15957 }
15958
15959 par = emit_insn (par);
15960 REG_NOTES (par) = dwarf;
15961 }
15962
15963 /* Calculate the size of the return value that is passed in registers. */
15964 static unsigned
15965 arm_size_return_regs (void)
15966 {
15967 enum machine_mode mode;
15968
15969 if (crtl->return_rtx != 0)
15970 mode = GET_MODE (crtl->return_rtx);
15971 else
15972 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15973
15974 return GET_MODE_SIZE (mode);
15975 }
15976
15977 /* Return true if the current function needs to save/restore LR. */
15978 static bool
15979 thumb_force_lr_save (void)
15980 {
15981 return !cfun->machine->lr_save_eliminated
15982 && (!leaf_function_p ()
15983 || thumb_far_jump_used_p ()
15984 || df_regs_ever_live_p (LR_REGNUM));
15985 }
15986
15987
15988 /* Return true if r3 is used by any of the tail call insns in the
15989 current function. */
15990 static bool
15991 any_sibcall_uses_r3 (void)
15992 {
15993 edge_iterator ei;
15994 edge e;
15995
15996 if (!crtl->tail_call_emit)
15997 return false;
15998 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15999 if (e->flags & EDGE_SIBCALL)
16000 {
16001 rtx call = BB_END (e->src);
16002 if (!CALL_P (call))
16003 call = prev_nonnote_nondebug_insn (call);
16004 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16005 if (find_regno_fusage (call, USE, 3))
16006 return true;
16007 }
16008 return false;
16009 }
16010
16011
16012 /* Compute the distance from register FROM to register TO.
16013 These can be the arg pointer (26), the soft frame pointer (25),
16014 the stack pointer (13) or the hard frame pointer (11).
16015 In thumb mode r7 is used as the soft frame pointer, if needed.
16016 Typical stack layout looks like this:
16017
16018 old stack pointer -> | |
16019 ----
16020 | | \
16021 | | saved arguments for
16022 | | vararg functions
16023 | | /
16024 --
16025 hard FP & arg pointer -> | | \
16026 | | stack
16027 | | frame
16028 | | /
16029 --
16030 | | \
16031 | | call saved
16032 | | registers
16033 soft frame pointer -> | | /
16034 --
16035 | | \
16036 | | local
16037 | | variables
16038 locals base pointer -> | | /
16039 --
16040 | | \
16041 | | outgoing
16042 | | arguments
16043 current stack pointer -> | | /
16044 --
16045
16046 For a given function some or all of these stack components
16047 may not be needed, giving rise to the possibility of
16048 eliminating some of the registers.
16049
16050 The values returned by this function must reflect the behavior
16051 of arm_expand_prologue() and arm_compute_save_reg_mask().
16052
16053 The sign of the number returned reflects the direction of stack
16054 growth, so the values are positive for all eliminations except
16055 from the soft frame pointer to the hard frame pointer.
16056
16057 SFP may point just inside the local variables block to ensure correct
16058 alignment. */
16059
16060
16061 /* Calculate stack offsets. These are used to calculate register elimination
16062 offsets and in prologue/epilogue code. Also calculates which registers
16063 should be saved. */
16064
16065 static arm_stack_offsets *
16066 arm_get_frame_offsets (void)
16067 {
16068 struct arm_stack_offsets *offsets;
16069 unsigned long func_type;
16070 int leaf;
16071 int saved;
16072 int core_saved;
16073 HOST_WIDE_INT frame_size;
16074 int i;
16075
16076 offsets = &cfun->machine->stack_offsets;
16077
16078 /* We need to know if we are a leaf function. Unfortunately, it
16079 is possible to be called after start_sequence has been called,
16080 which causes get_insns to return the insns for the sequence,
16081 not the function, which will cause leaf_function_p to return
16082 the incorrect result.
16083
16084 We only need to know about leaf functions once reload has completed, and the
16085 frame size cannot be changed after that time, so we can safely
16086 use the cached value. */
16087
16088 if (reload_completed)
16089 return offsets;
16090
16091 /* Initially this is the size of the local variables. It will be translated
16092 into an offset once we have determined the size of preceding data. */
16093 frame_size = ROUND_UP_WORD (get_frame_size ());
16094
16095 leaf = leaf_function_p ();
16096
16097 /* Space for variadic functions. */
16098 offsets->saved_args = crtl->args.pretend_args_size;
16099
16100 /* In Thumb mode this is incorrect, but never used. */
16101 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16102 arm_compute_static_chain_stack_bytes();
16103
16104 if (TARGET_32BIT)
16105 {
16106 unsigned int regno;
16107
16108 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16109 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16110 saved = core_saved;
16111
16112 /* We know that SP will be doubleword aligned on entry, and we must
16113 preserve that condition at any subroutine call. We also require the
16114 soft frame pointer to be doubleword aligned. */
16115
16116 if (TARGET_REALLY_IWMMXT)
16117 {
16118 /* Check for the call-saved iWMMXt registers. */
16119 for (regno = FIRST_IWMMXT_REGNUM;
16120 regno <= LAST_IWMMXT_REGNUM;
16121 regno++)
16122 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16123 saved += 8;
16124 }
16125
16126 func_type = arm_current_func_type ();
16127 /* Space for saved VFP registers. */
16128 if (! IS_VOLATILE (func_type)
16129 && TARGET_HARD_FLOAT && TARGET_VFP)
16130 saved += arm_get_vfp_saved_size ();
16131 }
16132 else /* TARGET_THUMB1 */
16133 {
16134 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16135 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16136 saved = core_saved;
16137 if (TARGET_BACKTRACE)
16138 saved += 16;
16139 }
16140
16141 /* Saved registers include the stack frame. */
16142 offsets->saved_regs = offsets->saved_args + saved +
16143 arm_compute_static_chain_stack_bytes();
16144 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16145 /* A leaf function does not need any stack alignment if it has nothing
16146 on the stack. */
16147 if (leaf && frame_size == 0
16148 /* However if it calls alloca(), we have a dynamically allocated
16149 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16150 && ! cfun->calls_alloca)
16151 {
16152 offsets->outgoing_args = offsets->soft_frame;
16153 offsets->locals_base = offsets->soft_frame;
16154 return offsets;
16155 }
16156
16157 /* Ensure SFP has the correct alignment. */
16158 if (ARM_DOUBLEWORD_ALIGN
16159 && (offsets->soft_frame & 7))
16160 {
16161 offsets->soft_frame += 4;
16162 /* Try to align stack by pushing an extra reg. Don't bother doing this
16163 when there is a stack frame as the alignment will be rolled into
16164 the normal stack adjustment. */
16165 if (frame_size + crtl->outgoing_args_size == 0)
16166 {
16167 int reg = -1;
16168
16169 /* If it is safe to use r3, then do so. This sometimes
16170 generates better code on Thumb-2 by avoiding the need to
16171 use 32-bit push/pop instructions. */
16172 if (! any_sibcall_uses_r3 ()
16173 && arm_size_return_regs () <= 12
16174 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16175 {
16176 reg = 3;
16177 }
16178 else
16179 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16180 {
16181 /* Avoid fixed registers; they may be changed at
16182 arbitrary times so it's unsafe to restore them
16183 during the epilogue. */
16184 if (!fixed_regs[i]
16185 && (offsets->saved_regs_mask & (1 << i)) == 0)
16186 {
16187 reg = i;
16188 break;
16189 }
16190 }
16191
16192 if (reg != -1)
16193 {
16194 offsets->saved_regs += 4;
16195 offsets->saved_regs_mask |= (1 << reg);
16196 }
16197 }
16198 }
16199
16200 offsets->locals_base = offsets->soft_frame + frame_size;
16201 offsets->outgoing_args = (offsets->locals_base
16202 + crtl->outgoing_args_size);
16203
16204 if (ARM_DOUBLEWORD_ALIGN)
16205 {
16206 /* Ensure SP remains doubleword aligned. */
16207 if (offsets->outgoing_args & 7)
16208 offsets->outgoing_args += 4;
16209 gcc_assert (!(offsets->outgoing_args & 7));
16210 }
16211
16212 return offsets;
16213 }
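/* A worked example of the layout above, assuming an ARM-mode non-leaf
   function with no frame pointer, no static chain slot, no interworking
   slot, no pretend or outgoing arguments, no callee-saved VFP or iWMMXt
   registers, 12 bytes of locals and {r4, r5, r6, lr} to save:
   saved_args = 0, saved_regs = 16, soft_frame = 16, locals_base = 28,
   and outgoing_args is rounded up from 28 to 32 to keep SP doubleword
   aligned.  */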
16214
16215
16216 /* Calculate the relative offsets for the different stack pointers. Positive
16217 offsets are in the direction of stack growth. */
16218
16219 HOST_WIDE_INT
16220 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16221 {
16222 arm_stack_offsets *offsets;
16223
16224 offsets = arm_get_frame_offsets ();
16225
16226 /* OK, now we have enough information to compute the distances.
16227 There must be an entry in these switch tables for each pair
16228 of registers in ELIMINABLE_REGS, even if some of the entries
16229 seem to be redundant or useless. */
16230 switch (from)
16231 {
16232 case ARG_POINTER_REGNUM:
16233 switch (to)
16234 {
16235 case THUMB_HARD_FRAME_POINTER_REGNUM:
16236 return 0;
16237
16238 case FRAME_POINTER_REGNUM:
16239 /* This is the reverse of the soft frame pointer
16240 to hard frame pointer elimination below. */
16241 return offsets->soft_frame - offsets->saved_args;
16242
16243 case ARM_HARD_FRAME_POINTER_REGNUM:
16244 /* This is only non-zero in the case where the static chain register
16245 is stored above the frame. */
16246 return offsets->frame - offsets->saved_args - 4;
16247
16248 case STACK_POINTER_REGNUM:
16249 /* If nothing has been pushed on the stack at all
16250 then this will return -4. This *is* correct! */
16251 return offsets->outgoing_args - (offsets->saved_args + 4);
16252
16253 default:
16254 gcc_unreachable ();
16255 }
16256 gcc_unreachable ();
16257
16258 case FRAME_POINTER_REGNUM:
16259 switch (to)
16260 {
16261 case THUMB_HARD_FRAME_POINTER_REGNUM:
16262 return 0;
16263
16264 case ARM_HARD_FRAME_POINTER_REGNUM:
16265 /* The hard frame pointer points to the top entry in the
16266 stack frame. The soft frame pointer to the bottom entry
16267 in the stack frame. If there is no stack frame at all,
16268 then they are identical. */
16269
16270 return offsets->frame - offsets->soft_frame;
16271
16272 case STACK_POINTER_REGNUM:
16273 return offsets->outgoing_args - offsets->soft_frame;
16274
16275 default:
16276 gcc_unreachable ();
16277 }
16278 gcc_unreachable ();
16279
16280 default:
16281 /* You cannot eliminate from the stack pointer.
16282 In theory you could eliminate from the hard frame
16283 pointer to the stack pointer, but this will never
16284 happen, since if a stack frame is not needed the
16285 hard frame pointer will never be used. */
16286 gcc_unreachable ();
16287 }
16288 }
16289
16290 /* Given FROM and TO register numbers, say whether this elimination is
16291 allowed. Frame pointer elimination is automatically handled.
16292
16293 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16294 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16295 pointer, we must eliminate FRAME_POINTER_REGNUM into
16296 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16297 ARG_POINTER_REGNUM. */
16298
16299 bool
16300 arm_can_eliminate (const int from, const int to)
16301 {
16302 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16303 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16304 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16305 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16306 true);
16307 }
16308
16309 /* Emit RTL to save coprocessor registers on function entry. Returns the
16310 number of bytes pushed. */
16311
16312 static int
16313 arm_save_coproc_regs(void)
16314 {
16315 int saved_size = 0;
16316 unsigned reg;
16317 unsigned start_reg;
16318 rtx insn;
16319
16320 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16321 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16322 {
16323 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16324 insn = gen_rtx_MEM (V2SImode, insn);
16325 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16326 RTX_FRAME_RELATED_P (insn) = 1;
16327 saved_size += 8;
16328 }
16329
16330 if (TARGET_HARD_FLOAT && TARGET_VFP)
16331 {
16332 start_reg = FIRST_VFP_REGNUM;
16333
16334 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16335 {
16336 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16337 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16338 {
16339 if (start_reg != reg)
16340 saved_size += vfp_emit_fstmd (start_reg,
16341 (reg - start_reg) / 2);
16342 start_reg = reg + 2;
16343 }
16344 }
16345 if (start_reg != reg)
16346 saved_size += vfp_emit_fstmd (start_reg,
16347 (reg - start_reg) / 2);
16348 }
16349 return saved_size;
16350 }
16351
16352
16353 /* Set the Thumb frame pointer from the stack pointer. */
16354
16355 static void
16356 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16357 {
16358 HOST_WIDE_INT amount;
16359 rtx insn, dwarf;
16360
16361 amount = offsets->outgoing_args - offsets->locals_base;
16362 if (amount < 1024)
16363 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16364 stack_pointer_rtx, GEN_INT (amount)));
16365 else
16366 {
16367 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16368 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16369 expects the first two operands to be the same. */
16370 if (TARGET_THUMB2)
16371 {
16372 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16373 stack_pointer_rtx,
16374 hard_frame_pointer_rtx));
16375 }
16376 else
16377 {
16378 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16379 hard_frame_pointer_rtx,
16380 stack_pointer_rtx));
16381 }
16382 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16383 plus_constant (Pmode, stack_pointer_rtx, amount));
16384 RTX_FRAME_RELATED_P (dwarf) = 1;
16385 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16386 }
16387
16388 RTX_FRAME_RELATED_P (insn) = 1;
16389 }
16390
16391 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16392 function. */
16393 void
16394 arm_expand_prologue (void)
16395 {
16396 rtx amount;
16397 rtx insn;
16398 rtx ip_rtx;
16399 unsigned long live_regs_mask;
16400 unsigned long func_type;
16401 int fp_offset = 0;
16402 int saved_pretend_args = 0;
16403 int saved_regs = 0;
16404 unsigned HOST_WIDE_INT args_to_push;
16405 arm_stack_offsets *offsets;
16406
16407 func_type = arm_current_func_type ();
16408
16409 /* Naked functions don't have prologues. */
16410 if (IS_NAKED (func_type))
16411 return;
16412
16413 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16414 args_to_push = crtl->args.pretend_args_size;
16415
16416 /* Compute which register we will have to save onto the stack. */
16417 offsets = arm_get_frame_offsets ();
16418 live_regs_mask = offsets->saved_regs_mask;
16419
16420 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16421
16422 if (IS_STACKALIGN (func_type))
16423 {
16424 rtx r0, r1;
16425
16426 /* Handle a word-aligned stack pointer. We generate the following:
16427
16428 mov r0, sp
16429 bic r1, r0, #7
16430 mov sp, r1
16431 <save and restore r0 in normal prologue/epilogue>
16432 mov sp, r0
16433 bx lr
16434
16435 The unwinder doesn't need to know about the stack realignment.
16436 Just tell it we saved SP in r0. */
16437 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16438
16439 r0 = gen_rtx_REG (SImode, 0);
16440 r1 = gen_rtx_REG (SImode, 1);
16441
16442 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16443 RTX_FRAME_RELATED_P (insn) = 1;
16444 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16445
16446 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16447
16448 /* ??? The CFA changes here, which may cause GDB to conclude that it
16449 has entered a different function. That said, the unwind info is
16450 correct, individually, before and after this instruction because
16451 we've described the save of SP, which will override the default
16452 handling of SP as restoring from the CFA. */
16453 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16454 }
16455
16456 /* For APCS frames, if IP register is clobbered
16457 when creating frame, save that register in a special
16458 way. */
16459 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16460 {
16461 if (IS_INTERRUPT (func_type))
16462 {
16463 /* Interrupt functions must not corrupt any registers.
16464 Creating a frame pointer however, corrupts the IP
16465 register, so we must push it first. */
16466 emit_multi_reg_push (1 << IP_REGNUM);
16467
16468 /* Do not set RTX_FRAME_RELATED_P on this insn.
16469 The dwarf stack unwinding code only wants to see one
16470 stack decrement per function, and this is not it. If
16471 this instruction is labeled as being part of the frame
16472 creation sequence then dwarf2out_frame_debug_expr will
16473 die when it encounters the assignment of IP to FP
16474 later on, since the use of SP here establishes SP as
16475 the CFA register and not IP.
16476
16477 Anyway this instruction is not really part of the stack
16478 frame creation although it is part of the prologue. */
16479 }
16480 else if (IS_NESTED (func_type))
16481 {
16482 /* The static chain register is the same as the IP register
16483 used as a scratch register during stack frame creation.
16484 To get around this we need to find somewhere to store IP
16485 whilst the frame is being created. We try the following
16486 places in order:
16487
16488 1. The last argument register.
16489 2. A slot on the stack above the frame. (This only
16490 works if the function is not a varargs function).
16491 3. Register r3, after pushing the argument registers
16492 onto the stack.
16493
16494 Note - we only need to tell the dwarf2 backend about the SP
16495 adjustment in the second variant; the static chain register
16496 doesn't need to be unwound, as it doesn't contain a value
16497 inherited from the caller. */
16498
16499 if (df_regs_ever_live_p (3) == false)
16500 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16501 else if (args_to_push == 0)
16502 {
16503 rtx dwarf;
16504
16505 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16506 saved_regs += 4;
16507
16508 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16509 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16510 fp_offset = 4;
16511
16512 /* Just tell the dwarf backend that we adjusted SP. */
16513 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16514 plus_constant (Pmode, stack_pointer_rtx,
16515 -fp_offset));
16516 RTX_FRAME_RELATED_P (insn) = 1;
16517 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16518 }
16519 else
16520 {
16521 /* Store the args on the stack. */
16522 if (cfun->machine->uses_anonymous_args)
16523 insn = emit_multi_reg_push
16524 ((0xf0 >> (args_to_push / 4)) & 0xf);
16525 else
16526 insn = emit_insn
16527 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16528 GEN_INT (- args_to_push)));
16529
16530 RTX_FRAME_RELATED_P (insn) = 1;
16531
16532 saved_pretend_args = 1;
16533 fp_offset = args_to_push;
16534 args_to_push = 0;
16535
16536 /* Now reuse r3 to preserve IP. */
16537 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16538 }
16539 }
16540
16541 insn = emit_set_insn (ip_rtx,
16542 plus_constant (Pmode, stack_pointer_rtx,
16543 fp_offset));
16544 RTX_FRAME_RELATED_P (insn) = 1;
16545 }
16546
16547 if (args_to_push)
16548 {
16549 /* Push the argument registers, or reserve space for them. */
16550 if (cfun->machine->uses_anonymous_args)
16551 insn = emit_multi_reg_push
16552 ((0xf0 >> (args_to_push / 4)) & 0xf);
16553 else
16554 insn = emit_insn
16555 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16556 GEN_INT (- args_to_push)));
16557 RTX_FRAME_RELATED_P (insn) = 1;
16558 }
16559
16560 /* If this is an interrupt service routine, and the link register
16561 is going to be pushed, and we are not generating an extra
16562 push of IP (needed when a frame is needed and the frame layout is APCS),
16563 subtracting four from LR now means that the function return
16564 can be done with a single instruction.
16565 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16566 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16567 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16568 && TARGET_ARM)
16569 {
16570 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16571
16572 emit_set_insn (lr, plus_constant (SImode, lr, -4));
16573 }
16574
16575 if (live_regs_mask)
16576 {
16577 saved_regs += bit_count (live_regs_mask) * 4;
16578 if (optimize_size && !frame_pointer_needed
16579 && saved_regs == offsets->saved_regs - offsets->saved_args)
16580 {
16581 /* If no coprocessor registers are being pushed and we don't have
16582 to worry about a frame pointer then push extra registers to
16583 create the stack frame. This is done in a way that does not
16584 alter the frame layout, so is independent of the epilogue. */
16585 int n;
16586 int frame;
16587 n = 0;
16588 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16589 n++;
16590 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16591 if (frame && n * 4 >= frame)
16592 {
16593 n = frame / 4;
16594 live_regs_mask |= (1 << n) - 1;
16595 saved_regs += frame;
16596 }
16597 }
16598 insn = emit_multi_reg_push (live_regs_mask);
16599 RTX_FRAME_RELATED_P (insn) = 1;
16600 }
16601
16602 if (! IS_VOLATILE (func_type))
16603 saved_regs += arm_save_coproc_regs ();
16604
16605 if (frame_pointer_needed && TARGET_ARM)
16606 {
16607 /* Create the new frame pointer. */
16608 if (TARGET_APCS_FRAME)
16609 {
16610 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16611 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16612 RTX_FRAME_RELATED_P (insn) = 1;
16613
16614 if (IS_NESTED (func_type))
16615 {
16616 /* Recover the static chain register. */
16617 if (!df_regs_ever_live_p (3)
16618 || saved_pretend_args)
16619 insn = gen_rtx_REG (SImode, 3);
16620 else /* if (crtl->args.pretend_args_size == 0) */
16621 {
16622 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16623 insn = gen_frame_mem (SImode, insn);
16624 }
16625 emit_set_insn (ip_rtx, insn);
16626 /* Add a USE to stop propagate_one_insn() from barfing. */
16627 emit_insn (gen_prologue_use (ip_rtx));
16628 }
16629 }
16630 else
16631 {
16632 insn = GEN_INT (saved_regs - 4);
16633 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16634 stack_pointer_rtx, insn));
16635 RTX_FRAME_RELATED_P (insn) = 1;
16636 }
16637 }
16638
16639 if (flag_stack_usage_info)
16640 current_function_static_stack_size
16641 = offsets->outgoing_args - offsets->saved_args;
16642
16643 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16644 {
16645 /* This add can produce multiple insns for a large constant, so we
16646 need to get tricky. */
16647 rtx last = get_last_insn ();
16648
16649 amount = GEN_INT (offsets->saved_args + saved_regs
16650 - offsets->outgoing_args);
16651
16652 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16653 amount));
16654 do
16655 {
16656 last = last ? NEXT_INSN (last) : get_insns ();
16657 RTX_FRAME_RELATED_P (last) = 1;
16658 }
16659 while (last != insn);
16660
16661 /* If the frame pointer is needed, emit a special barrier that
16662 will prevent the scheduler from moving stores to the frame
16663 before the stack adjustment. */
16664 if (frame_pointer_needed)
16665 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16666 hard_frame_pointer_rtx));
16667 }
16668
16669
16670 if (frame_pointer_needed && TARGET_THUMB2)
16671 thumb_set_frame_pointer (offsets);
16672
16673 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16674 {
16675 unsigned long mask;
16676
16677 mask = live_regs_mask;
16678 mask &= THUMB2_WORK_REGS;
16679 if (!IS_NESTED (func_type))
16680 mask |= (1 << IP_REGNUM);
16681 arm_load_pic_register (mask);
16682 }
16683
16684 /* If we are profiling, make sure no instructions are scheduled before
16685 the call to mcount. Similarly if the user has requested no
16686 scheduling in the prolog. Similarly if we want non-call exceptions
16687 using the EABI unwinder, to prevent faulting instructions from being
16688 swapped with a stack adjustment. */
16689 if (crtl->profile || !TARGET_SCHED_PROLOG
16690 || (arm_except_unwind_info (&global_options) == UI_TARGET
16691 && cfun->can_throw_non_call_exceptions))
16692 emit_insn (gen_blockage ());
16693
16694 /* If the link register is being kept alive, with the return address in it,
16695 then make sure that it does not get reused by the ce2 pass. */
16696 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16697 cfun->machine->lr_save_eliminated = 1;
16698 }
16699 \f
16700 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16701 static void
16702 arm_print_condition (FILE *stream)
16703 {
16704 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16705 {
16706 /* Branch conversion is not implemented for Thumb-2. */
16707 if (TARGET_THUMB)
16708 {
16709 output_operand_lossage ("predicated Thumb instruction");
16710 return;
16711 }
16712 if (current_insn_predicate != NULL)
16713 {
16714 output_operand_lossage
16715 ("predicated instruction in conditional sequence");
16716 return;
16717 }
16718
16719 fputs (arm_condition_codes[arm_current_cc], stream);
16720 }
16721 else if (current_insn_predicate)
16722 {
16723 enum arm_cond_code code;
16724
16725 if (TARGET_THUMB1)
16726 {
16727 output_operand_lossage ("predicated Thumb instruction");
16728 return;
16729 }
16730
16731 code = get_arm_condition_code (current_insn_predicate);
16732 fputs (arm_condition_codes[code], stream);
16733 }
16734 }
16735
16736
16737 /* If CODE is 'd', then X is a condition operand and the instruction
16738 should only be executed if the condition is true.
16739 If CODE is 'D', then X is a condition operand and the instruction
16740 should only be executed if the condition is false: however, if the mode
16741 of the comparison is CCFPEmode, then always execute the instruction -- we
16742 do this because in these circumstances !GE does not necessarily imply LT;
16743 in these cases the instruction pattern will take care to make sure that
16744 an instruction containing %d will follow, thereby undoing the effects of
16745 doing this instruction unconditionally.
16746 If CODE is 'N' then X is a floating point operand that must be negated
16747 before output.
16748 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16749 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
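/* Illustrative examples (not exhaustive): for a CONST_INT of 5, '%B'
   prints its bitwise inverse, -6; for a DImode value held in r4, '%M'
   prints "{r4-r5}".  */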
16750 static void
16751 arm_print_operand (FILE *stream, rtx x, int code)
16752 {
16753 switch (code)
16754 {
16755 case '@':
16756 fputs (ASM_COMMENT_START, stream);
16757 return;
16758
16759 case '_':
16760 fputs (user_label_prefix, stream);
16761 return;
16762
16763 case '|':
16764 fputs (REGISTER_PREFIX, stream);
16765 return;
16766
16767 case '?':
16768 arm_print_condition (stream);
16769 return;
16770
16771 case '(':
16772 /* Nothing in unified syntax, otherwise the current condition code. */
16773 if (!TARGET_UNIFIED_ASM)
16774 arm_print_condition (stream);
16775 break;
16776
16777 case ')':
16778 /* The current condition code in unified syntax, otherwise nothing. */
16779 if (TARGET_UNIFIED_ASM)
16780 arm_print_condition (stream);
16781 break;
16782
16783 case '.':
16784 /* The current condition code for a condition code setting instruction.
16785 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16786 if (TARGET_UNIFIED_ASM)
16787 {
16788 fputc('s', stream);
16789 arm_print_condition (stream);
16790 }
16791 else
16792 {
16793 arm_print_condition (stream);
16794 fputc('s', stream);
16795 }
16796 return;
16797
16798 case '!':
16799 /* If the instruction is conditionally executed then print
16800 the current condition code, otherwise print 's'. */
16801 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16802 if (current_insn_predicate)
16803 arm_print_condition (stream);
16804 else
16805 fputc('s', stream);
16806 break;
16807
16808 /* %# is a "break" sequence. It doesn't output anything, but is used to
16809 separate e.g. operand numbers from following text, if that text consists
16810 of further digits which we don't want to be part of the operand
16811 number. */
16812 case '#':
16813 return;
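    /* For instance (illustrative), in an output template "%1%#0" emits
       operand 1 immediately followed by a literal '0', whereas "%10"
       would be parsed as a single two-digit operand number.  */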
16814
16815 case 'N':
16816 {
16817 REAL_VALUE_TYPE r;
16818 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16819 r = real_value_negate (&r);
16820 fprintf (stream, "%s", fp_const_from_val (&r));
16821 }
16822 return;
16823
16824 /* An integer or symbol address without a preceding # sign. */
16825 case 'c':
16826 switch (GET_CODE (x))
16827 {
16828 case CONST_INT:
16829 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16830 break;
16831
16832 case SYMBOL_REF:
16833 output_addr_const (stream, x);
16834 break;
16835
16836 case CONST:
16837 if (GET_CODE (XEXP (x, 0)) == PLUS
16838 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16839 {
16840 output_addr_const (stream, x);
16841 break;
16842 }
16843 /* Fall through. */
16844
16845 default:
16846 output_operand_lossage ("Unsupported operand for code '%c'", code);
16847 }
16848 return;
16849
16850 /* An integer that we want to print in HEX. */
16851 case 'x':
16852 switch (GET_CODE (x))
16853 {
16854 case CONST_INT:
16855 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16856 break;
16857
16858 default:
16859 output_operand_lossage ("Unsupported operand for code '%c'", code);
16860 }
16861 return;
16862
16863 case 'B':
16864 if (GET_CODE (x) == CONST_INT)
16865 {
16866 HOST_WIDE_INT val;
16867 val = ARM_SIGN_EXTEND (~INTVAL (x));
16868 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16869 }
16870 else
16871 {
16872 putc ('~', stream);
16873 output_addr_const (stream, x);
16874 }
16875 return;
16876
16877 case 'L':
16878 /* The low 16 bits of an immediate constant. */
16879 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16880 return;
16881
16882 case 'i':
16883 fprintf (stream, "%s", arithmetic_instr (x, 1));
16884 return;
16885
16886 case 'I':
16887 fprintf (stream, "%s", arithmetic_instr (x, 0));
16888 return;
16889
16890 case 'S':
16891 {
16892 HOST_WIDE_INT val;
16893 const char *shift;
16894
16895 if (!shift_operator (x, SImode))
16896 {
16897 output_operand_lossage ("invalid shift operand");
16898 break;
16899 }
16900
16901 shift = shift_op (x, &val);
16902
16903 if (shift)
16904 {
16905 fprintf (stream, ", %s ", shift);
16906 if (val == -1)
16907 arm_print_operand (stream, XEXP (x, 1), 0);
16908 else
16909 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16910 }
16911 }
16912 return;
16913
16914 /* An explanation of the 'Q', 'R' and 'H' register operands:
16915
16916 In a pair of registers containing a DI or DF value the 'Q'
16917 operand returns the register number of the register containing
16918 the least significant part of the value. The 'R' operand returns
16919 the register number of the register containing the most
16920 significant part of the value.
16921
16922 The 'H' operand returns the higher of the two register numbers.
16923 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16924 same as the 'Q' operand, since the most significant part of the
16925 value is held in the lower number register. The reverse is true
16926 on systems where WORDS_BIG_ENDIAN is false.
16927
16928 The purpose of these operands is to distinguish between cases
16929 where the endian-ness of the values is important (for example
16930 when they are added together), and cases where the endian-ness
16931 is irrelevant, but the order of register operations is important.
16932 For example when loading a value from memory into a register
16933 pair, the endian-ness does not matter. Provided that the value
16934 from the lower memory address is put into the lower numbered
16935 register, and the value from the higher address is put into the
16936 higher numbered register, the load will work regardless of whether
16937 the value being loaded is big-wordian or little-wordian. The
16938 order of the two register loads can matter however, if the address
16939 of the memory location is actually held in one of the registers
16940 being overwritten by the load.
16941
16942 The 'Q' and 'R' constraints are also available for 64-bit
16943 constants. */
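    /* For example, on a little-endian target a DImode value held in
       {r0, r1} has its least significant word in r0, so '%Q' prints r0,
       '%R' prints r1 and '%H' also prints r1; when WORDS_BIG_ENDIAN is
       true, '%Q' prints r1, '%R' prints r0 and '%H' still prints r1.
       (Illustrative summary of the rules above.)  */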
16944 case 'Q':
16945 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16946 {
16947 rtx part = gen_lowpart (SImode, x);
16948 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16949 return;
16950 }
16951
16952 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16953 {
16954 output_operand_lossage ("invalid operand for code '%c'", code);
16955 return;
16956 }
16957
16958 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16959 return;
16960
16961 case 'R':
16962 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16963 {
16964 enum machine_mode mode = GET_MODE (x);
16965 rtx part;
16966
16967 if (mode == VOIDmode)
16968 mode = DImode;
16969 part = gen_highpart_mode (SImode, mode, x);
16970 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16971 return;
16972 }
16973
16974 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16975 {
16976 output_operand_lossage ("invalid operand for code '%c'", code);
16977 return;
16978 }
16979
16980 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16981 return;
16982
16983 case 'H':
16984 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16985 {
16986 output_operand_lossage ("invalid operand for code '%c'", code);
16987 return;
16988 }
16989
16990 asm_fprintf (stream, "%r", REGNO (x) + 1);
16991 return;
16992
16993 case 'J':
16994 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16995 {
16996 output_operand_lossage ("invalid operand for code '%c'", code);
16997 return;
16998 }
16999
17000 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17001 return;
17002
17003 case 'K':
17004 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17005 {
17006 output_operand_lossage ("invalid operand for code '%c'", code);
17007 return;
17008 }
17009
17010 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17011 return;
17012
17013 case 'm':
17014 asm_fprintf (stream, "%r",
17015 GET_CODE (XEXP (x, 0)) == REG
17016 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17017 return;
17018
17019 case 'M':
17020 asm_fprintf (stream, "{%r-%r}",
17021 REGNO (x),
17022 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17023 return;
17024
17025 /* Like 'M', but writing doubleword vector registers, for use by Neon
17026 insns. */
17027 case 'h':
17028 {
17029 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17030 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17031 if (numregs == 1)
17032 asm_fprintf (stream, "{d%d}", regno);
17033 else
17034 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17035 }
17036 return;
17037
17038 case 'd':
17039 /* CONST_TRUE_RTX means always -- that's the default. */
17040 if (x == const_true_rtx)
17041 return;
17042
17043 if (!COMPARISON_P (x))
17044 {
17045 output_operand_lossage ("invalid operand for code '%c'", code);
17046 return;
17047 }
17048
17049 fputs (arm_condition_codes[get_arm_condition_code (x)],
17050 stream);
17051 return;
17052
17053 case 'D':
17054 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17055 want to do that. */
17056 if (x == const_true_rtx)
17057 {
17058 output_operand_lossage ("instruction never executed");
17059 return;
17060 }
17061 if (!COMPARISON_P (x))
17062 {
17063 output_operand_lossage ("invalid operand for code '%c'", code);
17064 return;
17065 }
17066
17067 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17068 (get_arm_condition_code (x))],
17069 stream);
17070 return;
17071
17072 case 's':
17073 case 'V':
17074 case 'W':
17075 case 'X':
17076 case 'Y':
17077 case 'Z':
17078 /* Former Maverick support, removed after GCC-4.7. */
17079 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17080 return;
17081
17082 case 'U':
17083 if (GET_CODE (x) != REG
17084 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17085 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17086 /* Bad value for wCG register number. */
17087 {
17088 output_operand_lossage ("invalid operand for code '%c'", code);
17089 return;
17090 }
17091
17092 else
17093 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17094 return;
17095
17096 /* Print an iWMMXt control register name. */
17097 case 'w':
17098 if (GET_CODE (x) != CONST_INT
17099 || INTVAL (x) < 0
17100 || INTVAL (x) >= 16)
17101 /* Bad value for wC register number. */
17102 {
17103 output_operand_lossage ("invalid operand for code '%c'", code);
17104 return;
17105 }
17106
17107 else
17108 {
17109 static const char * wc_reg_names [16] =
17110 {
17111 "wCID", "wCon", "wCSSF", "wCASF",
17112 "wC4", "wC5", "wC6", "wC7",
17113 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17114 "wC12", "wC13", "wC14", "wC15"
17115 };
17116
17117 fprintf (stream, wc_reg_names [INTVAL (x)]);
17118 }
17119 return;
17120
17121 /* Print the high single-precision register of a VFP double-precision
17122 register. */
17123 case 'p':
17124 {
17125 int mode = GET_MODE (x);
17126 int regno;
17127
17128 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17129 {
17130 output_operand_lossage ("invalid operand for code '%c'", code);
17131 return;
17132 }
17133
17134 regno = REGNO (x);
17135 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17136 {
17137 output_operand_lossage ("invalid operand for code '%c'", code);
17138 return;
17139 }
17140
17141 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17142 }
17143 return;
17144
17145 /* Print a VFP/Neon double precision or quad precision register name. */
17146 case 'P':
17147 case 'q':
17148 {
17149 int mode = GET_MODE (x);
17150 int is_quad = (code == 'q');
17151 int regno;
17152
17153 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17154 {
17155 output_operand_lossage ("invalid operand for code '%c'", code);
17156 return;
17157 }
17158
17159 if (GET_CODE (x) != REG
17160 || !IS_VFP_REGNUM (REGNO (x)))
17161 {
17162 output_operand_lossage ("invalid operand for code '%c'", code);
17163 return;
17164 }
17165
17166 regno = REGNO (x);
17167 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17168 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17169 {
17170 output_operand_lossage ("invalid operand for code '%c'", code);
17171 return;
17172 }
17173
17174 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17175 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17176 }
17177 return;
17178
17179 /* These two codes print the low/high doubleword register of a Neon quad
17180 register, respectively. For pair-structure types, can also print
17181 low/high quadword registers. */
17182 case 'e':
17183 case 'f':
17184 {
17185 int mode = GET_MODE (x);
17186 int regno;
17187
17188 if ((GET_MODE_SIZE (mode) != 16
17189 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17190 {
17191 output_operand_lossage ("invalid operand for code '%c'", code);
17192 return;
17193 }
17194
17195 regno = REGNO (x);
17196 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17197 {
17198 output_operand_lossage ("invalid operand for code '%c'", code);
17199 return;
17200 }
17201
17202 if (GET_MODE_SIZE (mode) == 16)
17203 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17204 + (code == 'f' ? 1 : 0));
17205 else
17206 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17207 + (code == 'f' ? 1 : 0));
17208 }
17209 return;
17210
17211 /* Print a VFPv3 floating-point constant, represented as an integer
17212 index. */
17213 case 'G':
17214 {
17215 int index = vfp3_const_double_index (x);
17216 gcc_assert (index != -1);
17217 fprintf (stream, "%d", index);
17218 }
17219 return;
17220
17221 /* Print bits representing opcode features for Neon.
17222
17223 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17224 and polynomials as unsigned.
17225
17226 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17227
17228 Bit 2 is 1 for rounding functions, 0 otherwise. */
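    /* For example (illustrative), a value of 5 (binary 101: signed,
       integer, rounding) makes '%T' print 's', '%F' print 'i', '%t'
       print 's' and '%O' print "r".  */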
17229
17230 /* Identify the type as 's', 'u', 'p' or 'f'. */
17231 case 'T':
17232 {
17233 HOST_WIDE_INT bits = INTVAL (x);
17234 fputc ("uspf"[bits & 3], stream);
17235 }
17236 return;
17237
17238 /* Likewise, but signed and unsigned integers are both 'i'. */
17239 case 'F':
17240 {
17241 HOST_WIDE_INT bits = INTVAL (x);
17242 fputc ("iipf"[bits & 3], stream);
17243 }
17244 return;
17245
17246 /* As for 'T', but emit 'u' instead of 'p'. */
17247 case 't':
17248 {
17249 HOST_WIDE_INT bits = INTVAL (x);
17250 fputc ("usuf"[bits & 3], stream);
17251 }
17252 return;
17253
17254 /* Bit 2: rounding (vs none). */
17255 case 'O':
17256 {
17257 HOST_WIDE_INT bits = INTVAL (x);
17258 fputs ((bits & 4) != 0 ? "r" : "", stream);
17259 }
17260 return;
17261
17262 /* Memory operand for vld1/vst1 instruction. */
17263 case 'A':
17264 {
17265 rtx addr;
17266 bool postinc = FALSE;
17267 unsigned align, memsize, align_bits;
17268
17269 gcc_assert (GET_CODE (x) == MEM);
17270 addr = XEXP (x, 0);
17271 if (GET_CODE (addr) == POST_INC)
17272 {
17273 postinc = 1;
17274 addr = XEXP (addr, 0);
17275 }
17276 asm_fprintf (stream, "[%r", REGNO (addr));
17277
17278 /* We know the alignment of this access, so we can emit a hint in the
17279 instruction (for some alignments) as an aid to the memory subsystem
17280 of the target. */
17281 align = MEM_ALIGN (x) >> 3;
17282 memsize = MEM_SIZE (x);
17283
17284 /* Only certain alignment specifiers are supported by the hardware. */
17285 if (memsize == 32 && (align % 32) == 0)
17286 align_bits = 256;
17287 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17288 align_bits = 128;
17289 else if (memsize >= 8 && (align % 8) == 0)
17290 align_bits = 64;
17291 else
17292 align_bits = 0;
17293
17294 if (align_bits != 0)
17295 asm_fprintf (stream, ":%d", align_bits);
17296
17297 asm_fprintf (stream, "]");
17298
17299 if (postinc)
17300 fputs("!", stream);
17301 }
17302 return;
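    /* For example (illustrative): a 16-byte access through r0 that is
       known to be 128-bit aligned prints as "[r0:128]", or "[r0:128]!"
       with post-increment; when no suitable alignment is known, plain
       "[r0]" is printed.  */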
17303
17304 case 'C':
17305 {
17306 rtx addr;
17307
17308 gcc_assert (GET_CODE (x) == MEM);
17309 addr = XEXP (x, 0);
17310 gcc_assert (GET_CODE (addr) == REG);
17311 asm_fprintf (stream, "[%r]", REGNO (addr));
17312 }
17313 return;
17314
17315 /* Translate an S register number into a D register number and element index. */
17316 case 'y':
17317 {
17318 int mode = GET_MODE (x);
17319 int regno;
17320
17321 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17322 {
17323 output_operand_lossage ("invalid operand for code '%c'", code);
17324 return;
17325 }
17326
17327 regno = REGNO (x);
17328 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17329 {
17330 output_operand_lossage ("invalid operand for code '%c'", code);
17331 return;
17332 }
17333
17334 regno = regno - FIRST_VFP_REGNUM;
17335 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17336 }
17337 return;
17338
17339 case 'v':
17340 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17341 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17342 return;
17343
17344 /* Register specifier for vld1.16/vst1.16. Translate the S register
17345 number into a D register number and element index. */
17346 case 'z':
17347 {
17348 int mode = GET_MODE (x);
17349 int regno;
17350
17351 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17352 {
17353 output_operand_lossage ("invalid operand for code '%c'", code);
17354 return;
17355 }
17356
17357 regno = REGNO (x);
17358 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17359 {
17360 output_operand_lossage ("invalid operand for code '%c'", code);
17361 return;
17362 }
17363
17364 regno = regno - FIRST_VFP_REGNUM;
17365 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17366 }
17367 return;
17368
17369 default:
17370 if (x == 0)
17371 {
17372 output_operand_lossage ("missing operand");
17373 return;
17374 }
17375
17376 switch (GET_CODE (x))
17377 {
17378 case REG:
17379 asm_fprintf (stream, "%r", REGNO (x));
17380 break;
17381
17382 case MEM:
17383 output_memory_reference_mode = GET_MODE (x);
17384 output_address (XEXP (x, 0));
17385 break;
17386
17387 case CONST_DOUBLE:
17388 if (TARGET_NEON)
17389 {
17390 char fpstr[20];
17391 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17392 sizeof (fpstr), 0, 1);
17393 fprintf (stream, "#%s", fpstr);
17394 }
17395 else
17396 fprintf (stream, "#%s", fp_immediate_constant (x));
17397 break;
17398
17399 default:
17400 gcc_assert (GET_CODE (x) != NEG);
17401 fputc ('#', stream);
17402 if (GET_CODE (x) == HIGH)
17403 {
17404 fputs (":lower16:", stream);
17405 x = XEXP (x, 0);
17406 }
17407
17408 output_addr_const (stream, x);
17409 break;
17410 }
17411 }
17412 }
17413 \f
17414 /* Target hook for printing a memory address. */
17415 static void
17416 arm_print_operand_address (FILE *stream, rtx x)
17417 {
17418 if (TARGET_32BIT)
17419 {
17420 int is_minus = GET_CODE (x) == MINUS;
17421
17422 if (GET_CODE (x) == REG)
17423 asm_fprintf (stream, "[%r]", REGNO (x));
17424 else if (GET_CODE (x) == PLUS || is_minus)
17425 {
17426 rtx base = XEXP (x, 0);
17427 rtx index = XEXP (x, 1);
17428 HOST_WIDE_INT offset = 0;
17429 if (GET_CODE (base) != REG
17430 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17431 {
17432 /* Ensure that BASE is a register. */
17433 /* (one of them must be). */
17434 /* Also ensure the SP is not used as an index register. */
17435 rtx temp = base;
17436 base = index;
17437 index = temp;
17438 }
17439 switch (GET_CODE (index))
17440 {
17441 case CONST_INT:
17442 offset = INTVAL (index);
17443 if (is_minus)
17444 offset = -offset;
17445 asm_fprintf (stream, "[%r, #%wd]",
17446 REGNO (base), offset);
17447 break;
17448
17449 case REG:
17450 asm_fprintf (stream, "[%r, %s%r]",
17451 REGNO (base), is_minus ? "-" : "",
17452 REGNO (index));
17453 break;
17454
17455 case MULT:
17456 case ASHIFTRT:
17457 case LSHIFTRT:
17458 case ASHIFT:
17459 case ROTATERT:
17460 {
17461 asm_fprintf (stream, "[%r, %s%r",
17462 REGNO (base), is_minus ? "-" : "",
17463 REGNO (XEXP (index, 0)));
17464 arm_print_operand (stream, index, 'S');
17465 fputs ("]", stream);
17466 break;
17467 }
17468
17469 default:
17470 gcc_unreachable ();
17471 }
17472 }
17473 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17474 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17475 {
17476 extern enum machine_mode output_memory_reference_mode;
17477
17478 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17479
17480 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17481 asm_fprintf (stream, "[%r, #%s%d]!",
17482 REGNO (XEXP (x, 0)),
17483 GET_CODE (x) == PRE_DEC ? "-" : "",
17484 GET_MODE_SIZE (output_memory_reference_mode));
17485 else
17486 asm_fprintf (stream, "[%r], #%s%d",
17487 REGNO (XEXP (x, 0)),
17488 GET_CODE (x) == POST_DEC ? "-" : "",
17489 GET_MODE_SIZE (output_memory_reference_mode));
17490 }
17491 else if (GET_CODE (x) == PRE_MODIFY)
17492 {
17493 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17494 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17495 asm_fprintf (stream, "#%wd]!",
17496 INTVAL (XEXP (XEXP (x, 1), 1)));
17497 else
17498 asm_fprintf (stream, "%r]!",
17499 REGNO (XEXP (XEXP (x, 1), 1)));
17500 }
17501 else if (GET_CODE (x) == POST_MODIFY)
17502 {
17503 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17504 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17505 asm_fprintf (stream, "#%wd",
17506 INTVAL (XEXP (XEXP (x, 1), 1)));
17507 else
17508 asm_fprintf (stream, "%r",
17509 REGNO (XEXP (XEXP (x, 1), 1)));
17510 }
17511 else output_addr_const (stream, x);
17512 }
17513 else
17514 {
17515 if (GET_CODE (x) == REG)
17516 asm_fprintf (stream, "[%r]", REGNO (x));
17517 else if (GET_CODE (x) == POST_INC)
17518 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17519 else if (GET_CODE (x) == PLUS)
17520 {
17521 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17522 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17523 asm_fprintf (stream, "[%r, #%wd]",
17524 REGNO (XEXP (x, 0)),
17525 INTVAL (XEXP (x, 1)));
17526 else
17527 asm_fprintf (stream, "[%r, %r]",
17528 REGNO (XEXP (x, 0)),
17529 REGNO (XEXP (x, 1)));
17530 }
17531 else
17532 output_addr_const (stream, x);
17533 }
17534 }
17535 \f
17536 /* Target hook for indicating whether a punctuation character for
17537 TARGET_PRINT_OPERAND is valid. */
17538 static bool
17539 arm_print_operand_punct_valid_p (unsigned char code)
17540 {
17541 return (code == '@' || code == '|' || code == '.'
17542 || code == '(' || code == ')' || code == '#'
17543 || (TARGET_32BIT && (code == '?'))
17544 || (TARGET_THUMB2 && (code == '!'))
17545 || (TARGET_THUMB && (code == '_')));
17546 }
17547 \f
17548 /* Target hook for assembling integer objects. The ARM version needs to
17549 handle word-sized values specially. */
17550 static bool
17551 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17552 {
17553 enum machine_mode mode;
17554
17555 if (size == UNITS_PER_WORD && aligned_p)
17556 {
17557 fputs ("\t.word\t", asm_out_file);
17558 output_addr_const (asm_out_file, x);
17559
17560 /* Mark symbols as position independent. We only do this in the
17561 .text segment, not in the .data segment. */
17562 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17563 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17564 {
17565 /* See legitimize_pic_address for an explanation of the
17566 TARGET_VXWORKS_RTP check. */
17567 if (TARGET_VXWORKS_RTP
17568 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17569 fputs ("(GOT)", asm_out_file);
17570 else
17571 fputs ("(GOTOFF)", asm_out_file);
17572 }
17573 fputc ('\n', asm_out_file);
17574 return true;
17575 }
17576
17577 mode = GET_MODE (x);
17578
17579 if (arm_vector_mode_supported_p (mode))
17580 {
17581 int i, units;
17582
17583 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17584
17585 units = CONST_VECTOR_NUNITS (x);
17586 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17587
17588 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17589 for (i = 0; i < units; i++)
17590 {
17591 rtx elt = CONST_VECTOR_ELT (x, i);
17592 assemble_integer
17593 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17594 }
17595 else
17596 for (i = 0; i < units; i++)
17597 {
17598 rtx elt = CONST_VECTOR_ELT (x, i);
17599 REAL_VALUE_TYPE rval;
17600
17601 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17602
17603 assemble_real
17604 (rval, GET_MODE_INNER (mode),
17605 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17606 }
17607
17608 return true;
17609 }
17610
17611 return default_assemble_integer (x, size, aligned_p);
17612 }
17613
17614 static void
17615 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17616 {
17617 section *s;
17618
17619 if (!TARGET_AAPCS_BASED)
17620 {
17621 (is_ctor ?
17622 default_named_section_asm_out_constructor
17623 : default_named_section_asm_out_destructor) (symbol, priority);
17624 return;
17625 }
17626
17627 /* Put these in the .init_array section, using a special relocation. */
17628 if (priority != DEFAULT_INIT_PRIORITY)
17629 {
17630 char buf[18];
17631 sprintf (buf, "%s.%.5u",
17632 is_ctor ? ".init_array" : ".fini_array",
17633 priority);
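      /* For instance (illustrative), a constructor with priority 101 is
         placed in a section named ".init_array.00101".  */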
17634 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17635 }
17636 else if (is_ctor)
17637 s = ctors_section;
17638 else
17639 s = dtors_section;
17640
17641 switch_to_section (s);
17642 assemble_align (POINTER_SIZE);
17643 fputs ("\t.word\t", asm_out_file);
17644 output_addr_const (asm_out_file, symbol);
17645 fputs ("(target1)\n", asm_out_file);
17646 }
17647
17648 /* Add a function to the list of static constructors. */
17649
17650 static void
17651 arm_elf_asm_constructor (rtx symbol, int priority)
17652 {
17653 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17654 }
17655
17656 /* Add a function to the list of static destructors. */
17657
17658 static void
17659 arm_elf_asm_destructor (rtx symbol, int priority)
17660 {
17661 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17662 }
17663 \f
17664 /* A finite state machine takes care of noticing whether or not instructions
17665 can be conditionally executed, thus decreasing execution time and code
17666 size by deleting branch instructions. The fsm is controlled by
17667 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17668
17669 /* The states of the fsm controlling condition codes are:
17670 0: normal, do nothing special
17671 1: make ASM_OUTPUT_OPCODE not output this instruction
17672 2: make ASM_OUTPUT_OPCODE not output this instruction
17673 3: make instructions conditional
17674 4: make instructions conditional
17675
17676 State transitions (state->state by whom under condition):
17677 0 -> 1 final_prescan_insn if the `target' is a label
17678 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17679 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17680 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17681 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17682 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17683 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17684 (the target insn is arm_target_insn).
17685
17686 If the jump clobbers the conditions then we use states 2 and 4.
17687
17688 A similar thing can be done with conditional return insns.
17689
17690 XXX In case the `target' is an unconditional branch, this conditionalising
17691 of the instructions always reduces code size, but not always execution
17692 time. But then, I want to reduce the code size to somewhere near what
17693 /bin/cc produces. */
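/* As an illustrative example, a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery (states 0 -> 1 -> 3 -> 0) as

	cmp	r0, #0
	addne	r1, r1, #1

   so the branch is deleted.  */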
17694
17695 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17696 instructions. When a COND_EXEC instruction is seen the subsequent
17697 instructions are scanned so that multiple conditional instructions can be
17698 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17699 specify the length and true/false mask for the IT block. These will be
17700 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17701
17702 /* Returns the index of the ARM condition code string in
17703 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17704 COMPARISON should be an rtx like `(eq (...) (...))'. */
17705
17706 enum arm_cond_code
17707 maybe_get_arm_condition_code (rtx comparison)
17708 {
17709 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17710 enum arm_cond_code code;
17711 enum rtx_code comp_code = GET_CODE (comparison);
17712
17713 if (GET_MODE_CLASS (mode) != MODE_CC)
17714 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17715 XEXP (comparison, 1));
17716
17717 switch (mode)
17718 {
17719 case CC_DNEmode: code = ARM_NE; goto dominance;
17720 case CC_DEQmode: code = ARM_EQ; goto dominance;
17721 case CC_DGEmode: code = ARM_GE; goto dominance;
17722 case CC_DGTmode: code = ARM_GT; goto dominance;
17723 case CC_DLEmode: code = ARM_LE; goto dominance;
17724 case CC_DLTmode: code = ARM_LT; goto dominance;
17725 case CC_DGEUmode: code = ARM_CS; goto dominance;
17726 case CC_DGTUmode: code = ARM_HI; goto dominance;
17727 case CC_DLEUmode: code = ARM_LS; goto dominance;
17728 case CC_DLTUmode: code = ARM_CC;
17729
17730 dominance:
17731 if (comp_code == EQ)
17732 return ARM_INVERSE_CONDITION_CODE (code);
17733 if (comp_code == NE)
17734 return code;
17735 return ARM_NV;
17736
17737 case CC_NOOVmode:
17738 switch (comp_code)
17739 {
17740 case NE: return ARM_NE;
17741 case EQ: return ARM_EQ;
17742 case GE: return ARM_PL;
17743 case LT: return ARM_MI;
17744 default: return ARM_NV;
17745 }
17746
17747 case CC_Zmode:
17748 switch (comp_code)
17749 {
17750 case NE: return ARM_NE;
17751 case EQ: return ARM_EQ;
17752 default: return ARM_NV;
17753 }
17754
17755 case CC_Nmode:
17756 switch (comp_code)
17757 {
17758 case NE: return ARM_MI;
17759 case EQ: return ARM_PL;
17760 default: return ARM_NV;
17761 }
17762
17763 case CCFPEmode:
17764 case CCFPmode:
17765 /* We can handle all cases except UNEQ and LTGT. */
17766 switch (comp_code)
17767 {
17768 case GE: return ARM_GE;
17769 case GT: return ARM_GT;
17770 case LE: return ARM_LS;
17771 case LT: return ARM_MI;
17772 case NE: return ARM_NE;
17773 case EQ: return ARM_EQ;
17774 case ORDERED: return ARM_VC;
17775 case UNORDERED: return ARM_VS;
17776 case UNLT: return ARM_LT;
17777 case UNLE: return ARM_LE;
17778 case UNGT: return ARM_HI;
17779 case UNGE: return ARM_PL;
17780 /* UNEQ and LTGT do not have a representation. */
17781 case UNEQ: /* Fall through. */
17782 case LTGT: /* Fall through. */
17783 default: return ARM_NV;
17784 }
17785
17786 case CC_SWPmode:
17787 switch (comp_code)
17788 {
17789 case NE: return ARM_NE;
17790 case EQ: return ARM_EQ;
17791 case GE: return ARM_LE;
17792 case GT: return ARM_LT;
17793 case LE: return ARM_GE;
17794 case LT: return ARM_GT;
17795 case GEU: return ARM_LS;
17796 case GTU: return ARM_CC;
17797 case LEU: return ARM_CS;
17798 case LTU: return ARM_HI;
17799 default: return ARM_NV;
17800 }
17801
17802 case CC_Cmode:
17803 switch (comp_code)
17804 {
17805 case LTU: return ARM_CS;
17806 case GEU: return ARM_CC;
17807 default: return ARM_NV;
17808 }
17809
17810 case CC_CZmode:
17811 switch (comp_code)
17812 {
17813 case NE: return ARM_NE;
17814 case EQ: return ARM_EQ;
17815 case GEU: return ARM_CS;
17816 case GTU: return ARM_HI;
17817 case LEU: return ARM_LS;
17818 case LTU: return ARM_CC;
17819 default: return ARM_NV;
17820 }
17821
17822 case CC_NCVmode:
17823 switch (comp_code)
17824 {
17825 case GE: return ARM_GE;
17826 case LT: return ARM_LT;
17827 case GEU: return ARM_CS;
17828 case LTU: return ARM_CC;
17829 default: return ARM_NV;
17830 }
17831
17832 case CCmode:
17833 switch (comp_code)
17834 {
17835 case NE: return ARM_NE;
17836 case EQ: return ARM_EQ;
17837 case GE: return ARM_GE;
17838 case GT: return ARM_GT;
17839 case LE: return ARM_LE;
17840 case LT: return ARM_LT;
17841 case GEU: return ARM_CS;
17842 case GTU: return ARM_HI;
17843 case LEU: return ARM_LS;
17844 case LTU: return ARM_CC;
17845 default: return ARM_NV;
17846 }
17847
17848 default: gcc_unreachable ();
17849 }
17850 }
17851
17852 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17853 static enum arm_cond_code
17854 get_arm_condition_code (rtx comparison)
17855 {
17856 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17857 gcc_assert (code != ARM_NV);
17858 return code;
17859 }
17860
17861 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17862 instructions. */
17863 void
17864 thumb2_final_prescan_insn (rtx insn)
17865 {
17866 rtx first_insn = insn;
17867 rtx body = PATTERN (insn);
17868 rtx predicate;
17869 enum arm_cond_code code;
17870 int n;
17871 int mask;
17872
17873 /* Remove the previous insn from the count of insns to be output. */
17874 if (arm_condexec_count)
17875 arm_condexec_count--;
17876
17877 /* Nothing to do if we are already inside a conditional block. */
17878 if (arm_condexec_count)
17879 return;
17880
17881 if (GET_CODE (body) != COND_EXEC)
17882 return;
17883
17884 /* Conditional jumps are implemented directly. */
17885 if (GET_CODE (insn) == JUMP_INSN)
17886 return;
17887
17888 predicate = COND_EXEC_TEST (body);
17889 arm_current_cc = get_arm_condition_code (predicate);
17890
17891 n = get_attr_ce_count (insn);
17892 arm_condexec_count = 1;
17893 arm_condexec_mask = (1 << n) - 1;
17894 arm_condexec_masklen = n;
17895 /* See if subsequent instructions can be combined into the same block. */
17896 for (;;)
17897 {
17898 insn = next_nonnote_insn (insn);
17899
17900 /* Jumping into the middle of an IT block is illegal, so a label or
17901 barrier terminates the block. */
17902 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17903 break;
17904
17905 body = PATTERN (insn);
17906 /* USE and CLOBBER aren't really insns, so just skip them. */
17907 if (GET_CODE (body) == USE
17908 || GET_CODE (body) == CLOBBER)
17909 continue;
17910
17911 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17912 if (GET_CODE (body) != COND_EXEC)
17913 break;
17914 /* Allow up to 4 conditionally executed instructions in a block. */
17915 n = get_attr_ce_count (insn);
17916 if (arm_condexec_masklen + n > 4)
17917 break;
17918
17919 predicate = COND_EXEC_TEST (body);
17920 code = get_arm_condition_code (predicate);
17921 mask = (1 << n) - 1;
17922 if (arm_current_cc == code)
17923 arm_condexec_mask |= (mask << arm_condexec_masklen);
17924 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17925 break;
17926
17927 arm_condexec_count++;
17928 arm_condexec_masklen += n;
17929
17930 /* A jump must be the last instruction in a conditional block. */
17931 if (GET_CODE(insn) == JUMP_INSN)
17932 break;
17933 }
17934 /* Restore recog_data (getting the attributes of other insns can
17935 destroy this array, but final.c assumes that it remains intact
17936 across this call). */
17937 extract_constrain_insn_cached (first_insn);
17938 }
17939
17940 void
17941 arm_final_prescan_insn (rtx insn)
17942 {
17943 /* BODY will hold the body of INSN. */
17944 rtx body = PATTERN (insn);
17945
17946 /* This will be 1 if trying to repeat the trick, and things need to be
17947 reversed if it appears to fail. */
17948 int reverse = 0;
17949
17950 /* If we start with a return insn, we only succeed if we find another one. */
17951 int seeking_return = 0;
17952 enum rtx_code return_code = UNKNOWN;
17953
17954 /* START_INSN will hold the insn from where we start looking. This is the
17955 first insn after the following code_label if REVERSE is true. */
17956 rtx start_insn = insn;
17957
17958 /* If in state 4, check if the target branch is reached, in order to
17959 change back to state 0. */
17960 if (arm_ccfsm_state == 4)
17961 {
17962 if (insn == arm_target_insn)
17963 {
17964 arm_target_insn = NULL;
17965 arm_ccfsm_state = 0;
17966 }
17967 return;
17968 }
17969
17970 /* If in state 3, it is possible to repeat the trick, if this insn is an
17971 unconditional branch to a label, and immediately following this branch
17972 is the previous target label which is only used once, and the label this
17973 branch jumps to is not too far off. */
17974 if (arm_ccfsm_state == 3)
17975 {
17976 if (simplejump_p (insn))
17977 {
17978 start_insn = next_nonnote_insn (start_insn);
17979 if (GET_CODE (start_insn) == BARRIER)
17980 {
17981 /* XXX Isn't this always a barrier? */
17982 start_insn = next_nonnote_insn (start_insn);
17983 }
17984 if (GET_CODE (start_insn) == CODE_LABEL
17985 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17986 && LABEL_NUSES (start_insn) == 1)
17987 reverse = TRUE;
17988 else
17989 return;
17990 }
17991 else if (ANY_RETURN_P (body))
17992 {
17993 start_insn = next_nonnote_insn (start_insn);
17994 if (GET_CODE (start_insn) == BARRIER)
17995 start_insn = next_nonnote_insn (start_insn);
17996 if (GET_CODE (start_insn) == CODE_LABEL
17997 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17998 && LABEL_NUSES (start_insn) == 1)
17999 {
18000 reverse = TRUE;
18001 seeking_return = 1;
18002 return_code = GET_CODE (body);
18003 }
18004 else
18005 return;
18006 }
18007 else
18008 return;
18009 }
18010
18011 gcc_assert (!arm_ccfsm_state || reverse);
18012 if (GET_CODE (insn) != JUMP_INSN)
18013 return;
18014
18015 /* This jump might be paralleled with a clobber of the condition codes;
18016 the jump should always come first. */
18017 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18018 body = XVECEXP (body, 0, 0);
18019
18020 if (reverse
18021 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18022 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18023 {
18024 int insns_skipped;
18025 int fail = FALSE, succeed = FALSE;
18026 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18027 int then_not_else = TRUE;
18028 rtx this_insn = start_insn, label = 0;
18029
18030 /* Register the insn jumped to. */
18031 if (reverse)
18032 {
18033 if (!seeking_return)
18034 label = XEXP (SET_SRC (body), 0);
18035 }
18036 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18037 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18038 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18039 {
18040 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18041 then_not_else = FALSE;
18042 }
18043 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18044 {
18045 seeking_return = 1;
18046 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18047 }
18048 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18049 {
18050 seeking_return = 1;
18051 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18052 then_not_else = FALSE;
18053 }
18054 else
18055 gcc_unreachable ();
18056
18057 /* See how many insns this branch skips, and what kind of insns. If all
18058 insns are okay, and the label or unconditional branch to the same
18059 label is not too far away, succeed. */
18060 for (insns_skipped = 0;
18061 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18062 {
18063 rtx scanbody;
18064
18065 this_insn = next_nonnote_insn (this_insn);
18066 if (!this_insn)
18067 break;
18068
18069 switch (GET_CODE (this_insn))
18070 {
18071 case CODE_LABEL:
18072 /* Succeed if it is the target label, otherwise fail since
18073 control falls in from somewhere else. */
18074 if (this_insn == label)
18075 {
18076 arm_ccfsm_state = 1;
18077 succeed = TRUE;
18078 }
18079 else
18080 fail = TRUE;
18081 break;
18082
18083 case BARRIER:
18084 /* Succeed if the following insn is the target label.
18085 Otherwise fail.
18086 If return insns are used then the last insn in a function
18087 will be a barrier. */
18088 this_insn = next_nonnote_insn (this_insn);
18089 if (this_insn && this_insn == label)
18090 {
18091 arm_ccfsm_state = 1;
18092 succeed = TRUE;
18093 }
18094 else
18095 fail = TRUE;
18096 break;
18097
18098 case CALL_INSN:
18099 /* The AAPCS says that conditional calls should not be
18100 used since they make interworking inefficient (the
18101 linker can't transform BL<cond> into BLX). That's
18102 only a problem if the machine has BLX. */
18103 if (arm_arch5)
18104 {
18105 fail = TRUE;
18106 break;
18107 }
18108
18109 /* Succeed if the following insn is the target label, or
18110 if the following two insns are a barrier and the
18111 target label. */
18112 this_insn = next_nonnote_insn (this_insn);
18113 if (this_insn && GET_CODE (this_insn) == BARRIER)
18114 this_insn = next_nonnote_insn (this_insn);
18115
18116 if (this_insn && this_insn == label
18117 && insns_skipped < max_insns_skipped)
18118 {
18119 arm_ccfsm_state = 1;
18120 succeed = TRUE;
18121 }
18122 else
18123 fail = TRUE;
18124 break;
18125
18126 case JUMP_INSN:
18127 /* If this is an unconditional branch to the same label, succeed.
18128 If it is to another label, do nothing. If it is conditional,
18129 fail. */
18130 /* XXX Probably, the tests for SET and the PC are
18131 unnecessary. */
18132
18133 scanbody = PATTERN (this_insn);
18134 if (GET_CODE (scanbody) == SET
18135 && GET_CODE (SET_DEST (scanbody)) == PC)
18136 {
18137 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18138 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18139 {
18140 arm_ccfsm_state = 2;
18141 succeed = TRUE;
18142 }
18143 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18144 fail = TRUE;
18145 }
18146 /* Fail if a conditional return is undesirable (e.g. on a
18147 StrongARM), but still allow this if optimizing for size. */
18148 else if (GET_CODE (scanbody) == return_code
18149 && !use_return_insn (TRUE, NULL)
18150 && !optimize_size)
18151 fail = TRUE;
18152 else if (GET_CODE (scanbody) == return_code)
18153 {
18154 arm_ccfsm_state = 2;
18155 succeed = TRUE;
18156 }
18157 else if (GET_CODE (scanbody) == PARALLEL)
18158 {
18159 switch (get_attr_conds (this_insn))
18160 {
18161 case CONDS_NOCOND:
18162 break;
18163 default:
18164 fail = TRUE;
18165 break;
18166 }
18167 }
18168 else
18169 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18170
18171 break;
18172
18173 case INSN:
18174 /* Instructions using or affecting the condition codes make it
18175 fail. */
18176 scanbody = PATTERN (this_insn);
18177 if (!(GET_CODE (scanbody) == SET
18178 || GET_CODE (scanbody) == PARALLEL)
18179 || get_attr_conds (this_insn) != CONDS_NOCOND)
18180 fail = TRUE;
18181 break;
18182
18183 default:
18184 break;
18185 }
18186 }
18187 if (succeed)
18188 {
18189 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18190 arm_target_label = CODE_LABEL_NUMBER (label);
18191 else
18192 {
18193 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18194
18195 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18196 {
18197 this_insn = next_nonnote_insn (this_insn);
18198 gcc_assert (!this_insn
18199 || (GET_CODE (this_insn) != BARRIER
18200 && GET_CODE (this_insn) != CODE_LABEL));
18201 }
18202 if (!this_insn)
18203 {
18204 /* Oh dear! We ran off the end... give up. */
18205 extract_constrain_insn_cached (insn);
18206 arm_ccfsm_state = 0;
18207 arm_target_insn = NULL;
18208 return;
18209 }
18210 arm_target_insn = this_insn;
18211 }
18212
18213 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18214 what it was. */
18215 if (!reverse)
18216 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18217
18218 if (reverse || then_not_else)
18219 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18220 }
18221
18222 /* Restore recog_data (getting the attributes of other insns can
18223 destroy this array, but final.c assumes that it remains intact
18224 across this call). */
18225 extract_constrain_insn_cached (insn);
18226 }
18227 }
18228
18229 /* Output IT instructions. */
18230 void
18231 thumb2_asm_output_opcode (FILE * stream)
18232 {
18233 char buff[5];
18234 int n;
18235
18236 if (arm_condexec_mask)
18237 {
18238 for (n = 0; n < arm_condexec_masklen; n++)
18239 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18240 buff[n] = 0;
18241 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18242 arm_condition_codes[arm_current_cc]);
18243 arm_condexec_mask = 0;
18244 }
18245 }
18246
18247 /* Returns true if REGNO is a valid register
18248 for holding a quantity of type MODE. */
18249 int
18250 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18251 {
18252 if (GET_MODE_CLASS (mode) == MODE_CC)
18253 return (regno == CC_REGNUM
18254 || (TARGET_HARD_FLOAT && TARGET_VFP
18255 && regno == VFPCC_REGNUM));
18256
18257 if (TARGET_THUMB1)
18258 /* For the Thumb we only allow values bigger than SImode in
18259 registers 0 - 6, so that there is always a second low
18260 register available to hold the upper part of the value.
18261 We probably ought to ensure that the register is the
18262 start of an even numbered register pair. */
18263 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18264
18265 if (TARGET_HARD_FLOAT && TARGET_VFP
18266 && IS_VFP_REGNUM (regno))
18267 {
18268 if (mode == SFmode || mode == SImode)
18269 return VFP_REGNO_OK_FOR_SINGLE (regno);
18270
18271 if (mode == DFmode)
18272 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18273
18274 /* VFP registers can hold HFmode values, but there is no point in
18275 putting them there unless we have hardware conversion insns. */
18276 if (mode == HFmode)
18277 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18278
18279 if (TARGET_NEON)
18280 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18281 || (VALID_NEON_QREG_MODE (mode)
18282 && NEON_REGNO_OK_FOR_QUAD (regno))
18283 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18284 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18285 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18286 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18287 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18288
18289 return FALSE;
18290 }
18291
18292 if (TARGET_REALLY_IWMMXT)
18293 {
18294 if (IS_IWMMXT_GR_REGNUM (regno))
18295 return mode == SImode;
18296
18297 if (IS_IWMMXT_REGNUM (regno))
18298 return VALID_IWMMXT_REG_MODE (mode);
18299 }
18300
18301 /* We allow almost any value to be stored in the general registers.
18302 Restrict doubleword quantities to even register pairs so that we can
18303 use ldrd. Do not allow very large Neon structure opaque modes in
18304 general registers; they would use too many. */
18305 if (regno <= LAST_ARM_REGNUM)
18306 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18307 && ARM_NUM_REGS (mode) <= 4;
18308
18309 if (regno == FRAME_POINTER_REGNUM
18310 || regno == ARG_POINTER_REGNUM)
18311 /* We only allow integers in the fake hard registers. */
18312 return GET_MODE_CLASS (mode) == MODE_INT;
18313
18314 return FALSE;
18315 }
18316
18317 /* Implement MODES_TIEABLE_P. */
18318
18319 bool
18320 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18321 {
18322 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18323 return true;
18324
18325 /* We specifically want to allow elements of "structure" modes to
18326 be tieable to the structure. This more general condition allows
18327 other rarer situations too. */
18328 if (TARGET_NEON
18329 && (VALID_NEON_DREG_MODE (mode1)
18330 || VALID_NEON_QREG_MODE (mode1)
18331 || VALID_NEON_STRUCT_MODE (mode1))
18332 && (VALID_NEON_DREG_MODE (mode2)
18333 || VALID_NEON_QREG_MODE (mode2)
18334 || VALID_NEON_STRUCT_MODE (mode2)))
18335 return true;
18336
18337 return false;
18338 }
18339
18340 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18341 not used in arm mode. */
18342
18343 enum reg_class
18344 arm_regno_class (int regno)
18345 {
18346 if (TARGET_THUMB1)
18347 {
18348 if (regno == STACK_POINTER_REGNUM)
18349 return STACK_REG;
18350 if (regno == CC_REGNUM)
18351 return CC_REG;
18352 if (regno < 8)
18353 return LO_REGS;
18354 return HI_REGS;
18355 }
18356
18357 if (TARGET_THUMB2 && regno < 8)
18358 return LO_REGS;
18359
18360 if ( regno <= LAST_ARM_REGNUM
18361 || regno == FRAME_POINTER_REGNUM
18362 || regno == ARG_POINTER_REGNUM)
18363 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18364
18365 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18366 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18367
18368 if (IS_VFP_REGNUM (regno))
18369 {
18370 if (regno <= D7_VFP_REGNUM)
18371 return VFP_D0_D7_REGS;
18372 else if (regno <= LAST_LO_VFP_REGNUM)
18373 return VFP_LO_REGS;
18374 else
18375 return VFP_HI_REGS;
18376 }
18377
18378 if (IS_IWMMXT_REGNUM (regno))
18379 return IWMMXT_REGS;
18380
18381 if (IS_IWMMXT_GR_REGNUM (regno))
18382 return IWMMXT_GR_REGS;
18383
18384 return NO_REGS;
18385 }
18386
18387 /* Handle a special case when computing the offset
18388 of an argument from the frame pointer. */
18389 int
18390 arm_debugger_arg_offset (int value, rtx addr)
18391 {
18392 rtx insn;
18393
18394 /* We are only interested if dbxout_parms() failed to compute the offset. */
18395 if (value != 0)
18396 return 0;
18397
18398 /* We can only cope with the case where the address is held in a register. */
18399 if (GET_CODE (addr) != REG)
18400 return 0;
18401
18402 /* If we are using the frame pointer to point at the argument, then
18403 an offset of 0 is correct. */
18404 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18405 return 0;
18406
18407 /* If we are using the stack pointer to point at the
18408 argument, then an offset of 0 is correct. */
18409 /* ??? Check this is consistent with thumb2 frame layout. */
18410 if ((TARGET_THUMB || !frame_pointer_needed)
18411 && REGNO (addr) == SP_REGNUM)
18412 return 0;
18413
18414      /* Oh dear.  The argument is addressed through a register rather
18415	 than being held in a register or stored at a known offset from
18416	 the frame pointer.  Since GDB only understands
18417 those two kinds of argument we must translate the address
18418 held in the register into an offset from the frame pointer.
18419 We do this by searching through the insns for the function
18420 looking to see where this register gets its value. If the
18421 register is initialized from the frame pointer plus an offset
18422 then we are in luck and we can continue, otherwise we give up.
18423
18424 This code is exercised by producing debugging information
18425 for a function with arguments like this:
18426
18427 double func (double a, double b, int c, double d) {return d;}
18428
18429 Without this code the stab for parameter 'd' will be set to
18430 an offset of 0 from the frame pointer, rather than 8. */
18431
18432 /* The if() statement says:
18433
18434 If the insn is a normal instruction
18435 and if the insn is setting the value in a register
18436 and if the register being set is the register holding the address of the argument
18437      and if the address is computed by an addition
18438 that involves adding to a register
18439 which is the frame pointer
18440 a constant integer
18441
18442 then... */
18443
18444 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18445 {
18446 if ( GET_CODE (insn) == INSN
18447 && GET_CODE (PATTERN (insn)) == SET
18448 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18449 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18450 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18451 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18452 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18453 )
18454 {
18455 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18456
18457 break;
18458 }
18459 }
18460
18461 if (value == 0)
18462 {
18463 debug_rtx (addr);
18464 warning (0, "unable to compute real location of stacked parameter");
18465 value = 8; /* XXX magic hack */
18466 }
18467
18468 return value;
18469 }
18470 \f
18471 typedef enum {
18472 T_V8QI,
18473 T_V4HI,
18474 T_V2SI,
18475 T_V2SF,
18476 T_DI,
18477 T_V16QI,
18478 T_V8HI,
18479 T_V4SI,
18480 T_V4SF,
18481 T_V2DI,
18482 T_TI,
18483 T_EI,
18484 T_OI,
18485 T_MAX /* Size of enum. Keep last. */
18486 } neon_builtin_type_mode;
18487
18488 #define TYPE_MODE_BIT(X) (1 << (X))
18489
18490 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18491 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18492 | TYPE_MODE_BIT (T_DI))
18493 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18494 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18495 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
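
/* Editorial note (not in the original source): TYPE_MODE_BIT maps a
   neon_builtin_type_mode onto a single-bit mask, so TB_DREG collects the
   five 64-bit (D-register) modes and TB_QREG the 128-bit (Q-register)
   vector modes together with TImode.  */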
18496
18497 #define v8qi_UP T_V8QI
18498 #define v4hi_UP T_V4HI
18499 #define v2si_UP T_V2SI
18500 #define v2sf_UP T_V2SF
18501 #define di_UP T_DI
18502 #define v16qi_UP T_V16QI
18503 #define v8hi_UP T_V8HI
18504 #define v4si_UP T_V4SI
18505 #define v4sf_UP T_V4SF
18506 #define v2di_UP T_V2DI
18507 #define ti_UP T_TI
18508 #define ei_UP T_EI
18509 #define oi_UP T_OI
18510
18511 #define UP(X) X##_UP
18512
18513 typedef enum {
18514 NEON_BINOP,
18515 NEON_TERNOP,
18516 NEON_UNOP,
18517 NEON_GETLANE,
18518 NEON_SETLANE,
18519 NEON_CREATE,
18520 NEON_DUP,
18521 NEON_DUPLANE,
18522 NEON_COMBINE,
18523 NEON_SPLIT,
18524 NEON_LANEMUL,
18525 NEON_LANEMULL,
18526 NEON_LANEMULH,
18527 NEON_LANEMAC,
18528 NEON_SCALARMUL,
18529 NEON_SCALARMULL,
18530 NEON_SCALARMULH,
18531 NEON_SCALARMAC,
18532 NEON_CONVERT,
18533 NEON_FIXCONV,
18534 NEON_SELECT,
18535 NEON_RESULTPAIR,
18536 NEON_REINTERP,
18537 NEON_VTBL,
18538 NEON_VTBX,
18539 NEON_LOAD1,
18540 NEON_LOAD1LANE,
18541 NEON_STORE1,
18542 NEON_STORE1LANE,
18543 NEON_LOADSTRUCT,
18544 NEON_LOADSTRUCTLANE,
18545 NEON_STORESTRUCT,
18546 NEON_STORESTRUCTLANE,
18547 NEON_LOGICBINOP,
18548 NEON_SHIFTINSERT,
18549 NEON_SHIFTIMM,
18550 NEON_SHIFTACC
18551 } neon_itype;
18552
18553 typedef struct {
18554 const char *name;
18555 const neon_itype itype;
18556 const neon_builtin_type_mode mode;
18557 const enum insn_code code;
18558 unsigned int fcode;
18559 } neon_builtin_datum;
18560
18561 #define CF(N,X) CODE_FOR_neon_##N##X
18562
18563 #define VAR1(T, N, A) \
18564 {#N, NEON_##T, UP (A), CF (N, A), 0}
18565 #define VAR2(T, N, A, B) \
18566 VAR1 (T, N, A), \
18567 {#N, NEON_##T, UP (B), CF (N, B), 0}
18568 #define VAR3(T, N, A, B, C) \
18569 VAR2 (T, N, A, B), \
18570 {#N, NEON_##T, UP (C), CF (N, C), 0}
18571 #define VAR4(T, N, A, B, C, D) \
18572 VAR3 (T, N, A, B, C), \
18573 {#N, NEON_##T, UP (D), CF (N, D), 0}
18574 #define VAR5(T, N, A, B, C, D, E) \
18575 VAR4 (T, N, A, B, C, D), \
18576 {#N, NEON_##T, UP (E), CF (N, E), 0}
18577 #define VAR6(T, N, A, B, C, D, E, F) \
18578 VAR5 (T, N, A, B, C, D, E), \
18579 {#N, NEON_##T, UP (F), CF (N, F), 0}
18580 #define VAR7(T, N, A, B, C, D, E, F, G) \
18581 VAR6 (T, N, A, B, C, D, E, F), \
18582 {#N, NEON_##T, UP (G), CF (N, G), 0}
18583 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18584 VAR7 (T, N, A, B, C, D, E, F, G), \
18585 {#N, NEON_##T, UP (H), CF (N, H), 0}
18586 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18587 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18588 {#N, NEON_##T, UP (I), CF (N, I), 0}
18589 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18590 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18591 {#N, NEON_##T, UP (J), CF (N, J), 0}
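
/* Worked example (editorial, not in the original source): with the macros
   above,
     VAR1 (BINOP, vadd, v8qi)
   expands to
     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0}
   since UP (v8qi) is v8qi_UP == T_V8QI and CF (vadd, v8qi) is
   CODE_FOR_neon_vaddv8qi; the larger VARn macros simply chain such rows.  */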
18592
18593 /* The mode entries in the following table correspond to the "key" type of the
18594 instruction variant, i.e. equivalent to that which would be specified after
18595 the assembler mnemonic, which usually refers to the last vector operand.
18596    (Signed, unsigned and polynomial types are not distinguished, however;
18597    they are all mapped onto the same mode for a given element size.)  The modes
18598 listed per instruction should be the same as those defined for that
18599 instruction's pattern in neon.md. */
18600
18601 static neon_builtin_datum neon_builtin_data[] =
18602 {
18603 VAR10 (BINOP, vadd,
18604 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18605 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18606 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18607 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18608 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18609 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18610 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18611 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18612 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18613 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18614 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18615 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18616 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18617 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18618 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18619 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18620 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18621 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18622 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18623 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18624 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18625 VAR2 (BINOP, vqdmull, v4hi, v2si),
18626 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18627 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18628 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18629 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18630 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18631 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18632 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18633 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18634 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18635 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18636 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18637 VAR10 (BINOP, vsub,
18638 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18639 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18640 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18641 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18642 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18643 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18644 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18645 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18646 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18647 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18648 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18649 VAR2 (BINOP, vcage, v2sf, v4sf),
18650 VAR2 (BINOP, vcagt, v2sf, v4sf),
18651 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18652 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18653 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18654 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18655 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18656 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18657 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18658 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18659 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18660 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18661 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18662 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18663 VAR2 (BINOP, vrecps, v2sf, v4sf),
18664 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18665 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18666 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18667 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18668 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18669 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18670 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18671 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18672 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18673 VAR2 (UNOP, vcnt, v8qi, v16qi),
18674 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18675 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18676 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18677 /* FIXME: vget_lane supports more variants than this! */
18678 VAR10 (GETLANE, vget_lane,
18679 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18680 VAR10 (SETLANE, vset_lane,
18681 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18682 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18683 VAR10 (DUP, vdup_n,
18684 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18685 VAR10 (DUPLANE, vdup_lane,
18686 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18687 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18688 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18689 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18690 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18691 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18692 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18693 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18694 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18695 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18696 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18697 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18698 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18699 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18700 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18701 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18702 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18703 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18704 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18705 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18706 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18707 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18708 VAR10 (BINOP, vext,
18709 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18710 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18711 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18712 VAR2 (UNOP, vrev16, v8qi, v16qi),
18713 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18714 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18715 VAR10 (SELECT, vbsl,
18716 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18717 VAR1 (VTBL, vtbl1, v8qi),
18718 VAR1 (VTBL, vtbl2, v8qi),
18719 VAR1 (VTBL, vtbl3, v8qi),
18720 VAR1 (VTBL, vtbl4, v8qi),
18721 VAR1 (VTBX, vtbx1, v8qi),
18722 VAR1 (VTBX, vtbx2, v8qi),
18723 VAR1 (VTBX, vtbx3, v8qi),
18724 VAR1 (VTBX, vtbx4, v8qi),
18725 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18726 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18727 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18728 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18729 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18730 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18731 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18732 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18733 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18734 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18735 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18736 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18737 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18738 VAR10 (LOAD1, vld1,
18739 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18740 VAR10 (LOAD1LANE, vld1_lane,
18741 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18742 VAR10 (LOAD1, vld1_dup,
18743 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18744 VAR10 (STORE1, vst1,
18745 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18746 VAR10 (STORE1LANE, vst1_lane,
18747 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18748 VAR9 (LOADSTRUCT,
18749 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18750 VAR7 (LOADSTRUCTLANE, vld2_lane,
18751 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18752 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18753 VAR9 (STORESTRUCT, vst2,
18754 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18755 VAR7 (STORESTRUCTLANE, vst2_lane,
18756 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18757 VAR9 (LOADSTRUCT,
18758 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18759 VAR7 (LOADSTRUCTLANE, vld3_lane,
18760 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18761 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18762 VAR9 (STORESTRUCT, vst3,
18763 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18764 VAR7 (STORESTRUCTLANE, vst3_lane,
18765 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18766 VAR9 (LOADSTRUCT, vld4,
18767 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18768 VAR7 (LOADSTRUCTLANE, vld4_lane,
18769 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18770 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18771 VAR9 (STORESTRUCT, vst4,
18772 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18773 VAR7 (STORESTRUCTLANE, vst4_lane,
18774 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18775 VAR10 (LOGICBINOP, vand,
18776 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18777 VAR10 (LOGICBINOP, vorr,
18778 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18779 VAR10 (BINOP, veor,
18780 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18781 VAR10 (LOGICBINOP, vbic,
18782 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18783 VAR10 (LOGICBINOP, vorn,
18784 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18785 };
18786
18787 #undef CF
18788 #undef VAR1
18789 #undef VAR2
18790 #undef VAR3
18791 #undef VAR4
18792 #undef VAR5
18793 #undef VAR6
18794 #undef VAR7
18795 #undef VAR8
18796 #undef VAR9
18797 #undef VAR10
18798
18799 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they
18800    don't have symbolic names defined here (which would require too much
18801    duplication).  FIXME?  */
18802 enum arm_builtins
18803 {
18804 ARM_BUILTIN_GETWCGR0,
18805 ARM_BUILTIN_GETWCGR1,
18806 ARM_BUILTIN_GETWCGR2,
18807 ARM_BUILTIN_GETWCGR3,
18808
18809 ARM_BUILTIN_SETWCGR0,
18810 ARM_BUILTIN_SETWCGR1,
18811 ARM_BUILTIN_SETWCGR2,
18812 ARM_BUILTIN_SETWCGR3,
18813
18814 ARM_BUILTIN_WZERO,
18815
18816 ARM_BUILTIN_WAVG2BR,
18817 ARM_BUILTIN_WAVG2HR,
18818 ARM_BUILTIN_WAVG2B,
18819 ARM_BUILTIN_WAVG2H,
18820
18821 ARM_BUILTIN_WACCB,
18822 ARM_BUILTIN_WACCH,
18823 ARM_BUILTIN_WACCW,
18824
18825 ARM_BUILTIN_WMACS,
18826 ARM_BUILTIN_WMACSZ,
18827 ARM_BUILTIN_WMACU,
18828 ARM_BUILTIN_WMACUZ,
18829
18830 ARM_BUILTIN_WSADB,
18831 ARM_BUILTIN_WSADBZ,
18832 ARM_BUILTIN_WSADH,
18833 ARM_BUILTIN_WSADHZ,
18834
18835 ARM_BUILTIN_WALIGNI,
18836 ARM_BUILTIN_WALIGNR0,
18837 ARM_BUILTIN_WALIGNR1,
18838 ARM_BUILTIN_WALIGNR2,
18839 ARM_BUILTIN_WALIGNR3,
18840
18841 ARM_BUILTIN_TMIA,
18842 ARM_BUILTIN_TMIAPH,
18843 ARM_BUILTIN_TMIABB,
18844 ARM_BUILTIN_TMIABT,
18845 ARM_BUILTIN_TMIATB,
18846 ARM_BUILTIN_TMIATT,
18847
18848 ARM_BUILTIN_TMOVMSKB,
18849 ARM_BUILTIN_TMOVMSKH,
18850 ARM_BUILTIN_TMOVMSKW,
18851
18852 ARM_BUILTIN_TBCSTB,
18853 ARM_BUILTIN_TBCSTH,
18854 ARM_BUILTIN_TBCSTW,
18855
18856 ARM_BUILTIN_WMADDS,
18857 ARM_BUILTIN_WMADDU,
18858
18859 ARM_BUILTIN_WPACKHSS,
18860 ARM_BUILTIN_WPACKWSS,
18861 ARM_BUILTIN_WPACKDSS,
18862 ARM_BUILTIN_WPACKHUS,
18863 ARM_BUILTIN_WPACKWUS,
18864 ARM_BUILTIN_WPACKDUS,
18865
18866 ARM_BUILTIN_WADDB,
18867 ARM_BUILTIN_WADDH,
18868 ARM_BUILTIN_WADDW,
18869 ARM_BUILTIN_WADDSSB,
18870 ARM_BUILTIN_WADDSSH,
18871 ARM_BUILTIN_WADDSSW,
18872 ARM_BUILTIN_WADDUSB,
18873 ARM_BUILTIN_WADDUSH,
18874 ARM_BUILTIN_WADDUSW,
18875 ARM_BUILTIN_WSUBB,
18876 ARM_BUILTIN_WSUBH,
18877 ARM_BUILTIN_WSUBW,
18878 ARM_BUILTIN_WSUBSSB,
18879 ARM_BUILTIN_WSUBSSH,
18880 ARM_BUILTIN_WSUBSSW,
18881 ARM_BUILTIN_WSUBUSB,
18882 ARM_BUILTIN_WSUBUSH,
18883 ARM_BUILTIN_WSUBUSW,
18884
18885 ARM_BUILTIN_WAND,
18886 ARM_BUILTIN_WANDN,
18887 ARM_BUILTIN_WOR,
18888 ARM_BUILTIN_WXOR,
18889
18890 ARM_BUILTIN_WCMPEQB,
18891 ARM_BUILTIN_WCMPEQH,
18892 ARM_BUILTIN_WCMPEQW,
18893 ARM_BUILTIN_WCMPGTUB,
18894 ARM_BUILTIN_WCMPGTUH,
18895 ARM_BUILTIN_WCMPGTUW,
18896 ARM_BUILTIN_WCMPGTSB,
18897 ARM_BUILTIN_WCMPGTSH,
18898 ARM_BUILTIN_WCMPGTSW,
18899
18900 ARM_BUILTIN_TEXTRMSB,
18901 ARM_BUILTIN_TEXTRMSH,
18902 ARM_BUILTIN_TEXTRMSW,
18903 ARM_BUILTIN_TEXTRMUB,
18904 ARM_BUILTIN_TEXTRMUH,
18905 ARM_BUILTIN_TEXTRMUW,
18906 ARM_BUILTIN_TINSRB,
18907 ARM_BUILTIN_TINSRH,
18908 ARM_BUILTIN_TINSRW,
18909
18910 ARM_BUILTIN_WMAXSW,
18911 ARM_BUILTIN_WMAXSH,
18912 ARM_BUILTIN_WMAXSB,
18913 ARM_BUILTIN_WMAXUW,
18914 ARM_BUILTIN_WMAXUH,
18915 ARM_BUILTIN_WMAXUB,
18916 ARM_BUILTIN_WMINSW,
18917 ARM_BUILTIN_WMINSH,
18918 ARM_BUILTIN_WMINSB,
18919 ARM_BUILTIN_WMINUW,
18920 ARM_BUILTIN_WMINUH,
18921 ARM_BUILTIN_WMINUB,
18922
18923 ARM_BUILTIN_WMULUM,
18924 ARM_BUILTIN_WMULSM,
18925 ARM_BUILTIN_WMULUL,
18926
18927 ARM_BUILTIN_PSADBH,
18928 ARM_BUILTIN_WSHUFH,
18929
18930 ARM_BUILTIN_WSLLH,
18931 ARM_BUILTIN_WSLLW,
18932 ARM_BUILTIN_WSLLD,
18933 ARM_BUILTIN_WSRAH,
18934 ARM_BUILTIN_WSRAW,
18935 ARM_BUILTIN_WSRAD,
18936 ARM_BUILTIN_WSRLH,
18937 ARM_BUILTIN_WSRLW,
18938 ARM_BUILTIN_WSRLD,
18939 ARM_BUILTIN_WRORH,
18940 ARM_BUILTIN_WRORW,
18941 ARM_BUILTIN_WRORD,
18942 ARM_BUILTIN_WSLLHI,
18943 ARM_BUILTIN_WSLLWI,
18944 ARM_BUILTIN_WSLLDI,
18945 ARM_BUILTIN_WSRAHI,
18946 ARM_BUILTIN_WSRAWI,
18947 ARM_BUILTIN_WSRADI,
18948 ARM_BUILTIN_WSRLHI,
18949 ARM_BUILTIN_WSRLWI,
18950 ARM_BUILTIN_WSRLDI,
18951 ARM_BUILTIN_WRORHI,
18952 ARM_BUILTIN_WRORWI,
18953 ARM_BUILTIN_WRORDI,
18954
18955 ARM_BUILTIN_WUNPCKIHB,
18956 ARM_BUILTIN_WUNPCKIHH,
18957 ARM_BUILTIN_WUNPCKIHW,
18958 ARM_BUILTIN_WUNPCKILB,
18959 ARM_BUILTIN_WUNPCKILH,
18960 ARM_BUILTIN_WUNPCKILW,
18961
18962 ARM_BUILTIN_WUNPCKEHSB,
18963 ARM_BUILTIN_WUNPCKEHSH,
18964 ARM_BUILTIN_WUNPCKEHSW,
18965 ARM_BUILTIN_WUNPCKEHUB,
18966 ARM_BUILTIN_WUNPCKEHUH,
18967 ARM_BUILTIN_WUNPCKEHUW,
18968 ARM_BUILTIN_WUNPCKELSB,
18969 ARM_BUILTIN_WUNPCKELSH,
18970 ARM_BUILTIN_WUNPCKELSW,
18971 ARM_BUILTIN_WUNPCKELUB,
18972 ARM_BUILTIN_WUNPCKELUH,
18973 ARM_BUILTIN_WUNPCKELUW,
18974
18975 ARM_BUILTIN_WABSB,
18976 ARM_BUILTIN_WABSH,
18977 ARM_BUILTIN_WABSW,
18978
18979 ARM_BUILTIN_WADDSUBHX,
18980 ARM_BUILTIN_WSUBADDHX,
18981
18982 ARM_BUILTIN_WABSDIFFB,
18983 ARM_BUILTIN_WABSDIFFH,
18984 ARM_BUILTIN_WABSDIFFW,
18985
18986 ARM_BUILTIN_WADDCH,
18987 ARM_BUILTIN_WADDCW,
18988
18989 ARM_BUILTIN_WAVG4,
18990 ARM_BUILTIN_WAVG4R,
18991
18992 ARM_BUILTIN_WMADDSX,
18993 ARM_BUILTIN_WMADDUX,
18994
18995 ARM_BUILTIN_WMADDSN,
18996 ARM_BUILTIN_WMADDUN,
18997
18998 ARM_BUILTIN_WMULWSM,
18999 ARM_BUILTIN_WMULWUM,
19000
19001 ARM_BUILTIN_WMULWSMR,
19002 ARM_BUILTIN_WMULWUMR,
19003
19004 ARM_BUILTIN_WMULWL,
19005
19006 ARM_BUILTIN_WMULSMR,
19007 ARM_BUILTIN_WMULUMR,
19008
19009 ARM_BUILTIN_WQMULM,
19010 ARM_BUILTIN_WQMULMR,
19011
19012 ARM_BUILTIN_WQMULWM,
19013 ARM_BUILTIN_WQMULWMR,
19014
19015 ARM_BUILTIN_WADDBHUSM,
19016 ARM_BUILTIN_WADDBHUSL,
19017
19018 ARM_BUILTIN_WQMIABB,
19019 ARM_BUILTIN_WQMIABT,
19020 ARM_BUILTIN_WQMIATB,
19021 ARM_BUILTIN_WQMIATT,
19022
19023 ARM_BUILTIN_WQMIABBN,
19024 ARM_BUILTIN_WQMIABTN,
19025 ARM_BUILTIN_WQMIATBN,
19026 ARM_BUILTIN_WQMIATTN,
19027
19028 ARM_BUILTIN_WMIABB,
19029 ARM_BUILTIN_WMIABT,
19030 ARM_BUILTIN_WMIATB,
19031 ARM_BUILTIN_WMIATT,
19032
19033 ARM_BUILTIN_WMIABBN,
19034 ARM_BUILTIN_WMIABTN,
19035 ARM_BUILTIN_WMIATBN,
19036 ARM_BUILTIN_WMIATTN,
19037
19038 ARM_BUILTIN_WMIAWBB,
19039 ARM_BUILTIN_WMIAWBT,
19040 ARM_BUILTIN_WMIAWTB,
19041 ARM_BUILTIN_WMIAWTT,
19042
19043 ARM_BUILTIN_WMIAWBBN,
19044 ARM_BUILTIN_WMIAWBTN,
19045 ARM_BUILTIN_WMIAWTBN,
19046 ARM_BUILTIN_WMIAWTTN,
19047
19048 ARM_BUILTIN_WMERGE,
19049
19050 ARM_BUILTIN_THREAD_POINTER,
19051
19052 ARM_BUILTIN_NEON_BASE,
19053
19054 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19055 };
19056
19057 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19058
19059 static void
19060 arm_init_neon_builtins (void)
19061 {
19062 unsigned int i, fcode;
19063 tree decl;
19064
19065 tree neon_intQI_type_node;
19066 tree neon_intHI_type_node;
19067 tree neon_polyQI_type_node;
19068 tree neon_polyHI_type_node;
19069 tree neon_intSI_type_node;
19070 tree neon_intDI_type_node;
19071 tree neon_float_type_node;
19072
19073 tree intQI_pointer_node;
19074 tree intHI_pointer_node;
19075 tree intSI_pointer_node;
19076 tree intDI_pointer_node;
19077 tree float_pointer_node;
19078
19079 tree const_intQI_node;
19080 tree const_intHI_node;
19081 tree const_intSI_node;
19082 tree const_intDI_node;
19083 tree const_float_node;
19084
19085 tree const_intQI_pointer_node;
19086 tree const_intHI_pointer_node;
19087 tree const_intSI_pointer_node;
19088 tree const_intDI_pointer_node;
19089 tree const_float_pointer_node;
19090
19091 tree V8QI_type_node;
19092 tree V4HI_type_node;
19093 tree V2SI_type_node;
19094 tree V2SF_type_node;
19095 tree V16QI_type_node;
19096 tree V8HI_type_node;
19097 tree V4SI_type_node;
19098 tree V4SF_type_node;
19099 tree V2DI_type_node;
19100
19101 tree intUQI_type_node;
19102 tree intUHI_type_node;
19103 tree intUSI_type_node;
19104 tree intUDI_type_node;
19105
19106 tree intEI_type_node;
19107 tree intOI_type_node;
19108 tree intCI_type_node;
19109 tree intXI_type_node;
19110
19111 tree V8QI_pointer_node;
19112 tree V4HI_pointer_node;
19113 tree V2SI_pointer_node;
19114 tree V2SF_pointer_node;
19115 tree V16QI_pointer_node;
19116 tree V8HI_pointer_node;
19117 tree V4SI_pointer_node;
19118 tree V4SF_pointer_node;
19119 tree V2DI_pointer_node;
19120
19121 tree void_ftype_pv8qi_v8qi_v8qi;
19122 tree void_ftype_pv4hi_v4hi_v4hi;
19123 tree void_ftype_pv2si_v2si_v2si;
19124 tree void_ftype_pv2sf_v2sf_v2sf;
19125 tree void_ftype_pdi_di_di;
19126 tree void_ftype_pv16qi_v16qi_v16qi;
19127 tree void_ftype_pv8hi_v8hi_v8hi;
19128 tree void_ftype_pv4si_v4si_v4si;
19129 tree void_ftype_pv4sf_v4sf_v4sf;
19130 tree void_ftype_pv2di_v2di_v2di;
19131
19132 tree reinterp_ftype_dreg[5][5];
19133 tree reinterp_ftype_qreg[5][5];
19134 tree dreg_types[5], qreg_types[5];
19135
19136 /* Create distinguished type nodes for NEON vector element types,
19137 and pointers to values of such types, so we can detect them later. */
19138 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19139 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19140 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19141 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19142 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19143 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19144 neon_float_type_node = make_node (REAL_TYPE);
19145 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19146 layout_type (neon_float_type_node);
19147
19148 /* Define typedefs which exactly correspond to the modes we are basing vector
19149 types on. If you change these names you'll need to change
19150 the table used by arm_mangle_type too. */
19151 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19152 "__builtin_neon_qi");
19153 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19154 "__builtin_neon_hi");
19155 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19156 "__builtin_neon_si");
19157 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19158 "__builtin_neon_sf");
19159 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19160 "__builtin_neon_di");
19161 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19162 "__builtin_neon_poly8");
19163 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19164 "__builtin_neon_poly16");
19165
19166 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19167 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19168 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19169 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19170 float_pointer_node = build_pointer_type (neon_float_type_node);
19171
19172 /* Next create constant-qualified versions of the above types. */
19173 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19174 TYPE_QUAL_CONST);
19175 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19176 TYPE_QUAL_CONST);
19177 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19178 TYPE_QUAL_CONST);
19179 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19180 TYPE_QUAL_CONST);
19181 const_float_node = build_qualified_type (neon_float_type_node,
19182 TYPE_QUAL_CONST);
19183
19184 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19185 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19186 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19187 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19188 const_float_pointer_node = build_pointer_type (const_float_node);
19189
19190 /* Now create vector types based on our NEON element types. */
19191 /* 64-bit vectors. */
19192 V8QI_type_node =
19193 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19194 V4HI_type_node =
19195 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19196 V2SI_type_node =
19197 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19198 V2SF_type_node =
19199 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19200 /* 128-bit vectors. */
19201 V16QI_type_node =
19202 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19203 V8HI_type_node =
19204 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19205 V4SI_type_node =
19206 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19207 V4SF_type_node =
19208 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19209 V2DI_type_node =
19210 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19211
19212 /* Unsigned integer types for various mode sizes. */
19213 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19214 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19215 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19216 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19217
19218 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19219 "__builtin_neon_uqi");
19220 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19221 "__builtin_neon_uhi");
19222 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19223 "__builtin_neon_usi");
19224 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19225 "__builtin_neon_udi");
19226
19227 /* Opaque integer types for structures of vectors. */
19228 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19229 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19230 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19231 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19232
19233 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19234 "__builtin_neon_ti");
19235 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19236 "__builtin_neon_ei");
19237 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19238 "__builtin_neon_oi");
19239 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19240 "__builtin_neon_ci");
19241 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19242 "__builtin_neon_xi");
19243
19244 /* Pointers to vector types. */
19245 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19246 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19247 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19248 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19249 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19250 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19251 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19252 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19253 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19254
19255 /* Operations which return results as pairs. */
19256 void_ftype_pv8qi_v8qi_v8qi =
19257 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19258 V8QI_type_node, NULL);
19259 void_ftype_pv4hi_v4hi_v4hi =
19260 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19261 V4HI_type_node, NULL);
19262 void_ftype_pv2si_v2si_v2si =
19263 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19264 V2SI_type_node, NULL);
19265 void_ftype_pv2sf_v2sf_v2sf =
19266 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19267 V2SF_type_node, NULL);
19268 void_ftype_pdi_di_di =
19269 build_function_type_list (void_type_node, intDI_pointer_node,
19270 neon_intDI_type_node, neon_intDI_type_node, NULL);
19271 void_ftype_pv16qi_v16qi_v16qi =
19272 build_function_type_list (void_type_node, V16QI_pointer_node,
19273 V16QI_type_node, V16QI_type_node, NULL);
19274 void_ftype_pv8hi_v8hi_v8hi =
19275 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19276 V8HI_type_node, NULL);
19277 void_ftype_pv4si_v4si_v4si =
19278 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19279 V4SI_type_node, NULL);
19280 void_ftype_pv4sf_v4sf_v4sf =
19281 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19282 V4SF_type_node, NULL);
19283 void_ftype_pv2di_v2di_v2di =
19284 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19285 V2DI_type_node, NULL);
19286
19287 dreg_types[0] = V8QI_type_node;
19288 dreg_types[1] = V4HI_type_node;
19289 dreg_types[2] = V2SI_type_node;
19290 dreg_types[3] = V2SF_type_node;
19291 dreg_types[4] = neon_intDI_type_node;
19292
19293 qreg_types[0] = V16QI_type_node;
19294 qreg_types[1] = V8HI_type_node;
19295 qreg_types[2] = V4SI_type_node;
19296 qreg_types[3] = V4SF_type_node;
19297 qreg_types[4] = V2DI_type_node;
19298
19299 for (i = 0; i < 5; i++)
19300 {
19301 int j;
19302 for (j = 0; j < 5; j++)
19303 {
19304 reinterp_ftype_dreg[i][j]
19305 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19306 reinterp_ftype_qreg[i][j]
19307 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19308 }
19309 }
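
  /* Illustrative example (editorial, not in the original source):
     reinterp_ftype_dreg[1][0] is "V4HI (V8QI)", which later becomes the
     signature of __builtin_neon_vreinterpretv4hiv8qi.  */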
19310
19311 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19312 i < ARRAY_SIZE (neon_builtin_data);
19313 i++, fcode++)
19314 {
19315 neon_builtin_datum *d = &neon_builtin_data[i];
19316
19317 const char* const modenames[] = {
19318 "v8qi", "v4hi", "v2si", "v2sf", "di",
19319 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19320 "ti", "ei", "oi"
19321 };
19322 char namebuf[60];
19323 tree ftype = NULL;
19324 int is_load = 0, is_store = 0;
19325
19326 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19327
19328 d->fcode = fcode;
19329
19330 switch (d->itype)
19331 {
19332 case NEON_LOAD1:
19333 case NEON_LOAD1LANE:
19334 case NEON_LOADSTRUCT:
19335 case NEON_LOADSTRUCTLANE:
19336 is_load = 1;
19337 /* Fall through. */
19338 case NEON_STORE1:
19339 case NEON_STORE1LANE:
19340 case NEON_STORESTRUCT:
19341 case NEON_STORESTRUCTLANE:
19342 if (!is_load)
19343 is_store = 1;
19344 /* Fall through. */
19345 case NEON_UNOP:
19346 case NEON_BINOP:
19347 case NEON_LOGICBINOP:
19348 case NEON_SHIFTINSERT:
19349 case NEON_TERNOP:
19350 case NEON_GETLANE:
19351 case NEON_SETLANE:
19352 case NEON_CREATE:
19353 case NEON_DUP:
19354 case NEON_DUPLANE:
19355 case NEON_SHIFTIMM:
19356 case NEON_SHIFTACC:
19357 case NEON_COMBINE:
19358 case NEON_SPLIT:
19359 case NEON_CONVERT:
19360 case NEON_FIXCONV:
19361 case NEON_LANEMUL:
19362 case NEON_LANEMULL:
19363 case NEON_LANEMULH:
19364 case NEON_LANEMAC:
19365 case NEON_SCALARMUL:
19366 case NEON_SCALARMULL:
19367 case NEON_SCALARMULH:
19368 case NEON_SCALARMAC:
19369 case NEON_SELECT:
19370 case NEON_VTBL:
19371 case NEON_VTBX:
19372 {
19373 int k;
19374 tree return_type = void_type_node, args = void_list_node;
19375
19376 /* Build a function type directly from the insn_data for
19377 this builtin. The build_function_type() function takes
19378 care of removing duplicates for us. */
19379 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19380 {
19381 tree eltype;
19382
19383 if (is_load && k == 1)
19384 {
19385 /* Neon load patterns always have the memory
19386 operand in the operand 1 position. */
19387 gcc_assert (insn_data[d->code].operand[k].predicate
19388 == neon_struct_operand);
19389
19390 switch (d->mode)
19391 {
19392 case T_V8QI:
19393 case T_V16QI:
19394 eltype = const_intQI_pointer_node;
19395 break;
19396
19397 case T_V4HI:
19398 case T_V8HI:
19399 eltype = const_intHI_pointer_node;
19400 break;
19401
19402 case T_V2SI:
19403 case T_V4SI:
19404 eltype = const_intSI_pointer_node;
19405 break;
19406
19407 case T_V2SF:
19408 case T_V4SF:
19409 eltype = const_float_pointer_node;
19410 break;
19411
19412 case T_DI:
19413 case T_V2DI:
19414 eltype = const_intDI_pointer_node;
19415 break;
19416
19417 default: gcc_unreachable ();
19418 }
19419 }
19420 else if (is_store && k == 0)
19421 {
19422 /* Similarly, Neon store patterns use operand 0 as
19423 the memory location to store to. */
19424 gcc_assert (insn_data[d->code].operand[k].predicate
19425 == neon_struct_operand);
19426
19427 switch (d->mode)
19428 {
19429 case T_V8QI:
19430 case T_V16QI:
19431 eltype = intQI_pointer_node;
19432 break;
19433
19434 case T_V4HI:
19435 case T_V8HI:
19436 eltype = intHI_pointer_node;
19437 break;
19438
19439 case T_V2SI:
19440 case T_V4SI:
19441 eltype = intSI_pointer_node;
19442 break;
19443
19444 case T_V2SF:
19445 case T_V4SF:
19446 eltype = float_pointer_node;
19447 break;
19448
19449 case T_DI:
19450 case T_V2DI:
19451 eltype = intDI_pointer_node;
19452 break;
19453
19454 default: gcc_unreachable ();
19455 }
19456 }
19457 else
19458 {
19459 switch (insn_data[d->code].operand[k].mode)
19460 {
19461 case VOIDmode: eltype = void_type_node; break;
19462 /* Scalars. */
19463 case QImode: eltype = neon_intQI_type_node; break;
19464 case HImode: eltype = neon_intHI_type_node; break;
19465 case SImode: eltype = neon_intSI_type_node; break;
19466 case SFmode: eltype = neon_float_type_node; break;
19467 case DImode: eltype = neon_intDI_type_node; break;
19468 case TImode: eltype = intTI_type_node; break;
19469 case EImode: eltype = intEI_type_node; break;
19470 case OImode: eltype = intOI_type_node; break;
19471 case CImode: eltype = intCI_type_node; break;
19472 case XImode: eltype = intXI_type_node; break;
19473 /* 64-bit vectors. */
19474 case V8QImode: eltype = V8QI_type_node; break;
19475 case V4HImode: eltype = V4HI_type_node; break;
19476 case V2SImode: eltype = V2SI_type_node; break;
19477 case V2SFmode: eltype = V2SF_type_node; break;
19478 /* 128-bit vectors. */
19479 case V16QImode: eltype = V16QI_type_node; break;
19480 case V8HImode: eltype = V8HI_type_node; break;
19481 case V4SImode: eltype = V4SI_type_node; break;
19482 case V4SFmode: eltype = V4SF_type_node; break;
19483 case V2DImode: eltype = V2DI_type_node; break;
19484 default: gcc_unreachable ();
19485 }
19486 }
19487
19488 if (k == 0 && !is_store)
19489 return_type = eltype;
19490 else
19491 args = tree_cons (NULL_TREE, eltype, args);
19492 }
19493
19494 ftype = build_function_type (return_type, args);
19495 }
19496 break;
19497
19498 case NEON_RESULTPAIR:
19499 {
19500 switch (insn_data[d->code].operand[1].mode)
19501 {
19502 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19503 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19504 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19505 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19506 case DImode: ftype = void_ftype_pdi_di_di; break;
19507 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19508 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19509 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19510 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19511 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19512 default: gcc_unreachable ();
19513 }
19514 }
19515 break;
19516
19517 case NEON_REINTERP:
19518 {
19519 /* We iterate over 5 doubleword types, then 5 quadword
19520 types. */
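	    /* Editorial note (not in the original source): the doubleword
	       modes T_V8QI .. T_DI occupy enum values 0-4 and the quadword
	       modes T_V16QI .. T_V2DI values 5-9, so "d->mode % 5" below
	       selects the source-type index within either group.  */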
19521 int rhs = d->mode % 5;
19522 switch (insn_data[d->code].operand[0].mode)
19523 {
19524 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19525 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19526 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19527 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19528 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19529 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19530 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19531 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19532 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19533 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19534 default: gcc_unreachable ();
19535 }
19536 }
19537 break;
19538
19539 default:
19540 gcc_unreachable ();
19541 }
19542
19543 gcc_assert (ftype != NULL);
19544
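      /* Illustrative example (editorial, not in the original source): the
	 vadd entry with mode T_V8QI produces the name
	 "__builtin_neon_vaddv8qi".  */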
19545 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19546
19547 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19548 NULL_TREE);
19549 arm_builtin_decls[fcode] = decl;
19550 }
19551 }
19552
19553 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19554 do \
19555 { \
19556 if ((MASK) & insn_flags) \
19557 { \
19558 tree bdecl; \
19559 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19560 BUILT_IN_MD, NULL, NULL_TREE); \
19561 arm_builtin_decls[CODE] = bdecl; \
19562 } \
19563 } \
19564 while (0)
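
/* Editorial sketch (not in the original source): the registration loop in
   arm_init_iwmmxt_builtins below effectively performs calls such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
		   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   i.e. a builtin is created only when the corresponding FL_* bit is set in
   insn_flags.  */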
19565
19566 struct builtin_description
19567 {
19568 const unsigned int mask;
19569 const enum insn_code icode;
19570 const char * const name;
19571 const enum arm_builtins code;
19572 const enum rtx_code comparison;
19573 const unsigned int flag;
19574 };
19575
19576 static const struct builtin_description bdesc_2arg[] =
19577 {
19578 #define IWMMXT_BUILTIN(code, string, builtin) \
19579 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19580 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19581
19582 #define IWMMXT2_BUILTIN(code, string, builtin) \
19583 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19584 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
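
/* Worked example (editorial, not in the original source):
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */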
19585
19586 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19587 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19588 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19589 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19590 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19591 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19592 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19593 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19594 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19595 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19596 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19597 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19598 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19599 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19600 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19601 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19602 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19603 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19604 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19605 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19606 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19607 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19608 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19609 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19610 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19611 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19612 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19613 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19614 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19615 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19616 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19617 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19618 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19619 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19620 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19621 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19622 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19623 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19624 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19625 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19626 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19627 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19628 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19629 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19630 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19631 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19632 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19633 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19634 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19635 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19636 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19637 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19638 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19639 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19640 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19641 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19642 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19643 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19644 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19645 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19646 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19647 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19648 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19649 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19650 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19651 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19652 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19653 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19654 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19655 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19656 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19657 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19658 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19659 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19660 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19661 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19662 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19663 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19664
19665 #define IWMMXT_BUILTIN2(code, builtin) \
19666 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19667
19668 #define IWMMXT2_BUILTIN2(code, builtin) \
19669 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19670
19671 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19672 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19673 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19674 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19675 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19676 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19677 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19678 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19679 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19680 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19681 };
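
/* Editorial note (not in the original source): the entries added via the
   *_BUILTIN2 macros have a NULL name, so the registration loop in
   arm_init_iwmmxt_builtins skips them (see the "d->name == 0" check).  */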
19682
19683 static const struct builtin_description bdesc_1arg[] =
19684 {
19685 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19686 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19687 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19688 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19689 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19690 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19691 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19692 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19693 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19694 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19695 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19696 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19697 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19698 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19699 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19700 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19701 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19702 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19703 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19704 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19705 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19706 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19707 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19708 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19709 };
19710
19711 /* Set up all the iWMMXt builtins. This is not called if
19712 TARGET_IWMMXT is zero. */
19713
19714 static void
19715 arm_init_iwmmxt_builtins (void)
19716 {
19717 const struct builtin_description * d;
19718 size_t i;
19719
19720 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19721 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19722 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19723
19724 tree v8qi_ftype_v8qi_v8qi_int
19725 = build_function_type_list (V8QI_type_node,
19726 V8QI_type_node, V8QI_type_node,
19727 integer_type_node, NULL_TREE);
19728 tree v4hi_ftype_v4hi_int
19729 = build_function_type_list (V4HI_type_node,
19730 V4HI_type_node, integer_type_node, NULL_TREE);
19731 tree v2si_ftype_v2si_int
19732 = build_function_type_list (V2SI_type_node,
19733 V2SI_type_node, integer_type_node, NULL_TREE);
19734 tree v2si_ftype_di_di
19735 = build_function_type_list (V2SI_type_node,
19736 long_long_integer_type_node,
19737 long_long_integer_type_node,
19738 NULL_TREE);
19739 tree di_ftype_di_int
19740 = build_function_type_list (long_long_integer_type_node,
19741 long_long_integer_type_node,
19742 integer_type_node, NULL_TREE);
19743 tree di_ftype_di_int_int
19744 = build_function_type_list (long_long_integer_type_node,
19745 long_long_integer_type_node,
19746 integer_type_node,
19747 integer_type_node, NULL_TREE);
19748 tree int_ftype_v8qi
19749 = build_function_type_list (integer_type_node,
19750 V8QI_type_node, NULL_TREE);
19751 tree int_ftype_v4hi
19752 = build_function_type_list (integer_type_node,
19753 V4HI_type_node, NULL_TREE);
19754 tree int_ftype_v2si
19755 = build_function_type_list (integer_type_node,
19756 V2SI_type_node, NULL_TREE);
19757 tree int_ftype_v8qi_int
19758 = build_function_type_list (integer_type_node,
19759 V8QI_type_node, integer_type_node, NULL_TREE);
19760 tree int_ftype_v4hi_int
19761 = build_function_type_list (integer_type_node,
19762 V4HI_type_node, integer_type_node, NULL_TREE);
19763 tree int_ftype_v2si_int
19764 = build_function_type_list (integer_type_node,
19765 V2SI_type_node, integer_type_node, NULL_TREE);
19766 tree v8qi_ftype_v8qi_int_int
19767 = build_function_type_list (V8QI_type_node,
19768 V8QI_type_node, integer_type_node,
19769 integer_type_node, NULL_TREE);
19770 tree v4hi_ftype_v4hi_int_int
19771 = build_function_type_list (V4HI_type_node,
19772 V4HI_type_node, integer_type_node,
19773 integer_type_node, NULL_TREE);
19774 tree v2si_ftype_v2si_int_int
19775 = build_function_type_list (V2SI_type_node,
19776 V2SI_type_node, integer_type_node,
19777 integer_type_node, NULL_TREE);
19778 /* Miscellaneous. */
19779 tree v8qi_ftype_v4hi_v4hi
19780 = build_function_type_list (V8QI_type_node,
19781 V4HI_type_node, V4HI_type_node, NULL_TREE);
19782 tree v4hi_ftype_v2si_v2si
19783 = build_function_type_list (V4HI_type_node,
19784 V2SI_type_node, V2SI_type_node, NULL_TREE);
19785 tree v8qi_ftype_v4hi_v8qi
19786 = build_function_type_list (V8QI_type_node,
19787 V4HI_type_node, V8QI_type_node, NULL_TREE);
19788 tree v2si_ftype_v4hi_v4hi
19789 = build_function_type_list (V2SI_type_node,
19790 V4HI_type_node, V4HI_type_node, NULL_TREE);
19791 tree v2si_ftype_v8qi_v8qi
19792 = build_function_type_list (V2SI_type_node,
19793 V8QI_type_node, V8QI_type_node, NULL_TREE);
19794 tree v4hi_ftype_v4hi_di
19795 = build_function_type_list (V4HI_type_node,
19796 V4HI_type_node, long_long_integer_type_node,
19797 NULL_TREE);
19798 tree v2si_ftype_v2si_di
19799 = build_function_type_list (V2SI_type_node,
19800 V2SI_type_node, long_long_integer_type_node,
19801 NULL_TREE);
19802 tree di_ftype_void
19803 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19804 tree int_ftype_void
19805 = build_function_type_list (integer_type_node, NULL_TREE);
19806 tree di_ftype_v8qi
19807 = build_function_type_list (long_long_integer_type_node,
19808 V8QI_type_node, NULL_TREE);
19809 tree di_ftype_v4hi
19810 = build_function_type_list (long_long_integer_type_node,
19811 V4HI_type_node, NULL_TREE);
19812 tree di_ftype_v2si
19813 = build_function_type_list (long_long_integer_type_node,
19814 V2SI_type_node, NULL_TREE);
19815 tree v2si_ftype_v4hi
19816 = build_function_type_list (V2SI_type_node,
19817 V4HI_type_node, NULL_TREE);
19818 tree v4hi_ftype_v8qi
19819 = build_function_type_list (V4HI_type_node,
19820 V8QI_type_node, NULL_TREE);
19821 tree v8qi_ftype_v8qi
19822 = build_function_type_list (V8QI_type_node,
19823 V8QI_type_node, NULL_TREE);
19824 tree v4hi_ftype_v4hi
19825 = build_function_type_list (V4HI_type_node,
19826 V4HI_type_node, NULL_TREE);
19827 tree v2si_ftype_v2si
19828 = build_function_type_list (V2SI_type_node,
19829 V2SI_type_node, NULL_TREE);
19830
19831 tree di_ftype_di_v4hi_v4hi
19832 = build_function_type_list (long_long_unsigned_type_node,
19833 long_long_unsigned_type_node,
19834 V4HI_type_node, V4HI_type_node,
19835 NULL_TREE);
19836
19837 tree di_ftype_v4hi_v4hi
19838 = build_function_type_list (long_long_unsigned_type_node,
19839 V4HI_type_node,V4HI_type_node,
19840 NULL_TREE);
19841
19842 tree v2si_ftype_v2si_v4hi_v4hi
19843 = build_function_type_list (V2SI_type_node,
19844 V2SI_type_node, V4HI_type_node,
19845 V4HI_type_node, NULL_TREE);
19846
19847 tree v2si_ftype_v2si_v8qi_v8qi
19848 = build_function_type_list (V2SI_type_node,
19849 V2SI_type_node, V8QI_type_node,
19850 V8QI_type_node, NULL_TREE);
19851
19852 tree di_ftype_di_v2si_v2si
19853 = build_function_type_list (long_long_unsigned_type_node,
19854 long_long_unsigned_type_node,
19855 V2SI_type_node, V2SI_type_node,
19856 NULL_TREE);
19857
19858 tree di_ftype_di_di_int
19859 = build_function_type_list (long_long_unsigned_type_node,
19860 long_long_unsigned_type_node,
19861 long_long_unsigned_type_node,
19862 integer_type_node, NULL_TREE);
19863
19864 tree void_ftype_int
19865 = build_function_type_list (void_type_node,
19866 integer_type_node, NULL_TREE);
19867
19868 tree v8qi_ftype_char
19869 = build_function_type_list (V8QI_type_node,
19870 signed_char_type_node, NULL_TREE);
19871
19872 tree v4hi_ftype_short
19873 = build_function_type_list (V4HI_type_node,
19874 short_integer_type_node, NULL_TREE);
19875
19876 tree v2si_ftype_int
19877 = build_function_type_list (V2SI_type_node,
19878 integer_type_node, NULL_TREE);
19879
19880 /* Normal vector binops. */
19881 tree v8qi_ftype_v8qi_v8qi
19882 = build_function_type_list (V8QI_type_node,
19883 V8QI_type_node, V8QI_type_node, NULL_TREE);
19884 tree v4hi_ftype_v4hi_v4hi
19885 = build_function_type_list (V4HI_type_node,
19886 V4HI_type_node,V4HI_type_node, NULL_TREE);
19887 tree v2si_ftype_v2si_v2si
19888 = build_function_type_list (V2SI_type_node,
19889 V2SI_type_node, V2SI_type_node, NULL_TREE);
19890 tree di_ftype_di_di
19891 = build_function_type_list (long_long_unsigned_type_node,
19892 long_long_unsigned_type_node,
19893 long_long_unsigned_type_node,
19894 NULL_TREE);
19895
19896 /* Add all builtins that are more or less simple operations on two
19897 operands. */
19898 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19899 {
19900 /* Use one of the operands; the target can have a different mode for
19901 mask-generating compares. */
19902 enum machine_mode mode;
19903 tree type;
19904
19905 if (d->name == 0)
19906 continue;
19907
19908 mode = insn_data[d->icode].operand[1].mode;
19909
19910 switch (mode)
19911 {
19912 case V8QImode:
19913 type = v8qi_ftype_v8qi_v8qi;
19914 break;
19915 case V4HImode:
19916 type = v4hi_ftype_v4hi_v4hi;
19917 break;
19918 case V2SImode:
19919 type = v2si_ftype_v2si_v2si;
19920 break;
19921 case DImode:
19922 type = di_ftype_di_di;
19923 break;
19924
19925 default:
19926 gcc_unreachable ();
19927 }
19928
19929 def_mbuiltin (d->mask, d->name, type, d->code);
19930 }
19931
19932   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
19933 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19934 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19935 ARM_BUILTIN_ ## CODE)
19936
19937 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
19938 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
19939 ARM_BUILTIN_ ## CODE)
19940
19941 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19942 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
19943 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
19944 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
19945 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
19946 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
19947 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
19948 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
19949 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
19950
19951 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19952 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19953 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19954 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19955 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19956 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19957
19958 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19959 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19960 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19961 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19962 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19963 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19964
19965 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19966 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19967 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19968 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19969 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19970 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19971
19972 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19973 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19974 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19975 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19976 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19977 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19978
19979 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19980
19981 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
19982 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
19983 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
19984 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
19985 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
19986 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
19987 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
19988 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
19989 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19990 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19991
19992 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19993 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19994 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19995 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19996 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19997 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19998 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19999 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20000 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20001
20002 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20003 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20004 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20005
20006 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20007 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20008 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20009
20010 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20011 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20012
20013 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20014 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20015 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20016 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20017 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20018 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20019
20020 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20021 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20022 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20023 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20024 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20025 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20026 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20027 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20028 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20029 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20030 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20031 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20032
20033 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20034 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20035 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20036 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20037
20038 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20039 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20040 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20041 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20042 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20043 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20044 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20045
20046 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20047 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20048 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20049
20050 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20051 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20052 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20053 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20054
20055 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20056 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20057 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20058 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20059
20060 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20061 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20062 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20063 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20064
20065 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20066 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20067 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20068 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20069
20070 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20071 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20072 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20073 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20074
20075 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20076 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20077 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20078 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20079
20080 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20081
20082 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20083 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20084 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20085
20086 #undef iwmmx_mbuiltin
20087 #undef iwmmx2_mbuiltin
20088 }
20089
20090 static void
20091 arm_init_tls_builtins (void)
20092 {
20093 tree ftype, decl;
20094
20095 ftype = build_function_type (ptr_type_node, void_list_node);
20096 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20097 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20098 NULL, NULL_TREE);
20099 TREE_NOTHROW (decl) = 1;
20100 TREE_READONLY (decl) = 1;
20101 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20102 }
20103
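/* A minimal user-level sketch of the builtin registered above:

       void *tp = __builtin_thread_pointer ();

   reads the TLS thread pointer; the decl is marked nothrow and
   readonly above, so repeated calls may be combined.  */
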
20104 static void
20105 arm_init_fp16_builtins (void)
20106 {
20107 tree fp16_type = make_node (REAL_TYPE);
20108 TYPE_PRECISION (fp16_type) = 16;
20109 layout_type (fp16_type);
20110 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20111 }
20112
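/* Once registered, __fp16 behaves as an ordinary scalar type in user
   code, e.g. (a minimal sketch):

       __fp16 h = 1.0;
       float f = h + h;   (operands promoted to float, see
                           arm_promoted_type below)

   TYPE_PRECISION is set to 16 above and the layout is computed by
   layout_type.  */
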
20113 static void
20114 arm_init_builtins (void)
20115 {
20116 arm_init_tls_builtins ();
20117
20118 if (TARGET_REALLY_IWMMXT)
20119 arm_init_iwmmxt_builtins ();
20120
20121 if (TARGET_NEON)
20122 arm_init_neon_builtins ();
20123
20124 if (arm_fp16_format)
20125 arm_init_fp16_builtins ();
20126 }
20127
20128 /* Return the ARM builtin for CODE. */
20129
20130 static tree
20131 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20132 {
20133 if (code >= ARM_BUILTIN_MAX)
20134 return error_mark_node;
20135
20136 return arm_builtin_decls[code];
20137 }
20138
20139 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20140
20141 static const char *
20142 arm_invalid_parameter_type (const_tree t)
20143 {
20144 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20145 return N_("function parameters cannot have __fp16 type");
20146 return NULL;
20147 }
20148
20149 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20150
20151 static const char *
20152 arm_invalid_return_type (const_tree t)
20153 {
20154 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20155 return N_("functions cannot return __fp16 type");
20156 return NULL;
20157 }
20158
20159 /* Implement TARGET_PROMOTED_TYPE. */
20160
20161 static tree
20162 arm_promoted_type (const_tree t)
20163 {
20164 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20165 return float_type_node;
20166 return NULL_TREE;
20167 }
20168
20169 /* Implement TARGET_CONVERT_TO_TYPE.
20170 Specifically, this hook implements the peculiarity of the ARM
20171 half-precision floating-point C semantics that requires conversions
20172 between __fp16 and double to go through an intermediate conversion to float. */
20173
20174 static tree
20175 arm_convert_to_type (tree type, tree expr)
20176 {
20177 tree fromtype = TREE_TYPE (expr);
20178 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20179 return NULL_TREE;
20180 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20181 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20182 return convert (type, convert (float_type_node, expr));
20183 return NULL_TREE;
20184 }
20185
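/* A worked example of the rule above: with __fp16 h and double d, the
   conversion

       d = h;

   is expanded as if written

       d = (double) (float) h;

   and likewise in the other direction, because the check only fires
   when one side has 16-bit precision and the other is wider than 32
   bits.  */
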
20186 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20187 This simply adds HFmode as a supported mode; even though we don't
20188 implement arithmetic on this type directly, it's supported by
20189 optabs conversions, much the way the double-word arithmetic is
20190 special-cased in the default hook. */
20191
20192 static bool
20193 arm_scalar_mode_supported_p (enum machine_mode mode)
20194 {
20195 if (mode == HFmode)
20196 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20197 else if (ALL_FIXED_POINT_MODE_P (mode))
20198 return true;
20199 else
20200 return default_scalar_mode_supported_p (mode);
20201 }
20202
20203 /* Errors in the source file can cause expand_expr to return const0_rtx
20204 where we expect a vector. To avoid crashing, use one of the vector
20205 clear instructions. */
20206
20207 static rtx
20208 safe_vector_operand (rtx x, enum machine_mode mode)
20209 {
20210 if (x != const0_rtx)
20211 return x;
20212 x = gen_reg_rtx (mode);
20213
20214 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20215 : gen_rtx_SUBREG (DImode, x, 0)));
20216 return x;
20217 }
20218
20219 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20220
20221 static rtx
20222 arm_expand_binop_builtin (enum insn_code icode,
20223 tree exp, rtx target)
20224 {
20225 rtx pat;
20226 tree arg0 = CALL_EXPR_ARG (exp, 0);
20227 tree arg1 = CALL_EXPR_ARG (exp, 1);
20228 rtx op0 = expand_normal (arg0);
20229 rtx op1 = expand_normal (arg1);
20230 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20231 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20232 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20233
20234 if (VECTOR_MODE_P (mode0))
20235 op0 = safe_vector_operand (op0, mode0);
20236 if (VECTOR_MODE_P (mode1))
20237 op1 = safe_vector_operand (op1, mode1);
20238
20239 if (! target
20240 || GET_MODE (target) != tmode
20241 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20242 target = gen_reg_rtx (tmode);
20243
20244 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20245 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20246
20247 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20248 op0 = copy_to_mode_reg (mode0, op0);
20249 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20250 op1 = copy_to_mode_reg (mode1, op1);
20251
20252 pat = GEN_FCN (icode) (target, op0, op1);
20253 if (! pat)
20254 return 0;
20255 emit_insn (pat);
20256 return target;
20257 }
20258
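/* A rough sketch of the flow through this helper: a call such as

       __builtin_arm_wsadbz (a, b)

   (registered by iwmmx_mbuiltin above and dispatched from
   arm_expand_builtin below with CODE_FOR_iwmmxt_wsadbz) has its two
   arguments expanded, copied into registers of the insn's operand
   modes if the operand predicates reject them, and the instruction
   pattern emitted into a fresh or reused target register.  */
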
20259 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20260
20261 static rtx
20262 arm_expand_unop_builtin (enum insn_code icode,
20263 tree exp, rtx target, int do_load)
20264 {
20265 rtx pat;
20266 tree arg0 = CALL_EXPR_ARG (exp, 0);
20267 rtx op0 = expand_normal (arg0);
20268 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20269 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20270
20271 if (! target
20272 || GET_MODE (target) != tmode
20273 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20274 target = gen_reg_rtx (tmode);
20275 if (do_load)
20276 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20277 else
20278 {
20279 if (VECTOR_MODE_P (mode0))
20280 op0 = safe_vector_operand (op0, mode0);
20281
20282 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20283 op0 = copy_to_mode_reg (mode0, op0);
20284 }
20285
20286 pat = GEN_FCN (icode) (target, op0);
20287 if (! pat)
20288 return 0;
20289 emit_insn (pat);
20290 return target;
20291 }
20292
20293 typedef enum {
20294 NEON_ARG_COPY_TO_REG,
20295 NEON_ARG_CONSTANT,
20296 NEON_ARG_MEMORY,
20297 NEON_ARG_STOP
20298 } builtin_arg;
20299
20300 #define NEON_MAX_BUILTIN_ARGS 5
20301
20302 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20303 and return an expression for the accessed memory.
20304
20305 The intrinsic function operates on a block of registers that has
20306 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20307 The function references the memory at EXP in mode MEM_MODE;
20308 this mode may be BLKmode if no more suitable mode is available. */
20309
20310 static tree
20311 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20312 enum machine_mode reg_mode,
20313 neon_builtin_type_mode type_mode)
20314 {
20315 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20316 tree elem_type, upper_bound, array_type;
20317
20318 /* Work out the size of the register block in bytes. */
20319 reg_size = GET_MODE_SIZE (reg_mode);
20320
20321 /* Work out the size of each vector in bytes. */
20322 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20323 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20324
20325 /* Work out how many vectors there are. */
20326 gcc_assert (reg_size % vector_size == 0);
20327 nvectors = reg_size / vector_size;
20328
20329 /* Work out how many elements are being loaded or stored.
20330 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20331 and memory elements; anything else implies a lane load or store. */
20332 if (mem_mode == reg_mode)
20333 nelems = vector_size * nvectors;
20334 else
20335 nelems = nvectors;
20336
20337 /* Work out the type of each element. */
20338 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20339 elem_type = TREE_TYPE (TREE_TYPE (exp));
20340
20341 /* Create a type that describes the full access. */
20342 upper_bound = build_int_cst (size_type_node, nelems - 1);
20343 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20344
20345 /* Dereference EXP using that type. */
20346 return fold_build2 (MEM_REF, array_type, exp,
20347 build_int_cst (build_pointer_type (array_type), 0));
20348 }
20349
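/* A worked example, assuming a structure load of two uint8x8_t
   vectors (reg_size = 16, vector_size = 8, so nvectors = 2):

     - a full load (MEM_MODE == REG_MODE) touches 8 * 2 = 16 elements,
       so EXP is dereferenced through uint8_t[16];
     - a lane load (MEM_MODE != REG_MODE) touches one element per
       vector, giving uint8_t[2].

   The MEM_REF built from the array type gives the alias machinery an
   accurate picture of the bytes accessed.  */
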
20350 /* Expand a Neon builtin. */
20351 static rtx
20352 arm_expand_neon_args (rtx target, int icode, int have_retval,
20353 neon_builtin_type_mode type_mode,
20354 tree exp, ...)
20355 {
20356 va_list ap;
20357 rtx pat;
20358 tree arg[NEON_MAX_BUILTIN_ARGS];
20359 rtx op[NEON_MAX_BUILTIN_ARGS];
20360 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20361 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20362 enum machine_mode other_mode;
20363 int argc = 0;
20364 int opno;
20365
20366 if (have_retval
20367 && (!target
20368 || GET_MODE (target) != tmode
20369 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20370 target = gen_reg_rtx (tmode);
20371
20372 va_start (ap, exp);
20373
20374 for (;;)
20375 {
20376 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20377
20378 if (thisarg == NEON_ARG_STOP)
20379 break;
20380 else
20381 {
20382 opno = argc + have_retval;
20383 mode[argc] = insn_data[icode].operand[opno].mode;
20384 arg[argc] = CALL_EXPR_ARG (exp, argc);
20385 if (thisarg == NEON_ARG_MEMORY)
20386 {
20387 other_mode = insn_data[icode].operand[1 - opno].mode;
20388 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20389 other_mode, type_mode);
20390 }
20391 op[argc] = expand_normal (arg[argc]);
20392
20393 switch (thisarg)
20394 {
20395 case NEON_ARG_COPY_TO_REG:
20396 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20397 if (!(*insn_data[icode].operand[opno].predicate)
20398 (op[argc], mode[argc]))
20399 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20400 break;
20401
20402 case NEON_ARG_CONSTANT:
20403 /* FIXME: This error message is somewhat unhelpful. */
20404 if (!(*insn_data[icode].operand[opno].predicate)
20405 (op[argc], mode[argc]))
20406 error ("argument must be a constant");
20407 break;
20408
20409 case NEON_ARG_MEMORY:
20410 gcc_assert (MEM_P (op[argc]));
20411 PUT_MODE (op[argc], mode[argc]);
20412 /* ??? arm_neon.h uses the same built-in functions for signed
20413 and unsigned accesses, casting where necessary. This isn't
20414 alias safe. */
20415 set_mem_alias_set (op[argc], 0);
20416 if (!(*insn_data[icode].operand[opno].predicate)
20417 (op[argc], mode[argc]))
20418 op[argc] = (replace_equiv_address
20419 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20420 break;
20421
20422 case NEON_ARG_STOP:
20423 gcc_unreachable ();
20424 }
20425
20426 argc++;
20427 }
20428 }
20429
20430 va_end (ap);
20431
20432 if (have_retval)
20433 switch (argc)
20434 {
20435 case 1:
20436 pat = GEN_FCN (icode) (target, op[0]);
20437 break;
20438
20439 case 2:
20440 pat = GEN_FCN (icode) (target, op[0], op[1]);
20441 break;
20442
20443 case 3:
20444 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20445 break;
20446
20447 case 4:
20448 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20449 break;
20450
20451 case 5:
20452 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20453 break;
20454
20455 default:
20456 gcc_unreachable ();
20457 }
20458 else
20459 switch (argc)
20460 {
20461 case 1:
20462 pat = GEN_FCN (icode) (op[0]);
20463 break;
20464
20465 case 2:
20466 pat = GEN_FCN (icode) (op[0], op[1]);
20467 break;
20468
20469 case 3:
20470 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20471 break;
20472
20473 case 4:
20474 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20475 break;
20476
20477 case 5:
20478 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20479 break;
20480
20481 default:
20482 gcc_unreachable ();
20483 }
20484
20485 if (!pat)
20486 return 0;
20487
20488 emit_insn (pat);
20489
20490 return target;
20491 }
20492
20493 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20494 constants defined per-instruction or per instruction-variant. Instead, the
20495 required info is looked up in the table neon_builtin_data. */
20496 static rtx
20497 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20498 {
20499 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20500 neon_itype itype = d->itype;
20501 enum insn_code icode = d->code;
20502 neon_builtin_type_mode type_mode = d->mode;
20503
20504 switch (itype)
20505 {
20506 case NEON_UNOP:
20507 case NEON_CONVERT:
20508 case NEON_DUPLANE:
20509 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20510 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20511
20512 case NEON_BINOP:
20513 case NEON_SETLANE:
20514 case NEON_SCALARMUL:
20515 case NEON_SCALARMULL:
20516 case NEON_SCALARMULH:
20517 case NEON_SHIFTINSERT:
20518 case NEON_LOGICBINOP:
20519 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20520 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20521 NEON_ARG_STOP);
20522
20523 case NEON_TERNOP:
20524 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20525 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20526 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20527
20528 case NEON_GETLANE:
20529 case NEON_FIXCONV:
20530 case NEON_SHIFTIMM:
20531 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20532 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20533 NEON_ARG_STOP);
20534
20535 case NEON_CREATE:
20536 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20537 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20538
20539 case NEON_DUP:
20540 case NEON_SPLIT:
20541 case NEON_REINTERP:
20542 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20543 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20544
20545 case NEON_COMBINE:
20546 case NEON_VTBL:
20547 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20548 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20549
20550 case NEON_RESULTPAIR:
20551 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20552 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20553 NEON_ARG_STOP);
20554
20555 case NEON_LANEMUL:
20556 case NEON_LANEMULL:
20557 case NEON_LANEMULH:
20558 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20559 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20560 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20561
20562 case NEON_LANEMAC:
20563 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20564 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20565 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20566
20567 case NEON_SHIFTACC:
20568 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20569 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20570 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20571
20572 case NEON_SCALARMAC:
20573 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20574 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20575 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20576
20577 case NEON_SELECT:
20578 case NEON_VTBX:
20579 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20580 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20581 NEON_ARG_STOP);
20582
20583 case NEON_LOAD1:
20584 case NEON_LOADSTRUCT:
20585 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20586 NEON_ARG_MEMORY, NEON_ARG_STOP);
20587
20588 case NEON_LOAD1LANE:
20589 case NEON_LOADSTRUCTLANE:
20590 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20591 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20592 NEON_ARG_STOP);
20593
20594 case NEON_STORE1:
20595 case NEON_STORESTRUCT:
20596 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20597 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20598
20599 case NEON_STORE1LANE:
20600 case NEON_STORESTRUCTLANE:
20601 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20602 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20603 NEON_ARG_STOP);
20604 }
20605
20606 gcc_unreachable ();
20607 }
20608
20609 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20610 void
20611 neon_reinterpret (rtx dest, rtx src)
20612 {
20613 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20614 }
20615
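/* For example, an intrinsic in the vreinterpret family only changes
   the type, not the bits, so the expansion is a single move of SRC
   viewed in DEST's mode, which register allocation can normally
   eliminate.  */
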
20616 /* Emit code to place a Neon pair result in memory locations (with equal
20617 registers). */
20618 void
20619 neon_emit_pair_result_insn (enum machine_mode mode,
20620 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20621 rtx op1, rtx op2)
20622 {
20623 rtx mem = gen_rtx_MEM (mode, destaddr);
20624 rtx tmp1 = gen_reg_rtx (mode);
20625 rtx tmp2 = gen_reg_rtx (mode);
20626
20627 emit_insn (intfn (tmp1, op1, op2, tmp2));
20628
20629 emit_move_insn (mem, tmp1);
20630 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20631 emit_move_insn (mem, tmp2);
20632 }
20633
20634 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20635 not to early-clobber SRC registers in the process.
20636
20637 We assume that the operands described by SRC and DEST represent a
20638 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20639 number of components into which the copy has been decomposed. */
20640 void
20641 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20642 {
20643 unsigned int i;
20644
20645 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20646 || REGNO (operands[0]) < REGNO (operands[1]))
20647 {
20648 for (i = 0; i < count; i++)
20649 {
20650 operands[2 * i] = dest[i];
20651 operands[2 * i + 1] = src[i];
20652 }
20653 }
20654 else
20655 {
20656 for (i = 0; i < count; i++)
20657 {
20658 operands[2 * i] = dest[count - i - 1];
20659 operands[2 * i + 1] = src[count - i - 1];
20660 }
20661 }
20662 }
20663
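/* A small worked example: copying the pair {d1, d2} into {d2, d3}
   overlaps with REGNO (dest) > REGNO (src), so the else branch fills
   OPERANDS in reverse order (d3 <- d2 first, then d2 <- d1); the
   forward order would overwrite d2 before it had been read.  */
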
20664 /* Split operands into moves from op[1] + op[2] into op[0]. */
20665
20666 void
20667 neon_split_vcombine (rtx operands[3])
20668 {
20669 unsigned int dest = REGNO (operands[0]);
20670 unsigned int src1 = REGNO (operands[1]);
20671 unsigned int src2 = REGNO (operands[2]);
20672 enum machine_mode halfmode = GET_MODE (operands[1]);
20673 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20674 rtx destlo, desthi;
20675
20676 if (src1 == dest && src2 == dest + halfregs)
20677 {
20678 /* No-op move. Can't split to nothing; emit something. */
20679 emit_note (NOTE_INSN_DELETED);
20680 return;
20681 }
20682
20683 /* Preserve register attributes for variable tracking. */
20684 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20685 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20686 GET_MODE_SIZE (halfmode));
20687
20688 /* Special case of reversed high/low parts. Use VSWP. */
20689 if (src2 == dest && src1 == dest + halfregs)
20690 {
20691 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20692 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20693 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20694 return;
20695 }
20696
20697 if (!reg_overlap_mentioned_p (operands[2], destlo))
20698 {
20699 /* Try to avoid unnecessary moves if part of the result
20700 is in the right place already. */
20701 if (src1 != dest)
20702 emit_move_insn (destlo, operands[1]);
20703 if (src2 != dest + halfregs)
20704 emit_move_insn (desthi, operands[2]);
20705 }
20706 else
20707 {
20708 if (src2 != dest + halfregs)
20709 emit_move_insn (desthi, operands[2]);
20710 if (src1 != dest)
20711 emit_move_insn (destlo, operands[1]);
20712 }
20713 }
20714
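/* Sketch of the cases handled above for combining two D registers
   into the Q register (DEST, DEST + halfregs):

     - both halves already in place: emit only a deleted-insn note;
     - halves present but swapped: one PARALLEL of two sets (VSWP);
     - otherwise: up to two ordinary moves, ordered so that a source
       still needed by the second move is not clobbered by the
       first.  */
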
20715 /* Expand an expression EXP that calls a built-in function,
20716 with result going to TARGET if that's convenient
20717 (and in mode MODE if that's convenient).
20718 SUBTARGET may be used as the target for computing one of EXP's operands.
20719 IGNORE is nonzero if the value is to be ignored. */
20720
20721 static rtx
20722 arm_expand_builtin (tree exp,
20723 rtx target,
20724 rtx subtarget ATTRIBUTE_UNUSED,
20725 enum machine_mode mode ATTRIBUTE_UNUSED,
20726 int ignore ATTRIBUTE_UNUSED)
20727 {
20728 const struct builtin_description * d;
20729 enum insn_code icode;
20730 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20731 tree arg0;
20732 tree arg1;
20733 tree arg2;
20734 rtx op0;
20735 rtx op1;
20736 rtx op2;
20737 rtx pat;
20738 int fcode = DECL_FUNCTION_CODE (fndecl);
20739 size_t i;
20740 enum machine_mode tmode;
20741 enum machine_mode mode0;
20742 enum machine_mode mode1;
20743 enum machine_mode mode2;
20744 int opint;
20745 int selector;
20746 int mask;
20747 int imm;
20748
20749 if (fcode >= ARM_BUILTIN_NEON_BASE)
20750 return arm_expand_neon_builtin (fcode, exp, target);
20751
20752 switch (fcode)
20753 {
20754 case ARM_BUILTIN_TEXTRMSB:
20755 case ARM_BUILTIN_TEXTRMUB:
20756 case ARM_BUILTIN_TEXTRMSH:
20757 case ARM_BUILTIN_TEXTRMUH:
20758 case ARM_BUILTIN_TEXTRMSW:
20759 case ARM_BUILTIN_TEXTRMUW:
20760 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20761 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20762 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20763 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20764 : CODE_FOR_iwmmxt_textrmw);
20765
20766 arg0 = CALL_EXPR_ARG (exp, 0);
20767 arg1 = CALL_EXPR_ARG (exp, 1);
20768 op0 = expand_normal (arg0);
20769 op1 = expand_normal (arg1);
20770 tmode = insn_data[icode].operand[0].mode;
20771 mode0 = insn_data[icode].operand[1].mode;
20772 mode1 = insn_data[icode].operand[2].mode;
20773
20774 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20775 op0 = copy_to_mode_reg (mode0, op0);
20776 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20777 {
20778 /* @@@ better error message */
20779 error ("selector must be an immediate");
20780 return gen_reg_rtx (tmode);
20781 }
20782
20783 opint = INTVAL (op1);
20784 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
20785 {
20786 if (opint > 7 || opint < 0)
20787 error ("the range of selector should be in 0 to 7");
20788 }
20789 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
20790 {
20791 if (opint > 3 || opint < 0)
20792 error ("the range of selector should be in 0 to 3");
20793 }
20794 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20795 {
20796 if (opint > 1 || opint < 0)
20797 error ("the range of selector should be in 0 to 1");
20798 }
20799
20800 if (target == 0
20801 || GET_MODE (target) != tmode
20802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20803 target = gen_reg_rtx (tmode);
20804 pat = GEN_FCN (icode) (target, op0, op1);
20805 if (! pat)
20806 return 0;
20807 emit_insn (pat);
20808 return target;
20809
20810 case ARM_BUILTIN_WALIGNI:
20811 /* If op2 is an immediate, call waligni, else call walignr. */
20812 arg0 = CALL_EXPR_ARG (exp, 0);
20813 arg1 = CALL_EXPR_ARG (exp, 1);
20814 arg2 = CALL_EXPR_ARG (exp, 2);
20815 op0 = expand_normal (arg0);
20816 op1 = expand_normal (arg1);
20817 op2 = expand_normal (arg2);
20818 if (GET_CODE (op2) == CONST_INT)
20819 {
20820 icode = CODE_FOR_iwmmxt_waligni;
20821 tmode = insn_data[icode].operand[0].mode;
20822 mode0 = insn_data[icode].operand[1].mode;
20823 mode1 = insn_data[icode].operand[2].mode;
20824 mode2 = insn_data[icode].operand[3].mode;
20825 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20826 op0 = copy_to_mode_reg (mode0, op0);
20827 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20828 op1 = copy_to_mode_reg (mode1, op1);
20829 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
20830 selector = INTVAL (op2);
20831 if (selector > 7 || selector < 0)
20832 error ("the range of selector should be in 0 to 7");
20833 }
20834 else
20835 {
20836 icode = CODE_FOR_iwmmxt_walignr;
20837 tmode = insn_data[icode].operand[0].mode;
20838 mode0 = insn_data[icode].operand[1].mode;
20839 mode1 = insn_data[icode].operand[2].mode;
20840 mode2 = insn_data[icode].operand[3].mode;
20841 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20842 op0 = copy_to_mode_reg (mode0, op0);
20843 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20844 op1 = copy_to_mode_reg (mode1, op1);
20845 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
20846 op2 = copy_to_mode_reg (mode2, op2);
20847 }
20848 if (target == 0
20849 || GET_MODE (target) != tmode
20850 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20851 target = gen_reg_rtx (tmode);
20852 pat = GEN_FCN (icode) (target, op0, op1, op2);
20853 if (!pat)
20854 return 0;
20855 emit_insn (pat);
20856 return target;
20857
20858 case ARM_BUILTIN_TINSRB:
20859 case ARM_BUILTIN_TINSRH:
20860 case ARM_BUILTIN_TINSRW:
20861 case ARM_BUILTIN_WMERGE:
20862 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20863 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20864 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
20865 : CODE_FOR_iwmmxt_tinsrw);
20866 arg0 = CALL_EXPR_ARG (exp, 0);
20867 arg1 = CALL_EXPR_ARG (exp, 1);
20868 arg2 = CALL_EXPR_ARG (exp, 2);
20869 op0 = expand_normal (arg0);
20870 op1 = expand_normal (arg1);
20871 op2 = expand_normal (arg2);
20872 tmode = insn_data[icode].operand[0].mode;
20873 mode0 = insn_data[icode].operand[1].mode;
20874 mode1 = insn_data[icode].operand[2].mode;
20875 mode2 = insn_data[icode].operand[3].mode;
20876
20877 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20878 op0 = copy_to_mode_reg (mode0, op0);
20879 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20880 op1 = copy_to_mode_reg (mode1, op1);
20881 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20882 {
20883 error ("selector must be an immediate");
20884 return const0_rtx;
20885 }
20886 if (icode == CODE_FOR_iwmmxt_wmerge)
20887 {
20888 selector = INTVAL (op2);
20889 if (selector > 7 || selector < 0)
20890 error ("the range of selector should be in 0 to 7");
20891 }
20892 if ((icode == CODE_FOR_iwmmxt_tinsrb)
20893 || (icode == CODE_FOR_iwmmxt_tinsrh)
20894 || (icode == CODE_FOR_iwmmxt_tinsrw))
20895 {
20896 mask = 0x01;
20897 selector = INTVAL (op2);
20898 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
20899 error ("the range of selector should be in 0 to 7");
20900 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
20901 error ("the range of selector should be in 0 to 3");
20902 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
20903 error ("the range of selector should be in 0 to 1");
20904 mask <<= selector;
20905 op2 = gen_rtx_CONST_INT (SImode, mask);
20906 }
20907 if (target == 0
20908 || GET_MODE (target) != tmode
20909 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20910 target = gen_reg_rtx (tmode);
20911 pat = GEN_FCN (icode) (target, op0, op1, op2);
20912 if (! pat)
20913 return 0;
20914 emit_insn (pat);
20915 return target;
20916
20917 case ARM_BUILTIN_SETWCGR0:
20918 case ARM_BUILTIN_SETWCGR1:
20919 case ARM_BUILTIN_SETWCGR2:
20920 case ARM_BUILTIN_SETWCGR3:
20921 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
20922 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
20923 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
20924 : CODE_FOR_iwmmxt_setwcgr3);
20925 arg0 = CALL_EXPR_ARG (exp, 0);
20926 op0 = expand_normal (arg0);
20927 mode0 = insn_data[icode].operand[0].mode;
20928 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
20929 op0 = copy_to_mode_reg (mode0, op0);
20930 pat = GEN_FCN (icode) (op0);
20931 if (!pat)
20932 return 0;
20933 emit_insn (pat);
20934 return 0;
20935
20936 case ARM_BUILTIN_GETWCGR0:
20937 case ARM_BUILTIN_GETWCGR1:
20938 case ARM_BUILTIN_GETWCGR2:
20939 case ARM_BUILTIN_GETWCGR3:
20940 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
20941 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
20942 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
20943 : CODE_FOR_iwmmxt_getwcgr3);
20944 tmode = insn_data[icode].operand[0].mode;
20945 if (target == 0
20946 || GET_MODE (target) != tmode
20947 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20948 target = gen_reg_rtx (tmode);
20949 pat = GEN_FCN (icode) (target);
20950 if (!pat)
20951 return 0;
20952 emit_insn (pat);
20953 return target;
20954
20955 case ARM_BUILTIN_WSHUFH:
20956 icode = CODE_FOR_iwmmxt_wshufh;
20957 arg0 = CALL_EXPR_ARG (exp, 0);
20958 arg1 = CALL_EXPR_ARG (exp, 1);
20959 op0 = expand_normal (arg0);
20960 op1 = expand_normal (arg1);
20961 tmode = insn_data[icode].operand[0].mode;
20962 mode1 = insn_data[icode].operand[1].mode;
20963 mode2 = insn_data[icode].operand[2].mode;
20964
20965 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20966 op0 = copy_to_mode_reg (mode1, op0);
20967 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20968 {
20969 error ("mask must be an immediate");
20970 return const0_rtx;
20971 }
20972 selector = INTVAL (op1);
20973 if (selector < 0 || selector > 255)
20974 error ("the range of mask should be in 0 to 255");
20975 if (target == 0
20976 || GET_MODE (target) != tmode
20977 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20978 target = gen_reg_rtx (tmode);
20979 pat = GEN_FCN (icode) (target, op0, op1);
20980 if (! pat)
20981 return 0;
20982 emit_insn (pat);
20983 return target;
20984
20985 case ARM_BUILTIN_WMADDS:
20986 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
20987 case ARM_BUILTIN_WMADDSX:
20988 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
20989 case ARM_BUILTIN_WMADDSN:
20990 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
20991 case ARM_BUILTIN_WMADDU:
20992 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
20993 case ARM_BUILTIN_WMADDUX:
20994 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
20995 case ARM_BUILTIN_WMADDUN:
20996 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
20997 case ARM_BUILTIN_WSADBZ:
20998 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20999 case ARM_BUILTIN_WSADHZ:
21000 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21001
21002 /* Several three-argument builtins. */
21003 case ARM_BUILTIN_WMACS:
21004 case ARM_BUILTIN_WMACU:
21005 case ARM_BUILTIN_TMIA:
21006 case ARM_BUILTIN_TMIAPH:
21007 case ARM_BUILTIN_TMIATT:
21008 case ARM_BUILTIN_TMIATB:
21009 case ARM_BUILTIN_TMIABT:
21010 case ARM_BUILTIN_TMIABB:
21011 case ARM_BUILTIN_WQMIABB:
21012 case ARM_BUILTIN_WQMIABT:
21013 case ARM_BUILTIN_WQMIATB:
21014 case ARM_BUILTIN_WQMIATT:
21015 case ARM_BUILTIN_WQMIABBN:
21016 case ARM_BUILTIN_WQMIABTN:
21017 case ARM_BUILTIN_WQMIATBN:
21018 case ARM_BUILTIN_WQMIATTN:
21019 case ARM_BUILTIN_WMIABB:
21020 case ARM_BUILTIN_WMIABT:
21021 case ARM_BUILTIN_WMIATB:
21022 case ARM_BUILTIN_WMIATT:
21023 case ARM_BUILTIN_WMIABBN:
21024 case ARM_BUILTIN_WMIABTN:
21025 case ARM_BUILTIN_WMIATBN:
21026 case ARM_BUILTIN_WMIATTN:
21027 case ARM_BUILTIN_WMIAWBB:
21028 case ARM_BUILTIN_WMIAWBT:
21029 case ARM_BUILTIN_WMIAWTB:
21030 case ARM_BUILTIN_WMIAWTT:
21031 case ARM_BUILTIN_WMIAWBBN:
21032 case ARM_BUILTIN_WMIAWBTN:
21033 case ARM_BUILTIN_WMIAWTBN:
21034 case ARM_BUILTIN_WMIAWTTN:
21035 case ARM_BUILTIN_WSADB:
21036 case ARM_BUILTIN_WSADH:
21037 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21038 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21039 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21040 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21041 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21042 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21043 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21044 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21045 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21046 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21047 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21048 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21049 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21050 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21051 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21052 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21053 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21054 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21055 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21056 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21057 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21058 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21059 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21060 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21061 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21062 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21063 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21064 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21065 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21066 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21067 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21068 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21069 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21070 : CODE_FOR_iwmmxt_wsadh);
21071 arg0 = CALL_EXPR_ARG (exp, 0);
21072 arg1 = CALL_EXPR_ARG (exp, 1);
21073 arg2 = CALL_EXPR_ARG (exp, 2);
21074 op0 = expand_normal (arg0);
21075 op1 = expand_normal (arg1);
21076 op2 = expand_normal (arg2);
21077 tmode = insn_data[icode].operand[0].mode;
21078 mode0 = insn_data[icode].operand[1].mode;
21079 mode1 = insn_data[icode].operand[2].mode;
21080 mode2 = insn_data[icode].operand[3].mode;
21081
21082 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21083 op0 = copy_to_mode_reg (mode0, op0);
21084 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21085 op1 = copy_to_mode_reg (mode1, op1);
21086 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21087 op2 = copy_to_mode_reg (mode2, op2);
21088 if (target == 0
21089 || GET_MODE (target) != tmode
21090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21091 target = gen_reg_rtx (tmode);
21092 pat = GEN_FCN (icode) (target, op0, op1, op2);
21093 if (! pat)
21094 return 0;
21095 emit_insn (pat);
21096 return target;
21097
21098 case ARM_BUILTIN_WZERO:
21099 target = gen_reg_rtx (DImode);
21100 emit_insn (gen_iwmmxt_clrdi (target));
21101 return target;
21102
21103 case ARM_BUILTIN_WSRLHI:
21104 case ARM_BUILTIN_WSRLWI:
21105 case ARM_BUILTIN_WSRLDI:
21106 case ARM_BUILTIN_WSLLHI:
21107 case ARM_BUILTIN_WSLLWI:
21108 case ARM_BUILTIN_WSLLDI:
21109 case ARM_BUILTIN_WSRAHI:
21110 case ARM_BUILTIN_WSRAWI:
21111 case ARM_BUILTIN_WSRADI:
21112 case ARM_BUILTIN_WRORHI:
21113 case ARM_BUILTIN_WRORWI:
21114 case ARM_BUILTIN_WRORDI:
21115 case ARM_BUILTIN_WSRLH:
21116 case ARM_BUILTIN_WSRLW:
21117 case ARM_BUILTIN_WSRLD:
21118 case ARM_BUILTIN_WSLLH:
21119 case ARM_BUILTIN_WSLLW:
21120 case ARM_BUILTIN_WSLLD:
21121 case ARM_BUILTIN_WSRAH:
21122 case ARM_BUILTIN_WSRAW:
21123 case ARM_BUILTIN_WSRAD:
21124 case ARM_BUILTIN_WRORH:
21125 case ARM_BUILTIN_WRORW:
21126 case ARM_BUILTIN_WRORD:
21127 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21128 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21129 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21130 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21131 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21132 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21133 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21134 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21135 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21136 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21137 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21138 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21139 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21140 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21141 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21142 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21143 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21144 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21145 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21146 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21147 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21148 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21149 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21150 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21151 : CODE_FOR_nothing);
21152 arg1 = CALL_EXPR_ARG (exp, 1);
21153 op1 = expand_normal (arg1);
21154 if (GET_MODE (op1) == VOIDmode)
21155 {
21156 imm = INTVAL (op1);
21157 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21158 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21159 && (imm < 0 || imm > 32))
21160 {
21161 if (fcode == ARM_BUILTIN_WRORHI)
21162 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21163 else if (fcode == ARM_BUILTIN_WRORWI)
21164 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21165 else if (fcode == ARM_BUILTIN_WRORH)
21166 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21167 else
21168 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21169 }
21170 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21171 && (imm < 0 || imm > 64))
21172 {
21173 if (fcode == ARM_BUILTIN_WRORDI)
21174 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21175 else
21176 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21177 }
21178 else if (imm < 0)
21179 {
21180 if (fcode == ARM_BUILTIN_WSRLHI)
21181 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21182 else if (fcode == ARM_BUILTIN_WSRLWI)
21183 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21184 else if (fcode == ARM_BUILTIN_WSRLDI)
21185 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21186 else if (fcode == ARM_BUILTIN_WSLLHI)
21187 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21188 else if (fcode == ARM_BUILTIN_WSLLWI)
21189 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21190 else if (fcode == ARM_BUILTIN_WSLLDI)
21191 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21192 else if (fcode == ARM_BUILTIN_WSRAHI)
21193 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21194 else if (fcode == ARM_BUILTIN_WSRAWI)
21195 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21196 else if (fcode == ARM_BUILTIN_WSRADI)
21197 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21198 else if (fcode == ARM_BUILTIN_WSRLH)
21199 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21200 else if (fcode == ARM_BUILTIN_WSRLW)
21201 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21202 else if (fcode == ARM_BUILTIN_WSRLD)
21203 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21204 else if (fcode == ARM_BUILTIN_WSLLH)
21205 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21206 else if (fcode == ARM_BUILTIN_WSLLW)
21207 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21208 else if (fcode == ARM_BUILTIN_WSLLD)
21209 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21210 else if (fcode == ARM_BUILTIN_WSRAH)
21211 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21212 else if (fcode == ARM_BUILTIN_WSRAW)
21213 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21214 else
21215 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21216 }
21217 }
21218 return arm_expand_binop_builtin (icode, exp, target);
21219
21220 case ARM_BUILTIN_THREAD_POINTER:
21221 return arm_load_tp (target);
21222
21223 default:
21224 break;
21225 }
21226
21227 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21228 if (d->code == (const enum arm_builtins) fcode)
21229 return arm_expand_binop_builtin (d->icode, exp, target);
21230
21231 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21232 if (d->code == (const enum arm_builtins) fcode)
21233 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21234
21235 /* @@@ Should really do something sensible here. */
21236 return NULL_RTX;
21237 }
21238 \f
21239 /* Return the number (counting from 0) of
21240 the least significant set bit in MASK. */
21241
21242 inline static int
21243 number_of_first_bit_set (unsigned mask)
21244 {
21245 return ctz_hwi (mask);
21246 }
21247
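/* For example, number_of_first_bit_set (0x28) is 3, since bit 3 is
   the lowest bit set in 0b101000.  */
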
21248 /* Like emit_multi_reg_push, but allowing for a different set of
21249 registers to be described as saved. MASK is the set of registers
21250 to be saved; REAL_REGS is the set of registers to be described as
21251 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21252
21253 static rtx
21254 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21255 {
21256 unsigned long regno;
21257 rtx par[10], tmp, reg, insn;
21258 int i, j;
21259
21260 /* Build the parallel of the registers actually being stored. */
21261 for (i = 0; mask; ++i, mask &= mask - 1)
21262 {
21263 regno = ctz_hwi (mask);
21264 reg = gen_rtx_REG (SImode, regno);
21265
21266 if (i == 0)
21267 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21268 else
21269 tmp = gen_rtx_USE (VOIDmode, reg);
21270
21271 par[i] = tmp;
21272 }
21273
21274 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21275 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21276 tmp = gen_frame_mem (BLKmode, tmp);
21277 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21278 par[0] = tmp;
21279
21280 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21281 insn = emit_insn (tmp);
21282
21283 /* Always build the stack adjustment note for unwind info. */
21284 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21285 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21286 par[0] = tmp;
21287
21288 /* Build the parallel of the registers recorded as saved for unwind. */
21289 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21290 {
21291 regno = ctz_hwi (real_regs);
21292 reg = gen_rtx_REG (SImode, regno);
21293
21294 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21295 tmp = gen_frame_mem (SImode, tmp);
21296 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21297 RTX_FRAME_RELATED_P (tmp) = 1;
21298 par[j + 1] = tmp;
21299 }
21300
21301 if (j == 0)
21302 tmp = par[0];
21303 else
21304 {
21305 RTX_FRAME_RELATED_P (par[0]) = 1;
21306 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21307 }
21308
21309 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21310
21311 return insn;
21312 }
21313
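/* As a concrete sketch: with MASK covering {r4, r5, lr} and REAL_REGS
   equal to MASK, three words are stored through a PRE_MODIFY of the
   stack pointer by -12, and the attached REG_FRAME_RELATED_EXPR
   records both the 12-byte stack adjustment and the three saved
   registers for the unwinder.  */
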
21314 /* Emit code to pop registers from the stack. F is the
21315 assembly file. MASK is the registers to pop. */
21316 static void
21317 thumb_pop (FILE *f, unsigned long mask)
21318 {
21319 int regno;
21320 int lo_mask = mask & 0xFF;
21321 int pushed_words = 0;
21322
21323 gcc_assert (mask);
21324
21325 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21326 {
21327 /* Special case. Do not generate a POP PC statement here; do it in
21328 thumb_exit (). */
21329 thumb_exit (f, -1);
21330 return;
21331 }
21332
21333 fprintf (f, "\tpop\t{");
21334
21335 /* Look at the low registers first. */
21336 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21337 {
21338 if (lo_mask & 1)
21339 {
21340 asm_fprintf (f, "%r", regno);
21341
21342 if ((lo_mask & ~1) != 0)
21343 fprintf (f, ", ");
21344
21345 pushed_words++;
21346 }
21347 }
21348
21349 if (mask & (1 << PC_REGNUM))
21350 {
21351 /* Catch popping the PC. */
21352 if (TARGET_INTERWORK || TARGET_BACKTRACE
21353 || crtl->calls_eh_return)
21354 {
21355 /* The PC is never popped directly; instead
21356 it is popped into r3 and then BX is used. */
21357 fprintf (f, "}\n");
21358
21359 thumb_exit (f, -1);
21360
21361 return;
21362 }
21363 else
21364 {
21365 if (mask & 0xFF)
21366 fprintf (f, ", ");
21367
21368 asm_fprintf (f, "%r", PC_REGNUM);
21369 }
21370 }
21371
21372 fprintf (f, "}\n");
21373 }
21374
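/* For instance, MASK = (1 << 4) | (1 << 5) simply prints

       pop     {r4, r5}

   while a mask that includes PC_REGNUM either appends ", pc" or, when
   interworking, backtracing or __builtin_eh_return is involved, closes
   the pop early and lets thumb_exit emit the BX return sequence
   instead.  */
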
21375 /* Generate code to return from a thumb function.
21376 If 'reg_containing_return_addr' is -1, then the return address is
21377 actually on the stack, at the stack pointer. */
21378 static void
21379 thumb_exit (FILE *f, int reg_containing_return_addr)
21380 {
21381 unsigned regs_available_for_popping;
21382 unsigned regs_to_pop;
21383 int pops_needed;
21384 unsigned available;
21385 unsigned required;
21386 int mode;
21387 int size;
21388 int restore_a4 = FALSE;
21389
21390 /* Compute the registers we need to pop. */
21391 regs_to_pop = 0;
21392 pops_needed = 0;
21393
21394 if (reg_containing_return_addr == -1)
21395 {
21396 regs_to_pop |= 1 << LR_REGNUM;
21397 ++pops_needed;
21398 }
21399
21400 if (TARGET_BACKTRACE)
21401 {
21402 /* Restore the (ARM) frame pointer and stack pointer. */
21403 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21404 pops_needed += 2;
21405 }
21406
21407 /* If there is nothing to pop then just emit the BX instruction and
21408 return. */
21409 if (pops_needed == 0)
21410 {
21411 if (crtl->calls_eh_return)
21412 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21413
21414 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21415 return;
21416 }
21417 /* Otherwise, if we are not supporting interworking, have not created
21418 a backtrace structure, and the function was not entered in ARM mode,
21419 then just pop the return address straight into the PC. */
21420 else if (!TARGET_INTERWORK
21421 && !TARGET_BACKTRACE
21422 && !is_called_in_ARM_mode (current_function_decl)
21423 && !crtl->calls_eh_return)
21424 {
21425 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21426 return;
21427 }
21428
21429 /* Find out how many of the (return) argument registers we can corrupt. */
21430 regs_available_for_popping = 0;
21431
21432 /* If returning via __builtin_eh_return, the bottom three registers
21433 all contain information needed for the return. */
21434 if (crtl->calls_eh_return)
21435 size = 12;
21436 else
21437 {
21438 /* Deduce the registers used from the function's
21439 return value. This is more reliable than examining
21440 df_regs_ever_live_p () because that will be set if the register is
21441 ever used in the function, not just if the register is used
21442 to hold a return value. */
21443
21444 if (crtl->return_rtx != 0)
21445 mode = GET_MODE (crtl->return_rtx);
21446 else
21447 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21448
21449 size = GET_MODE_SIZE (mode);
21450
21451 if (size == 0)
21452 {
21453 /* In a void function we can use any argument register.
21454 In a function that returns a structure on the stack
21455 we can use the second and third argument registers. */
21456 if (mode == VOIDmode)
21457 regs_available_for_popping =
21458 (1 << ARG_REGISTER (1))
21459 | (1 << ARG_REGISTER (2))
21460 | (1 << ARG_REGISTER (3));
21461 else
21462 regs_available_for_popping =
21463 (1 << ARG_REGISTER (2))
21464 | (1 << ARG_REGISTER (3));
21465 }
21466 else if (size <= 4)
21467 regs_available_for_popping =
21468 (1 << ARG_REGISTER (2))
21469 | (1 << ARG_REGISTER (3));
21470 else if (size <= 8)
21471 regs_available_for_popping =
21472 (1 << ARG_REGISTER (3));
21473 }
21474
21475 /* Match registers to be popped with registers into which we pop them. */
21476 for (available = regs_available_for_popping,
21477 required = regs_to_pop;
21478 required != 0 && available != 0;
21479 available &= ~(available & - available),
21480 required &= ~(required & - required))
21481 -- pops_needed;
21482
21483 /* If we have any popping registers left over, remove them. */
21484 if (available > 0)
21485 regs_available_for_popping &= ~available;
21486
21487 /* Otherwise if we need another popping register we can use
21488 the fourth argument register. */
21489 else if (pops_needed)
21490 {
21491 /* If we have not found any free argument registers and
21492 reg a4 contains the return address, we must move it. */
21493 if (regs_available_for_popping == 0
21494 && reg_containing_return_addr == LAST_ARG_REGNUM)
21495 {
21496 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21497 reg_containing_return_addr = LR_REGNUM;
21498 }
21499 else if (size > 12)
21500 {
21501 /* Register a4 is being used to hold part of the return value,
21502 but we have dire need of a free, low register. */
21503 restore_a4 = TRUE;
21504
21505 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21506 }
21507
21508 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21509 {
21510 /* The fourth argument register is available. */
21511 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21512
21513 --pops_needed;
21514 }
21515 }
21516
21517 /* Pop as many registers as we can. */
21518 thumb_pop (f, regs_available_for_popping);
21519
21520 /* Process the registers we popped. */
21521 if (reg_containing_return_addr == -1)
21522 {
21523 /* The return address was popped into the lowest numbered register. */
21524 regs_to_pop &= ~(1 << LR_REGNUM);
21525
21526 reg_containing_return_addr =
21527 number_of_first_bit_set (regs_available_for_popping);
21528
21529 /* Remove this register from the mask of available registers, so that
21530 the return address will not be corrupted by further pops. */
21531 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21532 }
21533
21534 /* If we popped other registers then handle them here. */
21535 if (regs_available_for_popping)
21536 {
21537 int frame_pointer;
21538
21539 /* Work out which register currently contains the frame pointer. */
21540 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21541
21542 /* Move it into the correct place. */
21543 asm_fprintf (f, "\tmov\t%r, %r\n",
21544 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21545
21546 /* (Temporarily) remove it from the mask of popped registers. */
21547 regs_available_for_popping &= ~(1 << frame_pointer);
21548 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21549
21550 if (regs_available_for_popping)
21551 {
21552 int stack_pointer;
21553
21554 /* We popped the stack pointer as well;
21555 find the register that contains it. */
21556 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21557
21558 /* Move it into the stack register. */
21559 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21560
21561 /* At this point we have popped all necessary registers, so
21562 do not worry about restoring regs_available_for_popping
21563 to its correct value:
21564
21565 assert (pops_needed == 0)
21566 assert (regs_available_for_popping == (1 << frame_pointer))
21567 assert (regs_to_pop == (1 << STACK_POINTER)) */
21568 }
21569 else
21570 {
21571 /* Since we have just moved the popped value into the frame
21572 pointer, the popping register is available for reuse, and
21573 we know that we still have the stack pointer left to pop. */
21574 regs_available_for_popping |= (1 << frame_pointer);
21575 }
21576 }
21577
21578 /* If we still have registers left on the stack, but we no longer have
21579 any registers into which we can pop them, then we must move the return
21580 address into the link register and make available the register that
21581 contained it. */
21582 if (regs_available_for_popping == 0 && pops_needed > 0)
21583 {
21584 regs_available_for_popping |= 1 << reg_containing_return_addr;
21585
21586 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21587 reg_containing_return_addr);
21588
21589 reg_containing_return_addr = LR_REGNUM;
21590 }
21591
21592 /* If we have registers left on the stack then pop some more.
21593 We know that at most we will want to pop FP and SP. */
21594 if (pops_needed > 0)
21595 {
21596 int popped_into;
21597 int move_to;
21598
21599 thumb_pop (f, regs_available_for_popping);
21600
21601 /* We have popped either FP or SP.
21602 Move whichever one it is into the correct register. */
21603 popped_into = number_of_first_bit_set (regs_available_for_popping);
21604 move_to = number_of_first_bit_set (regs_to_pop);
21605
21606 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21607
21608 regs_to_pop &= ~(1 << move_to);
21609
21610 --pops_needed;
21611 }
21612
21613 /* If we still have not popped everything then we must have only
21614 had one register available to us and we are now popping the SP. */
21615 if (pops_needed > 0)
21616 {
21617 int popped_into;
21618
21619 thumb_pop (f, regs_available_for_popping);
21620
21621 popped_into = number_of_first_bit_set (regs_available_for_popping);
21622
21623 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21624 /*
21625 assert (regs_to_pop == (1 << STACK_POINTER))
21626 assert (pops_needed == 1)
21627 */
21628 }
21629
21630 /* If necessary restore the a4 register. */
21631 if (restore_a4)
21632 {
21633 if (reg_containing_return_addr != LR_REGNUM)
21634 {
21635 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21636 reg_containing_return_addr = LR_REGNUM;
21637 }
21638
21639 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21640 }
21641
21642 if (crtl->calls_eh_return)
21643 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21644
21645 /* Return to caller. */
21646 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21647 }
21648 \f
21649 /* Scan INSN just before assembler is output for it.
21650 For Thumb-1, we track the status of the condition codes; this
21651 information is used in the cbranchsi4_insn pattern. */
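/* For example, after a flag-setting instruction such as
       adds  r3, r2, #1
   we record r3 (compared against zero, CC_NOOVmode), so a later
   cbranchsi4_insn testing r3 against zero can reuse the flags instead
   of emitting a separate compare, giving roughly
       adds  r3, r2, #1
       bne   .L4
   This is only a sketch of the intent; the exact insns depend on the
   patterns selected.  */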
21652 void
21653 thumb1_final_prescan_insn (rtx insn)
21654 {
21655 if (flag_print_asm_name)
21656 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21657 INSN_ADDRESSES (INSN_UID (insn)));
21658 /* Don't overwrite the previous setter when we get to a cbranch. */
21659 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21660 {
21661 enum attr_conds conds;
21662
21663 if (cfun->machine->thumb1_cc_insn)
21664 {
21665 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21666 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21667 CC_STATUS_INIT;
21668 }
21669 conds = get_attr_conds (insn);
21670 if (conds == CONDS_SET)
21671 {
21672 rtx set = single_set (insn);
21673 cfun->machine->thumb1_cc_insn = insn;
21674 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21675 cfun->machine->thumb1_cc_op1 = const0_rtx;
21676 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21677 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21678 {
21679 rtx src1 = XEXP (SET_SRC (set), 1);
21680 if (src1 == const0_rtx)
21681 cfun->machine->thumb1_cc_mode = CCmode;
21682 }
21683 }
21684 else if (conds != CONDS_NOCOND)
21685 cfun->machine->thumb1_cc_insn = NULL_RTX;
21686 }
21687 }
21688
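/* Return 1 if VAL (viewed as a 32-bit constant) has all of its set
   bits inside a single 8-bit window, i.e. it is an 8-bit immediate
   shifted left by 0..24 bits.  For example 0x1fe00 (0xff << 9) is
   accepted, while 0x101 spans nine bits and is rejected; zero is
   also rejected.  */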
21689 int
21690 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21691 {
21692 unsigned HOST_WIDE_INT mask = 0xff;
21693 int i;
21694
21695 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21696 if (val == 0) /* XXX */
21697 return 0;
21698
21699 for (i = 0; i < 25; i++)
21700 if ((val & (mask << i)) == val)
21701 return 1;
21702
21703 return 0;
21704 }
21705
21706 /* Returns nonzero if the current function contains,
21707 or might contain, a far jump. */
21708 static int
21709 thumb_far_jump_used_p (void)
21710 {
21711 rtx insn;
21712
21713 /* This test is only important for leaf functions. */
21714 /* assert (!leaf_function_p ()); */
21715
21716 /* If we have already decided that far jumps may be used,
21717 do not bother checking again, and always return true even if
21718 it turns out that they are not being used. Once we have made
21719 the decision that far jumps are present (and that hence the link
21720 register will be pushed onto the stack) we cannot go back on it. */
21721 if (cfun->machine->far_jump_used)
21722 return 1;
21723
21724 /* If this function is not being called from the prologue/epilogue
21725 generation code then it must be being called from the
21726 INITIAL_ELIMINATION_OFFSET macro. */
21727 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21728 {
21729 /* In this case we know that we are being asked about the elimination
21730 of the arg pointer register. If that register is not being used,
21731 then there are no arguments on the stack, and we do not have to
21732 worry that a far jump might force the prologue to push the link
21733 register, changing the stack offsets. In this case we can just
21734 return false, since the presence of far jumps in the function will
21735 not affect stack offsets.
21736
21737 If the arg pointer is live (or if it was live, but has now been
21738 eliminated and so set to dead) then we do have to test to see if
21739 the function might contain a far jump. This test can lead to some
21740 false negatives, since before reload is completed, the length of
21741 branch instructions is not known, so gcc defaults to returning their
21742 longest length, which in turn sets the far jump attribute to true.
21743
21744 A false negative will not result in bad code being generated, but it
21745 will result in a needless push and pop of the link register. We
21746 hope that this does not occur too often.
21747
21748 If we need doubleword stack alignment this could affect the other
21749 elimination offsets so we can't risk getting it wrong. */
21750 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21751 cfun->machine->arg_pointer_live = 1;
21752 else if (!cfun->machine->arg_pointer_live)
21753 return 0;
21754 }
21755
21756 /* Check to see if the function contains a branch
21757 insn with the far jump attribute set. */
21758 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21759 {
21760 if (GET_CODE (insn) == JUMP_INSN
21761 /* Ignore tablejump patterns. */
21762 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21763 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21764 && get_attr_far_jump (insn) == FAR_JUMP_YES
21765 )
21766 {
21767 /* Record the fact that we have decided that
21768 the function does use far jumps. */
21769 cfun->machine->far_jump_used = 1;
21770 return 1;
21771 }
21772 }
21773
21774 return 0;
21775 }
21776
21777 /* Return nonzero if FUNC must be entered in ARM mode. */
21778 int
21779 is_called_in_ARM_mode (tree func)
21780 {
21781 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21782
21783 /* Ignore the problem of functions whose address is taken. */
21784 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21785 return TRUE;
21786
21787 #ifdef ARM_PE
21788 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21789 #else
21790 return FALSE;
21791 #endif
21792 }
21793
21794 /* Given the stack offsets and register mask in OFFSETS, decide how
21795 many additional registers to push instead of subtracting a constant
21796 from SP. For epilogues the principle is the same except we use pop.
21797 FOR_PROLOGUE indicates which we're generating. */
21798 static int
21799 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21800 {
21801 HOST_WIDE_INT amount;
21802 unsigned long live_regs_mask = offsets->saved_regs_mask;
21803 /* Extract a mask of the ones we can give to the Thumb's push/pop
21804 instruction. */
21805 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21806 /* Then count how many other high registers will need to be pushed. */
21807 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21808 int n_free, reg_base;
21809
21810 if (!for_prologue && frame_pointer_needed)
21811 amount = offsets->locals_base - offsets->saved_regs;
21812 else
21813 amount = offsets->outgoing_args - offsets->saved_regs;
21814
21815 /* If the stack frame size is 512 exactly, we can save one load
21816 instruction, which should make this a win even when optimizing
21817 for speed. */
21818 if (!optimize_size && amount != 512)
21819 return 0;
21820
21821 /* Can't do this if there are high registers to push. */
21822 if (high_regs_pushed != 0)
21823 return 0;
21824
21825 /* Shouldn't do it in the prologue if no registers would normally
21826 be pushed at all. In the epilogue, also allow it if we'll have
21827 a pop insn for the PC. */
21828 if (l_mask == 0
21829 && (for_prologue
21830 || TARGET_BACKTRACE
21831 || (live_regs_mask & 1 << LR_REGNUM) == 0
21832 || TARGET_INTERWORK
21833 || crtl->args.pretend_args_size != 0))
21834 return 0;
21835
21836 /* Don't do this if thumb_expand_prologue wants to emit instructions
21837 between the push and the stack frame allocation. */
21838 if (for_prologue
21839 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21840 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21841 return 0;
21842
21843 reg_base = 0;
21844 n_free = 0;
21845 if (!for_prologue)
21846 {
21847 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21848 live_regs_mask >>= reg_base;
21849 }
21850
21851 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21852 && (for_prologue || call_used_regs[reg_base + n_free]))
21853 {
21854 live_regs_mask >>= 1;
21855 n_free++;
21856 }
21857
21858 if (n_free == 0)
21859 return 0;
21860 gcc_assert (amount / 4 * 4 == amount);
21861
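/* For example, with AMOUNT == 516 and three free low registers the
   first test below fires: pushing (516 - 508) / 4 == 2 extra registers
   leaves a 508-byte adjustment that fits a single immediate.  With
   AMOUNT == 8 and at least two free registers the whole allocation is
   folded into the push (return 8 / 4 == 2).  */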
21862 if (amount >= 512 && (amount - n_free * 4) < 512)
21863 return (amount - 508) / 4;
21864 if (amount <= n_free * 4)
21865 return amount / 4;
21866 return 0;
21867 }
21868
21869 /* The bits which aren't usefully expanded as rtl. */
21870 const char *
21871 thumb1_unexpanded_epilogue (void)
21872 {
21873 arm_stack_offsets *offsets;
21874 int regno;
21875 unsigned long live_regs_mask = 0;
21876 int high_regs_pushed = 0;
21877 int extra_pop;
21878 int had_to_push_lr;
21879 int size;
21880
21881 if (cfun->machine->return_used_this_function != 0)
21882 return "";
21883
21884 if (IS_NAKED (arm_current_func_type ()))
21885 return "";
21886
21887 offsets = arm_get_frame_offsets ();
21888 live_regs_mask = offsets->saved_regs_mask;
21889 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21890
21891 /* We can deduce the registers used from the function's return value.
21892 This is more reliable than examining df_regs_ever_live_p () because that
21893 will be set if the register is ever used in the function, not just if
21894 the register is used to hold a return value. */
21895 size = arm_size_return_regs ();
21896
21897 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21898 if (extra_pop > 0)
21899 {
21900 unsigned long extra_mask = (1 << extra_pop) - 1;
21901 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21902 / UNITS_PER_WORD);
21903 }
21904
21905 /* The prolog may have pushed some high registers to use as
21906 work registers, e.g. the testsuite file:
21907 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21908 compiles to produce:
21909 push {r4, r5, r6, r7, lr}
21910 mov r7, r9
21911 mov r6, r8
21912 push {r6, r7}
21913 as part of the prolog. We have to undo that pushing here. */
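/* For the example above the matching epilogue will contain something
   like:
       pop  {r2, r3}
       mov  r8, r2
       mov  r9, r3
   before the final pop of the low registers and the return; the exact
   low registers used depend on the size of the return value.  */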
21914
21915 if (high_regs_pushed)
21916 {
21917 unsigned long mask = live_regs_mask & 0xff;
21918 int next_hi_reg;
21919
21920 /* The available low registers depend on the size of the value we are
21921 returning. */
21922 if (size <= 12)
21923 mask |= 1 << 3;
21924 if (size <= 8)
21925 mask |= 1 << 2;
21926
21927 if (mask == 0)
21928 /* Oh dear! We have no low registers into which we can pop
21929 high registers! */
21930 internal_error
21931 ("no low registers available for popping high registers");
21932
21933 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21934 if (live_regs_mask & (1 << next_hi_reg))
21935 break;
21936
21937 while (high_regs_pushed)
21938 {
21939 /* Find lo register(s) into which the high register(s) can
21940 be popped. */
21941 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21942 {
21943 if (mask & (1 << regno))
21944 high_regs_pushed--;
21945 if (high_regs_pushed == 0)
21946 break;
21947 }
21948
21949 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21950
21951 /* Pop the values into the low register(s). */
21952 thumb_pop (asm_out_file, mask);
21953
21954 /* Move the value(s) into the high registers. */
21955 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21956 {
21957 if (mask & (1 << regno))
21958 {
21959 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21960 regno);
21961
21962 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21963 if (live_regs_mask & (1 << next_hi_reg))
21964 break;
21965 }
21966 }
21967 }
21968 live_regs_mask &= ~0x0f00;
21969 }
21970
21971 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21972 live_regs_mask &= 0xff;
21973
21974 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21975 {
21976 /* Pop the return address into the PC. */
21977 if (had_to_push_lr)
21978 live_regs_mask |= 1 << PC_REGNUM;
21979
21980 /* Either no argument registers were pushed or a backtrace
21981 structure was created which includes an adjusted stack
21982 pointer, so just pop everything. */
21983 if (live_regs_mask)
21984 thumb_pop (asm_out_file, live_regs_mask);
21985
21986 /* We have either just popped the return address into the
21987 PC or it was kept in LR for the entire function.
21988 Note that thumb_pop has already called thumb_exit if the
21989 PC was in the list. */
21990 if (!had_to_push_lr)
21991 thumb_exit (asm_out_file, LR_REGNUM);
21992 }
21993 else
21994 {
21995 /* Pop everything but the return address. */
21996 if (live_regs_mask)
21997 thumb_pop (asm_out_file, live_regs_mask);
21998
21999 if (had_to_push_lr)
22000 {
22001 if (size > 12)
22002 {
22003 /* We have no free low regs, so save one. */
22004 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22005 LAST_ARG_REGNUM);
22006 }
22007
22008 /* Get the return address into a temporary register. */
22009 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22010
22011 if (size > 12)
22012 {
22013 /* Move the return address to lr. */
22014 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22015 LAST_ARG_REGNUM);
22016 /* Restore the low register. */
22017 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22018 IP_REGNUM);
22019 regno = LR_REGNUM;
22020 }
22021 else
22022 regno = LAST_ARG_REGNUM;
22023 }
22024 else
22025 regno = LR_REGNUM;
22026
22027 /* Remove the argument registers that were pushed onto the stack. */
22028 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22029 SP_REGNUM, SP_REGNUM,
22030 crtl->args.pretend_args_size);
22031
22032 thumb_exit (asm_out_file, regno);
22033 }
22034
22035 return "";
22036 }
22037
22038 /* Functions to save and restore machine-specific function data. */
22039 static struct machine_function *
22040 arm_init_machine_status (void)
22041 {
22042 struct machine_function *machine;
22043 machine = ggc_alloc_cleared_machine_function ();
22044
22045 #if ARM_FT_UNKNOWN != 0
22046 machine->func_type = ARM_FT_UNKNOWN;
22047 #endif
22048 return machine;
22049 }
22050
22051 /* Return an RTX indicating where the return address to the
22052 calling function can be found. */
22053 rtx
22054 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22055 {
22056 if (count != 0)
22057 return NULL_RTX;
22058
22059 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22060 }
22061
22062 /* Do anything needed before RTL is emitted for each function. */
22063 void
22064 arm_init_expanders (void)
22065 {
22066 /* Arrange to initialize and mark the machine per-function status. */
22067 init_machine_status = arm_init_machine_status;
22068
22069 /* This is to stop the combine pass optimizing away the alignment
22070 adjustment of va_arg. */
22071 /* ??? It is claimed that this should not be necessary. */
22072 if (cfun)
22073 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22074 }
22075
22076
22077 /* Like arm_compute_initial_elimination_offset. Simpler because there
22078 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
22079 to point at the base of the local variables after static stack
22080 space for a function has been allocated. */
22081
22082 HOST_WIDE_INT
22083 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22084 {
22085 arm_stack_offsets *offsets;
22086
22087 offsets = arm_get_frame_offsets ();
22088
22089 switch (from)
22090 {
22091 case ARG_POINTER_REGNUM:
22092 switch (to)
22093 {
22094 case STACK_POINTER_REGNUM:
22095 return offsets->outgoing_args - offsets->saved_args;
22096
22097 case FRAME_POINTER_REGNUM:
22098 return offsets->soft_frame - offsets->saved_args;
22099
22100 case ARM_HARD_FRAME_POINTER_REGNUM:
22101 return offsets->saved_regs - offsets->saved_args;
22102
22103 case THUMB_HARD_FRAME_POINTER_REGNUM:
22104 return offsets->locals_base - offsets->saved_args;
22105
22106 default:
22107 gcc_unreachable ();
22108 }
22109 break;
22110
22111 case FRAME_POINTER_REGNUM:
22112 switch (to)
22113 {
22114 case STACK_POINTER_REGNUM:
22115 return offsets->outgoing_args - offsets->soft_frame;
22116
22117 case ARM_HARD_FRAME_POINTER_REGNUM:
22118 return offsets->saved_regs - offsets->soft_frame;
22119
22120 case THUMB_HARD_FRAME_POINTER_REGNUM:
22121 return offsets->locals_base - offsets->soft_frame;
22122
22123 default:
22124 gcc_unreachable ();
22125 }
22126 break;
22127
22128 default:
22129 gcc_unreachable ();
22130 }
22131 }
22132
22133 /* Generate the function's prologue. */
22134
22135 void
22136 thumb1_expand_prologue (void)
22137 {
22138 rtx insn;
22139
22140 HOST_WIDE_INT amount;
22141 arm_stack_offsets *offsets;
22142 unsigned long func_type;
22143 int regno;
22144 unsigned long live_regs_mask;
22145 unsigned long l_mask;
22146 unsigned high_regs_pushed = 0;
22147
22148 func_type = arm_current_func_type ();
22149
22150 /* Naked functions don't have prologues. */
22151 if (IS_NAKED (func_type))
22152 return;
22153
22154 if (IS_INTERRUPT (func_type))
22155 {
22156 error ("interrupt Service Routines cannot be coded in Thumb mode");
22157 return;
22158 }
22159
22160 if (is_called_in_ARM_mode (current_function_decl))
22161 emit_insn (gen_prologue_thumb1_interwork ());
22162
22163 offsets = arm_get_frame_offsets ();
22164 live_regs_mask = offsets->saved_regs_mask;
22165
22166 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22167 l_mask = live_regs_mask & 0x40ff;
22168 /* Then count how many other high registers will need to be pushed. */
22169 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
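/* 0x40ff covers r0-r7 (bits 0-7) plus LR (bit 14), the registers a
   Thumb-1 PUSH can store directly; 0x0f00 covers r8-r11, which must
   first be copied into low registers before they can be pushed.  */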
22170
22171 if (crtl->args.pretend_args_size)
22172 {
22173 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22174
22175 if (cfun->machine->uses_anonymous_args)
22176 {
22177 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22178 unsigned long mask;
22179
22180 mask = 1ul << (LAST_ARG_REGNUM + 1);
22181 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22182
22183 insn = thumb1_emit_multi_reg_push (mask, 0);
22184 }
22185 else
22186 {
22187 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22188 stack_pointer_rtx, x));
22189 }
22190 RTX_FRAME_RELATED_P (insn) = 1;
22191 }
22192
22193 if (TARGET_BACKTRACE)
22194 {
22195 HOST_WIDE_INT offset = 0;
22196 unsigned work_register;
22197 rtx work_reg, x, arm_hfp_rtx;
22198
22199 /* We have been asked to create a stack backtrace structure.
22200 The code looks like this:
22201
22202 0 .align 2
22203 0 func:
22204 0 sub SP, #16 Reserve space for 4 registers.
22205 2 push {R7} Push low registers.
22206 4 add R7, SP, #20 Get the stack pointer before the push.
22207 6 str R7, [SP, #8] Store the stack pointer
22208 (before reserving the space).
22209 8 mov R7, PC Get hold of the start of this code + 12.
22210 10 str R7, [SP, #16] Store it.
22211 12 mov R7, FP Get hold of the current frame pointer.
22212 14 str R7, [SP, #4] Store it.
22213 16 mov R7, LR Get hold of the current return address.
22214 18 str R7, [SP, #12] Store it.
22215 20 add R7, SP, #16 Point at the start of the
22216 backtrace structure.
22217 22 mov FP, R7 Put this value into the frame pointer. */
22218
22219 work_register = thumb_find_work_register (live_regs_mask);
22220 work_reg = gen_rtx_REG (SImode, work_register);
22221 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22222
22223 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22224 stack_pointer_rtx, GEN_INT (-16)));
22225 RTX_FRAME_RELATED_P (insn) = 1;
22226
22227 if (l_mask)
22228 {
22229 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22230 RTX_FRAME_RELATED_P (insn) = 1;
22231
22232 offset = bit_count (l_mask) * UNITS_PER_WORD;
22233 }
22234
22235 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22236 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22237
22238 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22239 x = gen_frame_mem (SImode, x);
22240 emit_move_insn (x, work_reg);
22241
22242 /* Make sure that the instruction fetching the PC is in the right place
22243 to calculate "start of backtrace creation code + 12". */
22244 /* ??? The stores using the common WORK_REG ought to be enough to
22245 prevent the scheduler from doing anything weird. Failing that
22246 we could always move all of the following into an UNSPEC_VOLATILE. */
22247 if (l_mask)
22248 {
22249 x = gen_rtx_REG (SImode, PC_REGNUM);
22250 emit_move_insn (work_reg, x);
22251
22252 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22253 x = gen_frame_mem (SImode, x);
22254 emit_move_insn (x, work_reg);
22255
22256 emit_move_insn (work_reg, arm_hfp_rtx);
22257
22258 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22259 x = gen_frame_mem (SImode, x);
22260 emit_move_insn (x, work_reg);
22261 }
22262 else
22263 {
22264 emit_move_insn (work_reg, arm_hfp_rtx);
22265
22266 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22267 x = gen_frame_mem (SImode, x);
22268 emit_move_insn (x, work_reg);
22269
22270 x = gen_rtx_REG (SImode, PC_REGNUM);
22271 emit_move_insn (work_reg, x);
22272
22273 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22274 x = gen_frame_mem (SImode, x);
22275 emit_move_insn (x, work_reg);
22276 }
22277
22278 x = gen_rtx_REG (SImode, LR_REGNUM);
22279 emit_move_insn (work_reg, x);
22280
22281 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22282 x = gen_frame_mem (SImode, x);
22283 emit_move_insn (x, work_reg);
22284
22285 x = GEN_INT (offset + 12);
22286 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22287
22288 emit_move_insn (arm_hfp_rtx, work_reg);
22289 }
22290 /* Optimization: If we are not pushing any low registers but we are going
22291 to push some high registers then delay our first push. This will just
22292 be a push of LR and we can combine it with the push of the first high
22293 register. */
22294 else if ((l_mask & 0xff) != 0
22295 || (high_regs_pushed == 0 && l_mask))
22296 {
22297 unsigned long mask = l_mask;
22298 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22299 insn = thumb1_emit_multi_reg_push (mask, mask);
22300 RTX_FRAME_RELATED_P (insn) = 1;
22301 }
22302
22303 if (high_regs_pushed)
22304 {
22305 unsigned pushable_regs;
22306 unsigned next_hi_reg;
22307
22308 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22309 if (live_regs_mask & (1 << next_hi_reg))
22310 break;
22311
22312 pushable_regs = l_mask & 0xff;
22313
22314 if (pushable_regs == 0)
22315 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22316
22317 while (high_regs_pushed > 0)
22318 {
22319 unsigned long real_regs_mask = 0;
22320
22321 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22322 {
22323 if (pushable_regs & (1 << regno))
22324 {
22325 emit_move_insn (gen_rtx_REG (SImode, regno),
22326 gen_rtx_REG (SImode, next_hi_reg));
22327
22328 high_regs_pushed --;
22329 real_regs_mask |= (1 << next_hi_reg);
22330
22331 if (high_regs_pushed)
22332 {
22333 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22334 next_hi_reg --)
22335 if (live_regs_mask & (1 << next_hi_reg))
22336 break;
22337 }
22338 else
22339 {
22340 pushable_regs &= ~((1 << regno) - 1);
22341 break;
22342 }
22343 }
22344 }
22345
22346 /* If we had to find a work register and we have not yet
22347 saved the LR then add it to the list of regs to push. */
22348 if (l_mask == (1 << LR_REGNUM))
22349 {
22350 pushable_regs |= l_mask;
22351 real_regs_mask |= l_mask;
22352 l_mask = 0;
22353 }
22354
22355 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22356 RTX_FRAME_RELATED_P (insn) = 1;
22357 }
22358 }
22359
22360 /* Load the pic register before setting the frame pointer,
22361 so we can use r7 as a temporary work register. */
22362 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22363 arm_load_pic_register (live_regs_mask);
22364
22365 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22366 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22367 stack_pointer_rtx);
22368
22369 if (flag_stack_usage_info)
22370 current_function_static_stack_size
22371 = offsets->outgoing_args - offsets->saved_args;
22372
22373 amount = offsets->outgoing_args - offsets->saved_regs;
22374 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22375 if (amount)
22376 {
22377 if (amount < 512)
22378 {
22379 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22380 GEN_INT (- amount)));
22381 RTX_FRAME_RELATED_P (insn) = 1;
22382 }
22383 else
22384 {
22385 rtx reg, dwarf;
22386
22387 /* The stack decrement is too big for an immediate value in a single
22388 insn. In theory we could issue multiple subtracts, but after
22389 three of them it becomes more space efficient to place the full
22390 value in the constant pool and load into a register. (Also the
22391 ARM debugger really likes to see only one stack decrement per
22392 function). So instead we look for a scratch register into which
22393 we can load the decrement, and then we subtract this from the
22394 stack pointer. Unfortunately on the thumb the only available
22395 scratch registers are the argument registers, and we cannot use
22396 these as they may hold arguments to the function. Instead we
22397 attempt to locate a call preserved register which is used by this
22398 function. If we can find one, then we know that it will have
22399 been pushed at the start of the prologue and so we can corrupt
22400 it now. */
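/* The emitted sequence is therefore roughly
       ldr   r4, .Lc       @ r4 = -amount, loaded from the literal pool
       add   sp, r4
   where r4 stands for the first live call-saved low register found by
   the loop below (the actual register varies).  */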
22401 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22402 if (live_regs_mask & (1 << regno))
22403 break;
22404
22405 gcc_assert (regno <= LAST_LO_REGNUM);
22406
22407 reg = gen_rtx_REG (SImode, regno);
22408
22409 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22410
22411 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22412 stack_pointer_rtx, reg));
22413
22414 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22415 plus_constant (Pmode, stack_pointer_rtx,
22416 -amount));
22417 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22418 RTX_FRAME_RELATED_P (insn) = 1;
22419 }
22420 }
22421
22422 if (frame_pointer_needed)
22423 thumb_set_frame_pointer (offsets);
22424
22425 /* If we are profiling, make sure no instructions are scheduled before
22426 the call to mcount. Similarly if the user has requested no
22427 scheduling in the prolog. Similarly if we want non-call exceptions
22428 using the EABI unwinder, to prevent faulting instructions from being
22429 swapped with a stack adjustment. */
22430 if (crtl->profile || !TARGET_SCHED_PROLOG
22431 || (arm_except_unwind_info (&global_options) == UI_TARGET
22432 && cfun->can_throw_non_call_exceptions))
22433 emit_insn (gen_blockage ());
22434
22435 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22436 if (live_regs_mask & 0xff)
22437 cfun->machine->lr_save_eliminated = 0;
22438 }
22439
22440 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
22441 POP instruction can be generated. LR should be replaced by PC. All
22442 the checks required are already done by USE_RETURN_INSN (). Hence,
22443 all we really need to check here is whether a single register or
22444 multiple registers are to be popped. */
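/* A sketch of the two shapes produced: for a single saved register the
   PARALLEL built below corresponds to something like
       ldr   pc, [sp], #4
   while for several saved registers LR is replaced by PC in the mask
   so that a single
       pop   {..., pc}
   is emitted.  */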
22445 void
22446 thumb2_expand_return (void)
22447 {
22448 int i, num_regs;
22449 unsigned long saved_regs_mask;
22450 arm_stack_offsets *offsets;
22451
22452 offsets = arm_get_frame_offsets ();
22453 saved_regs_mask = offsets->saved_regs_mask;
22454
22455 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22456 if (saved_regs_mask & (1 << i))
22457 num_regs++;
22458
22459 if (saved_regs_mask)
22460 {
22461 if (num_regs == 1)
22462 {
22463 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22464 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22465 rtx addr = gen_rtx_MEM (SImode,
22466 gen_rtx_POST_INC (SImode,
22467 stack_pointer_rtx));
22468 set_mem_alias_set (addr, get_frame_alias_set ());
22469 XVECEXP (par, 0, 0) = ret_rtx;
22470 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22471 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22472 emit_jump_insn (par);
22473 }
22474 else
22475 {
22476 saved_regs_mask &= ~ (1 << LR_REGNUM);
22477 saved_regs_mask |= (1 << PC_REGNUM);
22478 arm_emit_multi_reg_pop (saved_regs_mask);
22479 }
22480 }
22481 else
22482 {
22483 emit_jump_insn (simple_return_rtx);
22484 }
22485 }
22486
22487 void
22488 thumb1_expand_epilogue (void)
22489 {
22490 HOST_WIDE_INT amount;
22491 arm_stack_offsets *offsets;
22492 int regno;
22493
22494 /* Naked functions don't have epilogues. */
22495 if (IS_NAKED (arm_current_func_type ()))
22496 return;
22497
22498 offsets = arm_get_frame_offsets ();
22499 amount = offsets->outgoing_args - offsets->saved_regs;
22500
22501 if (frame_pointer_needed)
22502 {
22503 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22504 amount = offsets->locals_base - offsets->saved_regs;
22505 }
22506 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22507
22508 gcc_assert (amount >= 0);
22509 if (amount)
22510 {
22511 emit_insn (gen_blockage ());
22512
22513 if (amount < 512)
22514 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22515 GEN_INT (amount)));
22516 else
22517 {
22518 /* r3 is always free in the epilogue. */
22519 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22520
22521 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22522 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22523 }
22524 }
22525
22526 /* Emit a USE (stack_pointer_rtx), so that
22527 the stack adjustment will not be deleted. */
22528 emit_insn (gen_prologue_use (stack_pointer_rtx));
22529
22530 if (crtl->profile || !TARGET_SCHED_PROLOG)
22531 emit_insn (gen_blockage ());
22532
22533 /* Emit a clobber for each insn that will be restored in the epilogue,
22534 so that flow2 will get register lifetimes correct. */
22535 for (regno = 0; regno < 13; regno++)
22536 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22537 emit_clobber (gen_rtx_REG (SImode, regno));
22538
22539 if (! df_regs_ever_live_p (LR_REGNUM))
22540 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22541 }
22542
22543 /* Epilogue code for APCS frame. */
22544 static void
22545 arm_expand_epilogue_apcs_frame (bool really_return)
22546 {
22547 unsigned long func_type;
22548 unsigned long saved_regs_mask;
22549 int num_regs = 0;
22550 int i;
22551 int floats_from_frame = 0;
22552 arm_stack_offsets *offsets;
22553
22554 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22555 func_type = arm_current_func_type ();
22556
22557 /* Get frame offsets for ARM. */
22558 offsets = arm_get_frame_offsets ();
22559 saved_regs_mask = offsets->saved_regs_mask;
22560
22561 /* Find the offset of the floating-point save area in the frame. */
22562 floats_from_frame = offsets->saved_args - offsets->frame;
22563
22564 /* Compute how many core registers are saved and how far away the floats are. */
22565 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22566 if (saved_regs_mask & (1 << i))
22567 {
22568 num_regs++;
22569 floats_from_frame += 4;
22570 }
22571
22572 if (TARGET_HARD_FLOAT && TARGET_VFP)
22573 {
22574 int start_reg;
22575
22576 /* The offset is from IP_REGNUM. */
22577 int saved_size = arm_get_vfp_saved_size ();
22578 if (saved_size > 0)
22579 {
22580 floats_from_frame += saved_size;
22581 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22582 hard_frame_pointer_rtx,
22583 GEN_INT (-floats_from_frame)));
22584 }
22585
22586 /* Generate VFP register multi-pop. */
22587 start_reg = FIRST_VFP_REGNUM;
22588
22589 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22590 /* Look for a case where a reg does not need restoring. */
22591 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22592 && (!df_regs_ever_live_p (i + 1)
22593 || call_used_regs[i + 1]))
22594 {
22595 if (start_reg != i)
22596 arm_emit_vfp_multi_reg_pop (start_reg,
22597 (i - start_reg) / 2,
22598 gen_rtx_REG (SImode,
22599 IP_REGNUM));
22600 start_reg = i + 2;
22601 }
22602
22603 /* Restore the remaining regs that we have discovered (or possibly
22604 even all of them, if the conditional in the for loop never
22605 fired). */
22606 if (start_reg != i)
22607 arm_emit_vfp_multi_reg_pop (start_reg,
22608 (i - start_reg) / 2,
22609 gen_rtx_REG (SImode, IP_REGNUM));
22610 }
22611
22612 if (TARGET_IWMMXT)
22613 {
22614 /* The frame pointer is guaranteed to be non-double-word aligned, as
22615 it is set to double-word-aligned old_stack_pointer - 4. */
22616 rtx insn;
22617 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22618
22619 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22620 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22621 {
22622 rtx addr = gen_frame_mem (V2SImode,
22623 plus_constant (Pmode, hard_frame_pointer_rtx,
22624 - lrm_count * 4));
22625 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22626 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22627 gen_rtx_REG (V2SImode, i),
22628 NULL_RTX);
22629 lrm_count += 2;
22630 }
22631 }
22632
22633 /* saved_regs_mask should contain IP, which holds the old stack pointer
22634 saved when the activation record was created. Since SP and IP are adjacent registers,
22635 we can restore the value directly into SP. */
22636 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22637 saved_regs_mask &= ~(1 << IP_REGNUM);
22638 saved_regs_mask |= (1 << SP_REGNUM);
22639
22640 /* There are two registers left in saved_regs_mask - LR and PC. We
22641 only need to restore LR (the return address), but to
22642 save time we can load it directly into PC, unless we need a
22643 special function exit sequence, or we are not really returning. */
22644 if (really_return
22645 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22646 && !crtl->calls_eh_return)
22647 /* Delete LR from the register mask, so that LR on
22648 the stack is loaded into the PC in the register mask. */
22649 saved_regs_mask &= ~(1 << LR_REGNUM);
22650 else
22651 saved_regs_mask &= ~(1 << PC_REGNUM);
22652
22653 num_regs = bit_count (saved_regs_mask);
22654 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22655 {
22656 /* Unwind the stack to just below the saved registers. */
22657 emit_insn (gen_addsi3 (stack_pointer_rtx,
22658 hard_frame_pointer_rtx,
22659 GEN_INT (- 4 * num_regs)));
22660 }
22661
22662 arm_emit_multi_reg_pop (saved_regs_mask);
22663
22664 if (IS_INTERRUPT (func_type))
22665 {
22666 /* Interrupt handlers will have pushed the
22667 IP onto the stack, so restore it now. */
22668 rtx insn;
22669 rtx addr = gen_rtx_MEM (SImode,
22670 gen_rtx_POST_INC (SImode,
22671 stack_pointer_rtx));
22672 set_mem_alias_set (addr, get_frame_alias_set ());
22673 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22674 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22675 gen_rtx_REG (SImode, IP_REGNUM),
22676 NULL_RTX);
22677 }
22678
22679 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22680 return;
22681
22682 if (crtl->calls_eh_return)
22683 emit_insn (gen_addsi3 (stack_pointer_rtx,
22684 stack_pointer_rtx,
22685 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
22686
22687 if (IS_STACKALIGN (func_type))
22688 /* Restore the original stack pointer. Before prologue, the stack was
22689 realigned and the original stack pointer saved in r0. For details,
22690 see comment in arm_expand_prologue. */
22691 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22692
22693 emit_jump_insn (simple_return_rtx);
22694 }
22695
22696 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
22697 function is not a sibcall. */
22698 void
22699 arm_expand_epilogue (bool really_return)
22700 {
22701 unsigned long func_type;
22702 unsigned long saved_regs_mask;
22703 int num_regs = 0;
22704 int i;
22705 int amount;
22706 int floats_from_frame = 0;
22707 arm_stack_offsets *offsets;
22708
22709 func_type = arm_current_func_type ();
22710
22711 /* Naked functions don't have epilogues. Hence, generate the return pattern and
22712 let output_return_instruction take care of any instruction emission. */
22713 if (IS_NAKED (func_type)
22714 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22715 {
22716 emit_jump_insn (simple_return_rtx);
22717 return;
22718 }
22719
22720 /* If we are throwing an exception, then we really must be doing a
22721 return, so we can't tail-call. */
22722 gcc_assert (!crtl->calls_eh_return || really_return);
22723
22724 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22725 {
22726 arm_expand_epilogue_apcs_frame (really_return);
22727 return;
22728 }
22729
22730 /* Get frame offsets for ARM. */
22731 offsets = arm_get_frame_offsets ();
22732 saved_regs_mask = offsets->saved_regs_mask;
22733
22734 /* Find the offset of the floating-point save area from the frame pointer.
22735 The initialization is done in this way to take care of frame pointer
22736 and static-chain register, if stored. */
22737 floats_from_frame = offsets->saved_args - offsets->frame;
22738 /* Compute how many registers are saved and how far away the floats will be. */
22739 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22740 if (saved_regs_mask & (1 << i))
22741 {
22742 num_regs++;
22743 floats_from_frame += 4;
22744 }
22745
22746 if (frame_pointer_needed)
22747 {
22748 /* Restore stack pointer if necessary. */
22749 if (TARGET_ARM)
22750 {
22751 /* In ARM mode, frame pointer points to first saved register.
22752 Restore stack pointer to last saved register. */
22753 amount = offsets->frame - offsets->saved_regs;
22754
22755 /* Force out any pending memory operations that reference stacked data
22756 before stack de-allocation occurs. */
22757 emit_insn (gen_blockage ());
22758 emit_insn (gen_addsi3 (stack_pointer_rtx,
22759 hard_frame_pointer_rtx,
22760 GEN_INT (amount)));
22761
22762 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22763 deleted. */
22764 emit_insn (gen_prologue_use (stack_pointer_rtx));
22765 }
22766 else
22767 {
22768 /* In Thumb-2 mode, the frame pointer points to the last saved
22769 register. */
22770 amount = offsets->locals_base - offsets->saved_regs;
22771 if (amount)
22772 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22773 hard_frame_pointer_rtx,
22774 GEN_INT (amount)));
22775
22776 /* Force out any pending memory operations that reference stacked data
22777 before stack de-allocation occurs. */
22778 emit_insn (gen_blockage ());
22779 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22780 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22781 deleted. */
22782 emit_insn (gen_prologue_use (stack_pointer_rtx));
22783 }
22784 }
22785 else
22786 {
22787 /* Pop off outgoing args and local frame to adjust stack pointer to
22788 last saved register. */
22789 amount = offsets->outgoing_args - offsets->saved_regs;
22790 if (amount)
22791 {
22792 /* Force out any pending memory operations that reference stacked data
22793 before stack de-allocation occurs. */
22794 emit_insn (gen_blockage ());
22795 emit_insn (gen_addsi3 (stack_pointer_rtx,
22796 stack_pointer_rtx,
22797 GEN_INT (amount)));
22798 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22799 not deleted. */
22800 emit_insn (gen_prologue_use (stack_pointer_rtx));
22801 }
22802 }
22803
22804 if (TARGET_HARD_FLOAT && TARGET_VFP)
22805 {
22806 /* Generate VFP register multi-pop. */
22807 int end_reg = LAST_VFP_REGNUM + 1;
22808
22809 /* Scan the registers in reverse order. We need to match
22810 any groupings made in the prologue and generate matching
22811 vldm operations. The need to match groups is because,
22812 unlike pop, vldm can only do consecutive regs. */
22813 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22814 /* Look for a case where a reg does not need restoring. */
22815 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22816 && (!df_regs_ever_live_p (i + 1)
22817 || call_used_regs[i + 1]))
22818 {
22819 /* Restore the regs discovered so far (from reg+2 to
22820 end_reg). */
22821 if (end_reg > i + 2)
22822 arm_emit_vfp_multi_reg_pop (i + 2,
22823 (end_reg - (i + 2)) / 2,
22824 stack_pointer_rtx);
22825 end_reg = i;
22826 }
22827
22828 /* Restore the remaining regs that we have discovered (or possibly
22829 even all of them, if the conditional in the for loop never
22830 fired). */
22831 if (end_reg > i + 2)
22832 arm_emit_vfp_multi_reg_pop (i + 2,
22833 (end_reg - (i + 2)) / 2,
22834 stack_pointer_rtx);
22835 }
22836
22837 if (TARGET_IWMMXT)
22838 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
22839 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22840 {
22841 rtx insn;
22842 rtx addr = gen_rtx_MEM (V2SImode,
22843 gen_rtx_POST_INC (SImode,
22844 stack_pointer_rtx));
22845 set_mem_alias_set (addr, get_frame_alias_set ());
22846 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22847 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22848 gen_rtx_REG (V2SImode, i),
22849 NULL_RTX);
22850 }
22851
22852 if (saved_regs_mask)
22853 {
22854 rtx insn;
22855 bool return_in_pc = false;
22856
22857 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
22858 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
22859 && !IS_STACKALIGN (func_type)
22860 && really_return
22861 && crtl->args.pretend_args_size == 0
22862 && saved_regs_mask & (1 << LR_REGNUM)
22863 && !crtl->calls_eh_return)
22864 {
22865 saved_regs_mask &= ~(1 << LR_REGNUM);
22866 saved_regs_mask |= (1 << PC_REGNUM);
22867 return_in_pc = true;
22868 }
22869
22870 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
22871 {
22872 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22873 if (saved_regs_mask & (1 << i))
22874 {
22875 rtx addr = gen_rtx_MEM (SImode,
22876 gen_rtx_POST_INC (SImode,
22877 stack_pointer_rtx));
22878 set_mem_alias_set (addr, get_frame_alias_set ());
22879
22880 if (i == PC_REGNUM)
22881 {
22882 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22883 XVECEXP (insn, 0, 0) = ret_rtx;
22884 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
22885 gen_rtx_REG (SImode, i),
22886 addr);
22887 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
22888 insn = emit_jump_insn (insn);
22889 }
22890 else
22891 {
22892 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
22893 addr));
22894 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22895 gen_rtx_REG (SImode, i),
22896 NULL_RTX);
22897 }
22898 }
22899 }
22900 else
22901 {
22902 arm_emit_multi_reg_pop (saved_regs_mask);
22903 }
22904
22905 if (return_in_pc)
22906 return;
22907 }
22908
22909 if (crtl->args.pretend_args_size)
22910 emit_insn (gen_addsi3 (stack_pointer_rtx,
22911 stack_pointer_rtx,
22912 GEN_INT (crtl->args.pretend_args_size)));
22913
22914 if (!really_return)
22915 return;
22916
22917 if (crtl->calls_eh_return)
22918 emit_insn (gen_addsi3 (stack_pointer_rtx,
22919 stack_pointer_rtx,
22920 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
22921
22922 if (IS_STACKALIGN (func_type))
22923 /* Restore the original stack pointer. Before prologue, the stack was
22924 realigned and the original stack pointer saved in r0. For details,
22925 see comment in arm_expand_prologue. */
22926 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22927
22928 emit_jump_insn (simple_return_rtx);
22929 }
22930
22931 /* Implementation of insn prologue_thumb1_interwork. This is the first
22932 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22933
22934 const char *
22935 thumb1_output_interwork (void)
22936 {
22937 const char * name;
22938 FILE *f = asm_out_file;
22939
22940 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22941 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22942 == SYMBOL_REF);
22943 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22944
22945 /* Generate code sequence to switch us into Thumb mode. */
22946 /* The .code 32 directive has already been emitted by
22947 ASM_DECLARE_FUNCTION_NAME. */
22948 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22949 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22950
22951 /* Generate a label, so that the debugger will notice the
22952 change in instruction sets. This label is also used by
22953 the assembler to bypass the ARM code when this function
22954 is called from a Thumb encoded function elsewhere in the
22955 same file. Hence the definition of STUB_NAME here must
22956 agree with the definition in gas/config/tc-arm.c. */
22957
22958 #define STUB_NAME ".real_start_of"
22959
22960 fprintf (f, "\t.code\t16\n");
22961 #ifdef ARM_PE
22962 if (arm_dllexport_name_p (name))
22963 name = arm_strip_name_encoding (name);
22964 #endif
22965 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22966 fprintf (f, "\t.thumb_func\n");
22967 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22968
22969 return "";
22970 }
22971
22972 /* Handle the case of a double word load into a low register from
22973 a computed memory address. The computed address may involve a
22974 register which is overwritten by the load. */
22975 const char *
22976 thumb_load_double_from_address (rtx *operands)
22977 {
22978 rtx addr;
22979 rtx base;
22980 rtx offset;
22981 rtx arg1;
22982 rtx arg2;
22983
22984 gcc_assert (GET_CODE (operands[0]) == REG);
22985 gcc_assert (GET_CODE (operands[1]) == MEM);
22986
22987 /* Get the memory address. */
22988 addr = XEXP (operands[1], 0);
22989
22990 /* Work out how the memory address is computed. */
22991 switch (GET_CODE (addr))
22992 {
22993 case REG:
22994 operands[2] = adjust_address (operands[1], SImode, 4);
22995
22996 if (REGNO (operands[0]) == REGNO (addr))
22997 {
22998 output_asm_insn ("ldr\t%H0, %2", operands);
22999 output_asm_insn ("ldr\t%0, %1", operands);
23000 }
23001 else
23002 {
23003 output_asm_insn ("ldr\t%0, %1", operands);
23004 output_asm_insn ("ldr\t%H0, %2", operands);
23005 }
23006 break;
23007
23008 case CONST:
23009 /* Compute <address> + 4 for the high order load. */
23010 operands[2] = adjust_address (operands[1], SImode, 4);
23011
23012 output_asm_insn ("ldr\t%0, %1", operands);
23013 output_asm_insn ("ldr\t%H0, %2", operands);
23014 break;
23015
23016 case PLUS:
23017 arg1 = XEXP (addr, 0);
23018 arg2 = XEXP (addr, 1);
23019
23020 if (CONSTANT_P (arg1))
23021 base = arg2, offset = arg1;
23022 else
23023 base = arg1, offset = arg2;
23024
23025 gcc_assert (GET_CODE (base) == REG);
23026
23027 /* Catch the case of <address> = <reg> + <reg> */
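/* For instance, loading r2/r3 from [r0 + r1] emits:
       add  r3, r0, r1
       ldr  r2, [r3, #0]
       ldr  r3, [r3, #4]
   The high half of the destination doubles as the address register,
   so the low word must be loaded first.  */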
23028 if (GET_CODE (offset) == REG)
23029 {
23030 int reg_offset = REGNO (offset);
23031 int reg_base = REGNO (base);
23032 int reg_dest = REGNO (operands[0]);
23033
23034 /* Add the base and offset registers together into the
23035 higher destination register. */
23036 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23037 reg_dest + 1, reg_base, reg_offset);
23038
23039 /* Load the lower destination register from the address in
23040 the higher destination register. */
23041 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23042 reg_dest, reg_dest + 1);
23043
23044 /* Load the higher destination register from its own address
23045 plus 4. */
23046 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23047 reg_dest + 1, reg_dest + 1);
23048 }
23049 else
23050 {
23051 /* Compute <address> + 4 for the high order load. */
23052 operands[2] = adjust_address (operands[1], SImode, 4);
23053
23054 /* If the computed address is held in the low order register
23055 then load the high order register first, otherwise always
23056 load the low order register first. */
23057 if (REGNO (operands[0]) == REGNO (base))
23058 {
23059 output_asm_insn ("ldr\t%H0, %2", operands);
23060 output_asm_insn ("ldr\t%0, %1", operands);
23061 }
23062 else
23063 {
23064 output_asm_insn ("ldr\t%0, %1", operands);
23065 output_asm_insn ("ldr\t%H0, %2", operands);
23066 }
23067 }
23068 break;
23069
23070 case LABEL_REF:
23071 /* With no registers to worry about we can just load the value
23072 directly. */
23073 operands[2] = adjust_address (operands[1], SImode, 4);
23074
23075 output_asm_insn ("ldr\t%H0, %2", operands);
23076 output_asm_insn ("ldr\t%0, %1", operands);
23077 break;
23078
23079 default:
23080 gcc_unreachable ();
23081 }
23082
23083 return "";
23084 }
23085
23086 const char *
23087 thumb_output_move_mem_multiple (int n, rtx *operands)
23088 {
23089 rtx tmp;
23090
23091 switch (n)
23092 {
23093 case 2:
23094 if (REGNO (operands[4]) > REGNO (operands[5]))
23095 {
23096 tmp = operands[4];
23097 operands[4] = operands[5];
23098 operands[5] = tmp;
23099 }
23100 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23101 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23102 break;
23103
23104 case 3:
23105 if (REGNO (operands[4]) > REGNO (operands[5]))
23106 {
23107 tmp = operands[4];
23108 operands[4] = operands[5];
23109 operands[5] = tmp;
23110 }
23111 if (REGNO (operands[5]) > REGNO (operands[6]))
23112 {
23113 tmp = operands[5];
23114 operands[5] = operands[6];
23115 operands[6] = tmp;
23116 }
23117 if (REGNO (operands[4]) > REGNO (operands[5]))
23118 {
23119 tmp = operands[4];
23120 operands[4] = operands[5];
23121 operands[5] = tmp;
23122 }
23123
23124 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23125 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23126 break;
23127
23128 default:
23129 gcc_unreachable ();
23130 }
23131
23132 return "";
23133 }
23134
23135 /* Output a call-via instruction for thumb state. */
23136 const char *
23137 thumb_call_via_reg (rtx reg)
23138 {
23139 int regno = REGNO (reg);
23140 rtx *labelp;
23141
23142 gcc_assert (regno < LR_REGNUM);
23143
23144 /* If we are in the normal text section we can use a single instance
23145 per compilation unit. If we are doing function sections, then we need
23146 an entry per section, since we can't rely on reachability. */
23147 if (in_section == text_section)
23148 {
23149 thumb_call_reg_needed = 1;
23150
23151 if (thumb_call_via_label[regno] == NULL)
23152 thumb_call_via_label[regno] = gen_label_rtx ();
23153 labelp = thumb_call_via_label + regno;
23154 }
23155 else
23156 {
23157 if (cfun->machine->call_via[regno] == NULL)
23158 cfun->machine->call_via[regno] = gen_label_rtx ();
23159 labelp = cfun->machine->call_via + regno;
23160 }
23161
23162 output_asm_insn ("bl\t%a0", labelp);
23163 return "";
23164 }
23165
23166 /* Routines for generating rtl. */
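/* A minimal sketch of how the length is consumed below: a 31-byte
   copy, for example, expands to two 12-byte block moves, then a word,
   a halfword and a byte copy at increasing offsets.  */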
23167 void
23168 thumb_expand_movmemqi (rtx *operands)
23169 {
23170 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23171 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23172 HOST_WIDE_INT len = INTVAL (operands[2]);
23173 HOST_WIDE_INT offset = 0;
23174
23175 while (len >= 12)
23176 {
23177 emit_insn (gen_movmem12b (out, in, out, in));
23178 len -= 12;
23179 }
23180
23181 if (len >= 8)
23182 {
23183 emit_insn (gen_movmem8b (out, in, out, in));
23184 len -= 8;
23185 }
23186
23187 if (len >= 4)
23188 {
23189 rtx reg = gen_reg_rtx (SImode);
23190 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23191 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23192 len -= 4;
23193 offset += 4;
23194 }
23195
23196 if (len >= 2)
23197 {
23198 rtx reg = gen_reg_rtx (HImode);
23199 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23200 plus_constant (Pmode, in,
23201 offset))));
23202 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23203 offset)),
23204 reg));
23205 len -= 2;
23206 offset += 2;
23207 }
23208
23209 if (len)
23210 {
23211 rtx reg = gen_reg_rtx (QImode);
23212 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23213 plus_constant (Pmode, in,
23214 offset))));
23215 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23216 offset)),
23217 reg));
23218 }
23219 }
23220
23221 void
23222 thumb_reload_out_hi (rtx *operands)
23223 {
23224 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23225 }
23226
23227 /* Handle reading a half-word from memory during reload. */
23228 void
23229 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23230 {
23231 gcc_unreachable ();
23232 }
23233
23234 /* Return the length of a function name prefix
23235 that starts with the character 'c'. */
23236 static int
23237 arm_get_strip_length (int c)
23238 {
23239 switch (c)
23240 {
23241 ARM_NAME_ENCODING_LENGTHS
23242 default: return 0;
23243 }
23244 }
23245
23246 /* Return a pointer to a function's name with any
23247 and all prefix encodings stripped from it. */
23248 const char *
23249 arm_strip_name_encoding (const char *name)
23250 {
23251 int skip;
23252
23253 while ((skip = arm_get_strip_length (* name)))
23254 name += skip;
23255
23256 return name;
23257 }
23258
23259 /* If there is a '*' anywhere in the name's prefix, then
23260 emit the stripped name verbatim, otherwise prepend an
23261 underscore if leading underscores are being used. */
23262 void
23263 arm_asm_output_labelref (FILE *stream, const char *name)
23264 {
23265 int skip;
23266 int verbatim = 0;
23267
23268 while ((skip = arm_get_strip_length (* name)))
23269 {
23270 verbatim |= (*name == '*');
23271 name += skip;
23272 }
23273
23274 if (verbatim)
23275 fputs (name, stream);
23276 else
23277 asm_fprintf (stream, "%U%s", name);
23278 }
23279
23280 /* This function is used to emit an EABI tag and its associated value.
23281 We emit the numerical value of the tag in case the assembler does not
23282 support textual tags (e.g. gas prior to 2.20). If requested we include
23283 the tag name in a comment so that anyone reading the assembler output
23284 will know which tag is being set.
23285
23286 This function is not static because arm-c.c needs it too. */
23287
23288 void
23289 arm_emit_eabi_attribute (const char *name, int num, int val)
23290 {
23291 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23292 if (flag_verbose_asm || flag_debug_asm)
23293 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23294 asm_fprintf (asm_out_file, "\n");
23295 }
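/* For example (assuming ASM_COMMENT_START is "@", as on ELF targets),
   arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) prints

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal

   under -fverbose-asm or -dA, and just ".eabi_attribute 20, 1"
   otherwise.  */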
23296
23297 static void
23298 arm_file_start (void)
23299 {
23300 int val;
23301
23302 if (TARGET_UNIFIED_ASM)
23303 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23304
23305 if (TARGET_BPABI)
23306 {
23307 const char *fpu_name;
23308 if (arm_selected_arch)
23309 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23310 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23311 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23312 else
23313 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23314
23315 if (TARGET_SOFT_FLOAT)
23316 {
23317 fpu_name = "softvfp";
23318 }
23319 else
23320 {
23321 fpu_name = arm_fpu_desc->name;
23322 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23323 {
23324 if (TARGET_HARD_FLOAT)
23325 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23326 if (TARGET_HARD_FLOAT_ABI)
23327 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23328 }
23329 }
23330 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23331
23332 /* Some of these attributes only apply when the corresponding features
23333 are used. However we don't have any easy way of figuring this out.
23334 Conservatively record the setting that would have been used. */
23335
23336 if (flag_rounding_math)
23337 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23338
23339 if (!flag_unsafe_math_optimizations)
23340 {
23341 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23342 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23343 }
23344 if (flag_signaling_nans)
23345 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23346
23347 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23348 flag_finite_math_only ? 1 : 3);
23349
23350 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23351 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23352 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23353 flag_short_enums ? 1 : 2);
23354
23355 /* Tag_ABI_optimization_goals. */
23356 if (optimize_size)
23357 val = 4;
23358 else if (optimize >= 2)
23359 val = 2;
23360 else if (optimize)
23361 val = 1;
23362 else
23363 val = 6;
23364 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23365
23366 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23367 unaligned_access);
23368
23369 if (arm_fp16_format)
23370 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23371 (int) arm_fp16_format);
23372
23373 if (arm_lang_output_object_attributes_hook)
23374 arm_lang_output_object_attributes_hook();
23375 }
23376
23377 default_file_start ();
23378 }
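/* As a rough example of the output above: compiling for an EABI target
   with -O2 -fshort-enums emits, among others,

	.eabi_attribute 26, 1
	.eabi_attribute 30, 2

   since flag_short_enums selects 1 for Tag_ABI_enum_size and
   optimize >= 2 selects 2 for Tag_ABI_optimization_goals (1 for -O1,
   4 for -Os, 6 for -O0).  */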
23379
23380 static void
23381 arm_file_end (void)
23382 {
23383 int regno;
23384
23385 if (NEED_INDICATE_EXEC_STACK)
23386 /* Add .note.GNU-stack. */
23387 file_end_indicate_exec_stack ();
23388
23389 if (! thumb_call_reg_needed)
23390 return;
23391
23392 switch_to_section (text_section);
23393 asm_fprintf (asm_out_file, "\t.code 16\n");
23394 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23395
23396 for (regno = 0; regno < LR_REGNUM; regno++)
23397 {
23398 rtx label = thumb_call_via_label[regno];
23399
23400 if (label != 0)
23401 {
23402 targetm.asm_out.internal_label (asm_out_file, "L",
23403 CODE_LABEL_NUMBER (label));
23404 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23405 }
23406 }
23407 }
23408
23409 #ifndef ARM_PE
23410 /* Symbols in the text segment can be accessed without indirecting via the
23411 constant pool; it may take an extra binary operation, but this is still
23412 faster than indirecting via memory. Don't do this when not optimizing,
23413 since we won't be calculating all of the offsets necessary to do this
23414 simplification. */
23415
23416 static void
23417 arm_encode_section_info (tree decl, rtx rtl, int first)
23418 {
23419 if (optimize > 0 && TREE_CONSTANT (decl))
23420 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23421
23422 default_encode_section_info (decl, rtl, first);
23423 }
23424 #endif /* !ARM_PE */
23425
23426 static void
23427 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23428 {
23429 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23430 && !strcmp (prefix, "L"))
23431 {
23432 arm_ccfsm_state = 0;
23433 arm_target_insn = NULL;
23434 }
23435 default_internal_label (stream, prefix, labelno);
23436 }
23437
23438 /* Output code to add DELTA to the first argument, and then jump
23439 to FUNCTION. Used for C++ multiple inheritance. */
23440 static void
23441 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23442 HOST_WIDE_INT delta,
23443 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23444 tree function)
23445 {
23446 static int thunk_label = 0;
23447 char label[256];
23448 char labelpc[256];
23449 int mi_delta = delta;
23450 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23451 int shift = 0;
23452 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23453 ? 1 : 0);
23454 if (mi_delta < 0)
23455 mi_delta = - mi_delta;
23456
23457 if (TARGET_THUMB1)
23458 {
23459 int labelno = thunk_label++;
23460 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23461 /* Thunks are entered in arm mode when available. */
23462 if (TARGET_THUMB1_ONLY)
23463 {
23464 /* push r3 so we can use it as a temporary. */
23465 /* TODO: Omit this save if r3 is not used. */
23466 fputs ("\tpush {r3}\n", file);
23467 fputs ("\tldr\tr3, ", file);
23468 }
23469 else
23470 {
23471 fputs ("\tldr\tr12, ", file);
23472 }
23473 assemble_name (file, label);
23474 fputc ('\n', file);
23475 if (flag_pic)
23476 {
23477 /* If we are generating PIC, the ldr instruction below loads
23478 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23479 the address of the add + 8, so we have:
23480
23481 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23482 = target + 1.
23483
23484 Note that we have "+ 1" because some versions of GNU ld
23485 don't set the low bit of the result for R_ARM_REL32
23486 relocations against thumb function symbols.
23487 On ARMv6M this is +4, not +8. */
23488 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23489 assemble_name (file, labelpc);
23490 fputs (":\n", file);
23491 if (TARGET_THUMB1_ONLY)
23492 {
23493 /* This is 2 insns after the start of the thunk, so we know it
23494 is 4-byte aligned. */
23495 fputs ("\tadd\tr3, pc, r3\n", file);
23496 fputs ("\tmov r12, r3\n", file);
23497 }
23498 else
23499 fputs ("\tadd\tr12, pc, r12\n", file);
23500 }
23501 else if (TARGET_THUMB1_ONLY)
23502 fputs ("\tmov r12, r3\n", file);
23503 }
23504 if (TARGET_THUMB1_ONLY)
23505 {
23506 if (mi_delta > 255)
23507 {
23508 fputs ("\tldr\tr3, ", file);
23509 assemble_name (file, label);
23510 fputs ("+4\n", file);
23511 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23512 mi_op, this_regno, this_regno);
23513 }
23514 else if (mi_delta != 0)
23515 {
23516 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23517 mi_op, this_regno, this_regno,
23518 mi_delta);
23519 }
23520 }
23521 else
23522 {
23523 /* TODO: Use movw/movt for large constants when available. */
23524 while (mi_delta != 0)
23525 {
23526 if ((mi_delta & (3 << shift)) == 0)
23527 shift += 2;
23528 else
23529 {
23530 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23531 mi_op, this_regno, this_regno,
23532 mi_delta & (0xff << shift));
23533 mi_delta &= ~(0xff << shift);
23534 shift += 8;
23535 }
23536 }
23537 }
23538 if (TARGET_THUMB1)
23539 {
23540 if (TARGET_THUMB1_ONLY)
23541 fputs ("\tpop\t{r3}\n", file);
23542
23543 fprintf (file, "\tbx\tr12\n");
23544 ASM_OUTPUT_ALIGN (file, 2);
23545 assemble_name (file, label);
23546 fputs (":\n", file);
23547 if (flag_pic)
23548 {
23549 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23550 rtx tem = XEXP (DECL_RTL (function), 0);
23551 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23552 tem = gen_rtx_MINUS (GET_MODE (tem),
23553 tem,
23554 gen_rtx_SYMBOL_REF (Pmode,
23555 ggc_strdup (labelpc)));
23556 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23557 }
23558 else
23559 /* Output ".word .LTHUNKn". */
23560 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23561
23562 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23563 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23564 }
23565 else
23566 {
23567 fputs ("\tb\t", file);
23568 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23569 if (NEED_PLT_RELOC)
23570 fputs ("(PLT)", file);
23571 fputc ('\n', file);
23572 }
23573 }
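/* A worked example of the add decomposition above, assuming ARM or
   Thumb-2 code, a non-aggregate return and DELTA = 0x1234: the loop
   emits roughly

	add	r0, r0, #564	@ 0x1234 & (0xff << 2)
	add	r0, r0, #4096	@ remaining bits

   before the final branch to FUNCTION.  */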
23574
23575 int
23576 arm_emit_vector_const (FILE *file, rtx x)
23577 {
23578 int i;
23579 const char * pattern;
23580
23581 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23582
23583 switch (GET_MODE (x))
23584 {
23585 case V2SImode: pattern = "%08x"; break;
23586 case V4HImode: pattern = "%04x"; break;
23587 case V8QImode: pattern = "%02x"; break;
23588 default: gcc_unreachable ();
23589 }
23590
23591 fprintf (file, "0x");
23592 for (i = CONST_VECTOR_NUNITS (x); i--;)
23593 {
23594 rtx element;
23595
23596 element = CONST_VECTOR_ELT (x, i);
23597 fprintf (file, pattern, INTVAL (element));
23598 }
23599
23600 return 1;
23601 }
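/* For example, a V4HImode CONST_VECTOR with elements {1, 2, 3, 4} is
   printed with the lanes from the highest index down, giving
   "0x0004000300020001".  */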
23602
23603 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23604 HFmode constant pool entries are actually loaded with ldr. */
23605 void
23606 arm_emit_fp16_const (rtx c)
23607 {
23608 REAL_VALUE_TYPE r;
23609 long bits;
23610
23611 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23612 bits = real_to_target (NULL, &r, HFmode);
23613 if (WORDS_BIG_ENDIAN)
23614 assemble_zeros (2);
23615 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23616 if (!WORDS_BIG_ENDIAN)
23617 assemble_zeros (2);
23618 }
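/* For instance, the HFmode value 1.0 has the bit pattern 0x3c00, so on
   a little-endian target this emits a 16-bit 0x3c00 followed by two
   bytes of zero padding, filling the 4-byte pool slot that the ldr
   expects; big-endian targets emit the padding first.  */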
23619
23620 const char *
23621 arm_output_load_gr (rtx *operands)
23622 {
23623 rtx reg;
23624 rtx offset;
23625 rtx wcgr;
23626 rtx sum;
23627
23628 if (GET_CODE (operands [1]) != MEM
23629 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23630 || GET_CODE (reg = XEXP (sum, 0)) != REG
23631 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23632 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23633 return "wldrw%?\t%0, %1";
23634
23635 /* Fix up an out-of-range load of a GR register. */
23636 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23637 wcgr = operands[0];
23638 operands[0] = reg;
23639 output_asm_insn ("ldr%?\t%0, %1", operands);
23640
23641 operands[0] = wcgr;
23642 operands[1] = reg;
23643 output_asm_insn ("tmcr%?\t%0, %1", operands);
23644 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23645
23646 return "";
23647 }
23648
23649 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23650
23651 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23652 named arg and all anonymous args onto the stack.
23653 XXX I know the prologue shouldn't be pushing registers, but it is faster
23654 that way. */
23655
23656 static void
23657 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23658 enum machine_mode mode,
23659 tree type,
23660 int *pretend_size,
23661 int second_time ATTRIBUTE_UNUSED)
23662 {
23663 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23664 int nregs;
23665
23666 cfun->machine->uses_anonymous_args = 1;
23667 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23668 {
23669 nregs = pcum->aapcs_ncrn;
23670 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23671 nregs++;
23672 }
23673 else
23674 nregs = pcum->nregs;
23675
23676 if (nregs < NUM_ARG_REGS)
23677 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23678 }
23679
23680 /* Return nonzero if the CONSUMER instruction (a store) does not need
23681 PRODUCER's value to calculate the address. */
23682
23683 int
23684 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23685 {
23686 rtx value = PATTERN (producer);
23687 rtx addr = PATTERN (consumer);
23688
23689 if (GET_CODE (value) == COND_EXEC)
23690 value = COND_EXEC_CODE (value);
23691 if (GET_CODE (value) == PARALLEL)
23692 value = XVECEXP (value, 0, 0);
23693 value = XEXP (value, 0);
23694 if (GET_CODE (addr) == COND_EXEC)
23695 addr = COND_EXEC_CODE (addr);
23696 if (GET_CODE (addr) == PARALLEL)
23697 addr = XVECEXP (addr, 0, 0);
23698 addr = XEXP (addr, 0);
23699
23700 return !reg_overlap_mentioned_p (value, addr);
23701 }
23702
23703 /* Return nonzero if the CONSUMER instruction (a store) does need
23704 PRODUCER's value to calculate the address. */
23705
23706 int
23707 arm_early_store_addr_dep (rtx producer, rtx consumer)
23708 {
23709 return !arm_no_early_store_addr_dep (producer, consumer);
23710 }
23711
23712 /* Return nonzero if the CONSUMER instruction (a load) does need
23713 PRODUCER's value to calculate the address. */
23714
23715 int
23716 arm_early_load_addr_dep (rtx producer, rtx consumer)
23717 {
23718 rtx value = PATTERN (producer);
23719 rtx addr = PATTERN (consumer);
23720
23721 if (GET_CODE (value) == COND_EXEC)
23722 value = COND_EXEC_CODE (value);
23723 if (GET_CODE (value) == PARALLEL)
23724 value = XVECEXP (value, 0, 0);
23725 value = XEXP (value, 0);
23726 if (GET_CODE (addr) == COND_EXEC)
23727 addr = COND_EXEC_CODE (addr);
23728 if (GET_CODE (addr) == PARALLEL)
23729 {
23730 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
23731 addr = XVECEXP (addr, 0, 1);
23732 else
23733 addr = XVECEXP (addr, 0, 0);
23734 }
23735 addr = XEXP (addr, 1);
23736
23737 return reg_overlap_mentioned_p (value, addr);
23738 }
23739
23740 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23741 have an early register shift value or amount dependency on the
23742 result of PRODUCER. */
23743
23744 int
23745 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23746 {
23747 rtx value = PATTERN (producer);
23748 rtx op = PATTERN (consumer);
23749 rtx early_op;
23750
23751 if (GET_CODE (value) == COND_EXEC)
23752 value = COND_EXEC_CODE (value);
23753 if (GET_CODE (value) == PARALLEL)
23754 value = XVECEXP (value, 0, 0);
23755 value = XEXP (value, 0);
23756 if (GET_CODE (op) == COND_EXEC)
23757 op = COND_EXEC_CODE (op);
23758 if (GET_CODE (op) == PARALLEL)
23759 op = XVECEXP (op, 0, 0);
23760 op = XEXP (op, 1);
23761
23762 early_op = XEXP (op, 0);
23763 /* This is either an actual independent shift, or a shift applied to
23764 the first operand of another operation. We want the whole shift
23765 operation. */
23766 if (GET_CODE (early_op) == REG)
23767 early_op = op;
23768
23769 return !reg_overlap_mentioned_p (value, early_op);
23770 }
23771
23772 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23773 have an early register shift value dependency on the result of
23774 PRODUCER. */
23775
23776 int
23777 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23778 {
23779 rtx value = PATTERN (producer);
23780 rtx op = PATTERN (consumer);
23781 rtx early_op;
23782
23783 if (GET_CODE (value) == COND_EXEC)
23784 value = COND_EXEC_CODE (value);
23785 if (GET_CODE (value) == PARALLEL)
23786 value = XVECEXP (value, 0, 0);
23787 value = XEXP (value, 0);
23788 if (GET_CODE (op) == COND_EXEC)
23789 op = COND_EXEC_CODE (op);
23790 if (GET_CODE (op) == PARALLEL)
23791 op = XVECEXP (op, 0, 0);
23792 op = XEXP (op, 1);
23793
23794 early_op = XEXP (op, 0);
23795
23796 /* This is either an actual independent shift, or a shift applied to
23797 the first operand of another operation. We want the value being
23798 shifted, in either case. */
23799 if (GET_CODE (early_op) != REG)
23800 early_op = XEXP (early_op, 0);
23801
23802 return !reg_overlap_mentioned_p (value, early_op);
23803 }
23804
23805 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23806 have an early register mult dependency on the result of
23807 PRODUCER. */
23808
23809 int
23810 arm_no_early_mul_dep (rtx producer, rtx consumer)
23811 {
23812 rtx value = PATTERN (producer);
23813 rtx op = PATTERN (consumer);
23814
23815 if (GET_CODE (value) == COND_EXEC)
23816 value = COND_EXEC_CODE (value);
23817 if (GET_CODE (value) == PARALLEL)
23818 value = XVECEXP (value, 0, 0);
23819 value = XEXP (value, 0);
23820 if (GET_CODE (op) == COND_EXEC)
23821 op = COND_EXEC_CODE (op);
23822 if (GET_CODE (op) == PARALLEL)
23823 op = XVECEXP (op, 0, 0);
23824 op = XEXP (op, 1);
23825
23826 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23827 {
23828 if (GET_CODE (XEXP (op, 0)) == MULT)
23829 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23830 else
23831 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23832 }
23833
23834 return 0;
23835 }
23836
23837 /* We can't rely on the caller doing the proper promotion when
23838 using APCS or ATPCS. */
23839
23840 static bool
23841 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23842 {
23843 return !TARGET_AAPCS_BASED;
23844 }
23845
23846 static enum machine_mode
23847 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23848 enum machine_mode mode,
23849 int *punsignedp ATTRIBUTE_UNUSED,
23850 const_tree fntype ATTRIBUTE_UNUSED,
23851 int for_return ATTRIBUTE_UNUSED)
23852 {
23853 if (GET_MODE_CLASS (mode) == MODE_INT
23854 && GET_MODE_SIZE (mode) < 4)
23855 return SImode;
23856
23857 return mode;
23858 }
23859
23860 /* AAPCS based ABIs use short enums by default. */
23861
23862 static bool
23863 arm_default_short_enums (void)
23864 {
23865 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23866 }
23867
23868
23869 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23870
23871 static bool
23872 arm_align_anon_bitfield (void)
23873 {
23874 return TARGET_AAPCS_BASED;
23875 }
23876
23877
23878 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23879
23880 static tree
23881 arm_cxx_guard_type (void)
23882 {
23883 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23884 }
23885
23886 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23887 has an accumulator dependency on the result of the producer (a
23888 multiplication instruction) and no other dependency on that result. */
23889 int
23890 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23891 {
23892 rtx mul = PATTERN (producer);
23893 rtx mac = PATTERN (consumer);
23894 rtx mul_result;
23895 rtx mac_op0, mac_op1, mac_acc;
23896
23897 if (GET_CODE (mul) == COND_EXEC)
23898 mul = COND_EXEC_CODE (mul);
23899 if (GET_CODE (mac) == COND_EXEC)
23900 mac = COND_EXEC_CODE (mac);
23901
23902 /* Check that mul is of the form (set (...) (mult ...))
23903 and mla is of the form (set (...) (plus (mult ...) (...))). */
23904 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23905 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23906 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23907 return 0;
23908
23909 mul_result = XEXP (mul, 0);
23910 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23911 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23912 mac_acc = XEXP (XEXP (mac, 1), 1);
23913
23914 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23915 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23916 && !reg_overlap_mentioned_p (mul_result, mac_op1));
23917 }
23918
23919
23920 /* The EABI says test the least significant bit of a guard variable. */
23921
23922 static bool
23923 arm_cxx_guard_mask_bit (void)
23924 {
23925 return TARGET_AAPCS_BASED;
23926 }
23927
23928
23929 /* The EABI specifies that all array cookies are 8 bytes long. */
23930
23931 static tree
23932 arm_get_cookie_size (tree type)
23933 {
23934 tree size;
23935
23936 if (!TARGET_AAPCS_BASED)
23937 return default_cxx_get_cookie_size (type);
23938
23939 size = build_int_cst (sizetype, 8);
23940 return size;
23941 }
23942
23943
23944 /* The EABI says that array cookies should also contain the element size. */
23945
23946 static bool
23947 arm_cookie_has_size (void)
23948 {
23949 return TARGET_AAPCS_BASED;
23950 }
23951
23952
23953 /* The EABI says constructors and destructors should return a pointer to
23954 the object constructed/destroyed. */
23955
23956 static bool
23957 arm_cxx_cdtor_returns_this (void)
23958 {
23959 return TARGET_AAPCS_BASED;
23960 }
23961
23962 /* The EABI says that an inline function may never be the key
23963 method. */
23964
23965 static bool
23966 arm_cxx_key_method_may_be_inline (void)
23967 {
23968 return !TARGET_AAPCS_BASED;
23969 }
23970
23971 static void
23972 arm_cxx_determine_class_data_visibility (tree decl)
23973 {
23974 if (!TARGET_AAPCS_BASED
23975 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23976 return;
23977
23978 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23979 is exported. However, on systems without dynamic vague linkage,
23980 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23981 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23982 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23983 else
23984 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23985 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23986 }
23987
23988 static bool
23989 arm_cxx_class_data_always_comdat (void)
23990 {
23991 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23992 vague linkage if the class has no key function. */
23993 return !TARGET_AAPCS_BASED;
23994 }
23995
23996
23997 /* The EABI says __aeabi_atexit should be used to register static
23998 destructors. */
23999
24000 static bool
24001 arm_cxx_use_aeabi_atexit (void)
24002 {
24003 return TARGET_AAPCS_BASED;
24004 }
24005
24006
24007 void
24008 arm_set_return_address (rtx source, rtx scratch)
24009 {
24010 arm_stack_offsets *offsets;
24011 HOST_WIDE_INT delta;
24012 rtx addr;
24013 unsigned long saved_regs;
24014
24015 offsets = arm_get_frame_offsets ();
24016 saved_regs = offsets->saved_regs_mask;
24017
24018 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24019 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24020 else
24021 {
24022 if (frame_pointer_needed)
24023 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24024 else
24025 {
24026 /* LR will be the first saved register. */
24027 delta = offsets->outgoing_args - (offsets->frame + 4);
24028
24029
24030 if (delta >= 4096)
24031 {
24032 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24033 GEN_INT (delta & ~4095)));
24034 addr = scratch;
24035 delta &= 4095;
24036 }
24037 else
24038 addr = stack_pointer_rtx;
24039
24040 addr = plus_constant (Pmode, addr, delta);
24041 }
24042 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24043 }
24044 }
24045
24046
24047 void
24048 thumb_set_return_address (rtx source, rtx scratch)
24049 {
24050 arm_stack_offsets *offsets;
24051 HOST_WIDE_INT delta;
24052 HOST_WIDE_INT limit;
24053 int reg;
24054 rtx addr;
24055 unsigned long mask;
24056
24057 emit_use (source);
24058
24059 offsets = arm_get_frame_offsets ();
24060 mask = offsets->saved_regs_mask;
24061 if (mask & (1 << LR_REGNUM))
24062 {
24063 limit = 1024;
24064 /* Find the saved regs. */
24065 if (frame_pointer_needed)
24066 {
24067 delta = offsets->soft_frame - offsets->saved_args;
24068 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24069 if (TARGET_THUMB1)
24070 limit = 128;
24071 }
24072 else
24073 {
24074 delta = offsets->outgoing_args - offsets->saved_args;
24075 reg = SP_REGNUM;
24076 }
24077 /* Allow for the stack frame. */
24078 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24079 delta -= 16;
24080 /* The link register is always the first saved register. */
24081 delta -= 4;
24082
24083 /* Construct the address. */
24084 addr = gen_rtx_REG (SImode, reg);
24085 if (delta > limit)
24086 {
24087 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24088 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24089 addr = scratch;
24090 }
24091 else
24092 addr = plus_constant (Pmode, addr, delta);
24093
24094 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24095 }
24096 else
24097 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24098 }
24099
24100 /* Implements target hook vector_mode_supported_p. */
24101 bool
24102 arm_vector_mode_supported_p (enum machine_mode mode)
24103 {
24104 /* Neon also supports V2SImode, etc. listed in the clause below. */
24105 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24106 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24107 return true;
24108
24109 if ((TARGET_NEON || TARGET_IWMMXT)
24110 && ((mode == V2SImode)
24111 || (mode == V4HImode)
24112 || (mode == V8QImode)))
24113 return true;
24114
24115 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24116 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24117 || mode == V2HAmode))
24118 return true;
24119
24120 return false;
24121 }
24122
24123 /* Implements target hook array_mode_supported_p. */
24124
24125 static bool
24126 arm_array_mode_supported_p (enum machine_mode mode,
24127 unsigned HOST_WIDE_INT nelems)
24128 {
24129 if (TARGET_NEON
24130 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24131 && (nelems >= 2 && nelems <= 4))
24132 return true;
24133
24134 return false;
24135 }
24136
24137 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24138 registers when autovectorizing for Neon, at least until multiple vector
24139 widths are supported properly by the middle-end. */
24140
24141 static enum machine_mode
24142 arm_preferred_simd_mode (enum machine_mode mode)
24143 {
24144 if (TARGET_NEON)
24145 switch (mode)
24146 {
24147 case SFmode:
24148 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24149 case SImode:
24150 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24151 case HImode:
24152 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24153 case QImode:
24154 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24155 case DImode:
24156 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24157 return V2DImode;
24158 break;
24159
24160 default:;
24161 }
24162
24163 if (TARGET_REALLY_IWMMXT)
24164 switch (mode)
24165 {
24166 case SImode:
24167 return V2SImode;
24168 case HImode:
24169 return V4HImode;
24170 case QImode:
24171 return V8QImode;
24172
24173 default:;
24174 }
24175
24176 return word_mode;
24177 }
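/* For instance, with NEON enabled and the default quadword
   vectorization, SImode maps to V4SImode and QImode to V16QImode;
   with -mvectorize-with-neon-double they map to V2SImode and V8QImode
   instead, and on iWMMXt SImode maps to V2SImode.  */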
24178
24179 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24180
24181 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24182 using r0-r4 for function arguments, r7 for the stack frame and don't have
24183 enough left over to do doubleword arithmetic. For Thumb-2 all the
24184 potentially problematic instructions accept high registers so this is not
24185 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24186 that require many low registers. */
24187 static bool
24188 arm_class_likely_spilled_p (reg_class_t rclass)
24189 {
24190 if ((TARGET_THUMB1 && rclass == LO_REGS)
24191 || rclass == CC_REG)
24192 return true;
24193
24194 return false;
24195 }
24196
24197 /* Implements target hook small_register_classes_for_mode_p. */
24198 bool
24199 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24200 {
24201 return TARGET_THUMB1;
24202 }
24203
24204 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24205 ARM insns and therefore guarantee that the shift count is modulo 256.
24206 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24207 guarantee no particular behavior for out-of-range counts. */
24208
24209 static unsigned HOST_WIDE_INT
24210 arm_shift_truncation_mask (enum machine_mode mode)
24211 {
24212 return mode == SImode ? 255 : 0;
24213 }
24214
24215
24216 /* Map internal gcc register numbers to DWARF2 register numbers. */
24217
24218 unsigned int
24219 arm_dbx_register_number (unsigned int regno)
24220 {
24221 if (regno < 16)
24222 return regno;
24223
24224 if (IS_VFP_REGNUM (regno))
24225 {
24226 /* See comment in arm_dwarf_register_span. */
24227 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24228 return 64 + regno - FIRST_VFP_REGNUM;
24229 else
24230 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24231 }
24232
24233 if (IS_IWMMXT_GR_REGNUM (regno))
24234 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24235
24236 if (IS_IWMMXT_REGNUM (regno))
24237 return 112 + regno - FIRST_IWMMXT_REGNUM;
24238
24239 gcc_unreachable ();
24240 }
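/* Some example mappings from the code above: r0-r15 keep their
   numbers, s0 maps to 64 and s31 to 95, d16 maps to 272 (256 + 16),
   wCGR0 to 104 and wR0 to 112.  */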
24241
24242 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24243 GCC models them as 64 32-bit registers, so we need to describe this to
24244 the DWARF generation code. Other registers can use the default. */
24245 static rtx
24246 arm_dwarf_register_span (rtx rtl)
24247 {
24248 unsigned regno;
24249 int nregs;
24250 int i;
24251 rtx p;
24252
24253 regno = REGNO (rtl);
24254 if (!IS_VFP_REGNUM (regno))
24255 return NULL_RTX;
24256
24257 /* XXX FIXME: The EABI defines two VFP register ranges:
24258 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24259 256-287: D0-D31
24260 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24261 corresponding D register. Until GDB supports this, we shall use the
24262 legacy encodings. We also use these encodings for D0-D15 for
24263 compatibility with older debuggers. */
24264 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24265 return NULL_RTX;
24266
24267 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24268 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24269 regno = (regno - FIRST_VFP_REGNUM) / 2;
24270 for (i = 0; i < nregs; i++)
24271 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24272
24273 return p;
24274 }
24275
24276 #if ARM_UNWIND_INFO
24277 /* Emit unwind directives for a store-multiple instruction or stack pointer
24278 push during alignment.
24279 These should only ever be generated by the function prologue code, so
24280 expect them to have a particular form. */
24281
24282 static void
24283 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24284 {
24285 int i;
24286 HOST_WIDE_INT offset;
24287 HOST_WIDE_INT nregs;
24288 int reg_size;
24289 unsigned reg;
24290 unsigned lastreg;
24291 rtx e;
24292
24293 e = XVECEXP (p, 0, 0);
24294 if (GET_CODE (e) != SET)
24295 abort ();
24296
24297 /* First insn will adjust the stack pointer. */
24298 if (GET_CODE (e) != SET
24299 || GET_CODE (XEXP (e, 0)) != REG
24300 || REGNO (XEXP (e, 0)) != SP_REGNUM
24301 || GET_CODE (XEXP (e, 1)) != PLUS)
24302 abort ();
24303
24304 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24305 nregs = XVECLEN (p, 0) - 1;
24306
24307 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24308 if (reg < 16)
24309 {
24310 /* The function prologue may also push pc, but not annotate it as it is
24311 never restored. We turn this into a stack pointer adjustment. */
24312 if (nregs * 4 == offset - 4)
24313 {
24314 fprintf (asm_out_file, "\t.pad #4\n");
24315 offset -= 4;
24316 }
24317 reg_size = 4;
24318 fprintf (asm_out_file, "\t.save {");
24319 }
24320 else if (IS_VFP_REGNUM (reg))
24321 {
24322 reg_size = 8;
24323 fprintf (asm_out_file, "\t.vsave {");
24324 }
24325 else
24326 /* Unknown register type. */
24327 abort ();
24328
24329 /* If the stack increment doesn't match the size of the saved registers,
24330 something has gone horribly wrong. */
24331 if (offset != nregs * reg_size)
24332 abort ();
24333
24334 offset = 0;
24335 lastreg = 0;
24336 /* The remaining insns will describe the stores. */
24337 for (i = 1; i <= nregs; i++)
24338 {
24339 /* Expect (set (mem <addr>) (reg)).
24340 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24341 e = XVECEXP (p, 0, i);
24342 if (GET_CODE (e) != SET
24343 || GET_CODE (XEXP (e, 0)) != MEM
24344 || GET_CODE (XEXP (e, 1)) != REG)
24345 abort ();
24346
24347 reg = REGNO (XEXP (e, 1));
24348 if (reg < lastreg)
24349 abort ();
24350
24351 if (i != 1)
24352 fprintf (asm_out_file, ", ");
24353 /* We can't use %r for vfp because we need to use the
24354 double precision register names. */
24355 if (IS_VFP_REGNUM (reg))
24356 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24357 else
24358 asm_fprintf (asm_out_file, "%r", reg);
24359
24360 #ifdef ENABLE_CHECKING
24361 /* Check that the addresses are consecutive. */
24362 e = XEXP (XEXP (e, 0), 0);
24363 if (GET_CODE (e) == PLUS)
24364 {
24365 offset += reg_size;
24366 if (GET_CODE (XEXP (e, 0)) != REG
24367 || REGNO (XEXP (e, 0)) != SP_REGNUM
24368 || GET_CODE (XEXP (e, 1)) != CONST_INT
24369 || offset != INTVAL (XEXP (e, 1)))
24370 abort ();
24371 }
24372 else if (i != 1
24373 || GET_CODE (e) != REG
24374 || REGNO (e) != SP_REGNUM)
24375 abort ();
24376 #endif
24377 }
24378 fprintf (asm_out_file, "}\n");
24379 }
24380
24381 /* Emit unwind directives for a SET. */
24382
24383 static void
24384 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24385 {
24386 rtx e0;
24387 rtx e1;
24388 unsigned reg;
24389
24390 e0 = XEXP (p, 0);
24391 e1 = XEXP (p, 1);
24392 switch (GET_CODE (e0))
24393 {
24394 case MEM:
24395 /* Pushing a single register. */
24396 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24397 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
24398 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24399 abort ();
24400
24401 asm_fprintf (asm_out_file, "\t.save ");
24402 if (IS_VFP_REGNUM (REGNO (e1)))
24403 asm_fprintf(asm_out_file, "{d%d}\n",
24404 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24405 else
24406 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24407 break;
24408
24409 case REG:
24410 if (REGNO (e0) == SP_REGNUM)
24411 {
24412 /* A stack increment. */
24413 if (GET_CODE (e1) != PLUS
24414 || GET_CODE (XEXP (e1, 0)) != REG
24415 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24416 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24417 abort ();
24418
24419 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24420 -INTVAL (XEXP (e1, 1)));
24421 }
24422 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24423 {
24424 HOST_WIDE_INT offset;
24425
24426 if (GET_CODE (e1) == PLUS)
24427 {
24428 if (GET_CODE (XEXP (e1, 0)) != REG
24429 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24430 abort ();
24431 reg = REGNO (XEXP (e1, 0));
24432 offset = INTVAL (XEXP (e1, 1));
24433 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24434 HARD_FRAME_POINTER_REGNUM, reg,
24435 offset);
24436 }
24437 else if (GET_CODE (e1) == REG)
24438 {
24439 reg = REGNO (e1);
24440 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24441 HARD_FRAME_POINTER_REGNUM, reg);
24442 }
24443 else
24444 abort ();
24445 }
24446 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
24447 {
24448 /* Move from sp to reg. */
24449 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24450 }
24451 else if (GET_CODE (e1) == PLUS
24452 && GET_CODE (XEXP (e1, 0)) == REG
24453 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24454 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
24455 {
24456 /* Set reg to offset from sp. */
24457 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24458 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24459 }
24460 else
24461 abort ();
24462 break;
24463
24464 default:
24465 abort ();
24466 }
24467 }
24468
24469
24470 /* Emit unwind directives for the given insn. */
24471
24472 static void
24473 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24474 {
24475 rtx note, pat;
24476 bool handled_one = false;
24477
24478 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24479 return;
24480
24481 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24482 && (TREE_NOTHROW (current_function_decl)
24483 || crtl->all_throwers_are_sibcalls))
24484 return;
24485
24486 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24487 return;
24488
24489 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24490 {
24491 pat = XEXP (note, 0);
24492 switch (REG_NOTE_KIND (note))
24493 {
24494 case REG_FRAME_RELATED_EXPR:
24495 goto found;
24496
24497 case REG_CFA_REGISTER:
24498 if (pat == NULL)
24499 {
24500 pat = PATTERN (insn);
24501 if (GET_CODE (pat) == PARALLEL)
24502 pat = XVECEXP (pat, 0, 0);
24503 }
24504
24505 /* Only emitted for IS_STACKALIGN re-alignment. */
24506 {
24507 rtx dest, src;
24508 unsigned reg;
24509
24510 src = SET_SRC (pat);
24511 dest = SET_DEST (pat);
24512
24513 gcc_assert (src == stack_pointer_rtx);
24514 reg = REGNO (dest);
24515 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24516 reg + 0x90, reg);
24517 }
24518 handled_one = true;
24519 break;
24520
24521 case REG_CFA_DEF_CFA:
24522 case REG_CFA_EXPRESSION:
24523 case REG_CFA_ADJUST_CFA:
24524 case REG_CFA_OFFSET:
24525 /* ??? Only handling here what we actually emit. */
24526 gcc_unreachable ();
24527
24528 default:
24529 break;
24530 }
24531 }
24532 if (handled_one)
24533 return;
24534 pat = PATTERN (insn);
24535 found:
24536
24537 switch (GET_CODE (pat))
24538 {
24539 case SET:
24540 arm_unwind_emit_set (asm_out_file, pat);
24541 break;
24542
24543 case SEQUENCE:
24544 /* Store multiple. */
24545 arm_unwind_emit_sequence (asm_out_file, pat);
24546 break;
24547
24548 default:
24549 abort();
24550 }
24551 }
24552
24553
24554 /* Output a reference from a function exception table to the type_info
24555 object X. The EABI specifies that the symbol should be relocated by
24556 an R_ARM_TARGET2 relocation. */
24557
24558 static bool
24559 arm_output_ttype (rtx x)
24560 {
24561 fputs ("\t.word\t", asm_out_file);
24562 output_addr_const (asm_out_file, x);
24563 /* Use special relocations for symbol references. */
24564 if (GET_CODE (x) != CONST_INT)
24565 fputs ("(TARGET2)", asm_out_file);
24566 fputc ('\n', asm_out_file);
24567
24568 return TRUE;
24569 }
24570
24571 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24572
24573 static void
24574 arm_asm_emit_except_personality (rtx personality)
24575 {
24576 fputs ("\t.personality\t", asm_out_file);
24577 output_addr_const (asm_out_file, personality);
24578 fputc ('\n', asm_out_file);
24579 }
24580
24581 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24582
24583 static void
24584 arm_asm_init_sections (void)
24585 {
24586 exception_section = get_unnamed_section (0, output_section_asm_op,
24587 "\t.handlerdata");
24588 }
24589 #endif /* ARM_UNWIND_INFO */
24590
24591 /* Output unwind directives for the start/end of a function. */
24592
24593 void
24594 arm_output_fn_unwind (FILE * f, bool prologue)
24595 {
24596 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24597 return;
24598
24599 if (prologue)
24600 fputs ("\t.fnstart\n", f);
24601 else
24602 {
24603 /* If this function will never be unwound, then mark it as such.
24604 The same condition is used in arm_unwind_emit to suppress
24605 the frame annotations. */
24606 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24607 && (TREE_NOTHROW (current_function_decl)
24608 || crtl->all_throwers_are_sibcalls))
24609 fputs("\t.cantunwind\n", f);
24610
24611 fputs ("\t.fnend\n", f);
24612 }
24613 }
24614
24615 static bool
24616 arm_emit_tls_decoration (FILE *fp, rtx x)
24617 {
24618 enum tls_reloc reloc;
24619 rtx val;
24620
24621 val = XVECEXP (x, 0, 0);
24622 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24623
24624 output_addr_const (fp, val);
24625
24626 switch (reloc)
24627 {
24628 case TLS_GD32:
24629 fputs ("(tlsgd)", fp);
24630 break;
24631 case TLS_LDM32:
24632 fputs ("(tlsldm)", fp);
24633 break;
24634 case TLS_LDO32:
24635 fputs ("(tlsldo)", fp);
24636 break;
24637 case TLS_IE32:
24638 fputs ("(gottpoff)", fp);
24639 break;
24640 case TLS_LE32:
24641 fputs ("(tpoff)", fp);
24642 break;
24643 case TLS_DESCSEQ:
24644 fputs ("(tlsdesc)", fp);
24645 break;
24646 default:
24647 gcc_unreachable ();
24648 }
24649
24650 switch (reloc)
24651 {
24652 case TLS_GD32:
24653 case TLS_LDM32:
24654 case TLS_IE32:
24655 case TLS_DESCSEQ:
24656 fputs (" + (. - ", fp);
24657 output_addr_const (fp, XVECEXP (x, 0, 2));
24658 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
24659 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24660 output_addr_const (fp, XVECEXP (x, 0, 3));
24661 fputc (')', fp);
24662 break;
24663 default:
24664 break;
24665 }
24666
24667 return TRUE;
24668 }
24669
24670 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24671
24672 static void
24673 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24674 {
24675 gcc_assert (size == 4);
24676 fputs ("\t.word\t", file);
24677 output_addr_const (file, x);
24678 fputs ("(tlsldo)", file);
24679 }
24680
24681 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24682
24683 static bool
24684 arm_output_addr_const_extra (FILE *fp, rtx x)
24685 {
24686 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24687 return arm_emit_tls_decoration (fp, x);
24688 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24689 {
24690 char label[256];
24691 int labelno = INTVAL (XVECEXP (x, 0, 0));
24692
24693 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24694 assemble_name_raw (fp, label);
24695
24696 return TRUE;
24697 }
24698 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24699 {
24700 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24701 if (GOT_PCREL)
24702 fputs ("+.", fp);
24703 fputs ("-(", fp);
24704 output_addr_const (fp, XVECEXP (x, 0, 0));
24705 fputc (')', fp);
24706 return TRUE;
24707 }
24708 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24709 {
24710 output_addr_const (fp, XVECEXP (x, 0, 0));
24711 if (GOT_PCREL)
24712 fputs ("+.", fp);
24713 fputs ("-(", fp);
24714 output_addr_const (fp, XVECEXP (x, 0, 1));
24715 fputc (')', fp);
24716 return TRUE;
24717 }
24718 else if (GET_CODE (x) == CONST_VECTOR)
24719 return arm_emit_vector_const (fp, x);
24720
24721 return FALSE;
24722 }
24723
24724 /* Output assembly for a shift instruction.
24725 SET_FLAGS determines how the instruction modifies the condition codes.
24726 0 - Do not set condition codes.
24727 1 - Set condition codes.
24728 2 - Use smallest instruction. */
24729 const char *
24730 arm_output_shift(rtx * operands, int set_flags)
24731 {
24732 char pattern[100];
24733 static const char flag_chars[3] = {'?', '.', '!'};
24734 const char *shift;
24735 HOST_WIDE_INT val;
24736 char c;
24737
24738 c = flag_chars[set_flags];
24739 if (TARGET_UNIFIED_ASM)
24740 {
24741 shift = shift_op(operands[3], &val);
24742 if (shift)
24743 {
24744 if (val != -1)
24745 operands[2] = GEN_INT(val);
24746 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24747 }
24748 else
24749 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24750 }
24751 else
24752 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24753 output_asm_insn (pattern, operands);
24754 return "";
24755 }
24756
24757 /* Output assembly for a WMMX immediate shift instruction. */
24758 const char *
24759 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24760 {
24761 int shift = INTVAL (operands[2]);
24762 char templ[50];
24763 enum machine_mode opmode = GET_MODE (operands[0]);
24764
24765 gcc_assert (shift >= 0);
24766
24767 /* Handle a shift value that exceeds the maximum for the operand size:
24768 63 (for D qualifier), 31 (for W qualifier) or 15 (for H qualifier). */
24769 if (((opmode == V4HImode) && (shift > 15))
24770 || ((opmode == V2SImode) && (shift > 31))
24771 || ((opmode == DImode) && (shift > 63)))
24772 {
24773 if (wror_or_wsra)
24774 {
24775 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24776 output_asm_insn (templ, operands);
24777 if (opmode == DImode)
24778 {
24779 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24780 output_asm_insn (templ, operands);
24781 }
24782 }
24783 else
24784 {
24785 /* The destination register will contain all zeros. */
24786 sprintf (templ, "wzero\t%%0");
24787 output_asm_insn (templ, operands);
24788 }
24789 return "";
24790 }
24791
24792 if ((opmode == DImode) && (shift > 32))
24793 {
24794 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24795 output_asm_insn (templ, operands);
24796 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24797 output_asm_insn (templ, operands);
24798 }
24799 else
24800 {
24801 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24802 output_asm_insn (templ, operands);
24803 }
24804 return "";
24805 }
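/* For instance, a DImode immediate shift by 40 is split by the code
   above into a shift by #32 followed by a shift by #8 on the same
   destination, using whatever insn_name was passed in.  */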
24806
24807 /* Output assembly for a WMMX tinsr instruction. */
24808 const char *
24809 arm_output_iwmmxt_tinsr (rtx *operands)
24810 {
24811 int mask = INTVAL (operands[3]);
24812 int i;
24813 char templ[50];
24814 int units = mode_nunits[GET_MODE (operands[0])];
24815 gcc_assert ((mask & (mask - 1)) == 0);
24816 for (i = 0; i < units; ++i)
24817 {
24818 if ((mask & 0x01) == 1)
24819 {
24820 break;
24821 }
24822 mask >>= 1;
24823 }
24824 gcc_assert (i < units);
24825 {
24826 switch (GET_MODE (operands[0]))
24827 {
24828 case V8QImode:
24829 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24830 break;
24831 case V4HImode:
24832 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24833 break;
24834 case V2SImode:
24835 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24836 break;
24837 default:
24838 gcc_unreachable ();
24839 break;
24840 }
24841 output_asm_insn (templ, operands);
24842 }
24843 return "";
24844 }
24845
24846 /* Output a Thumb-1 casesi dispatch sequence. */
24847 const char *
24848 thumb1_output_casesi (rtx *operands)
24849 {
24850 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24851
24852 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24853
24854 switch (GET_MODE(diff_vec))
24855 {
24856 case QImode:
24857 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24858 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24859 case HImode:
24860 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24861 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24862 case SImode:
24863 return "bl\t%___gnu_thumb1_case_si";
24864 default:
24865 gcc_unreachable ();
24866 }
24867 }
24868
24869 /* Output a Thumb-2 casesi instruction. */
24870 const char *
24871 thumb2_output_casesi (rtx *operands)
24872 {
24873 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24874
24875 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24876
24877 output_asm_insn ("cmp\t%0, %1", operands);
24878 output_asm_insn ("bhi\t%l3", operands);
24879 switch (GET_MODE(diff_vec))
24880 {
24881 case QImode:
24882 return "tbb\t[%|pc, %0]";
24883 case HImode:
24884 return "tbh\t[%|pc, %0, lsl #1]";
24885 case SImode:
24886 if (flag_pic)
24887 {
24888 output_asm_insn ("adr\t%4, %l2", operands);
24889 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24890 output_asm_insn ("add\t%4, %4, %5", operands);
24891 return "bx\t%4";
24892 }
24893 else
24894 {
24895 output_asm_insn ("adr\t%4, %l2", operands);
24896 return "ldr\t%|pc, [%4, %0, lsl #2]";
24897 }
24898 default:
24899 gcc_unreachable ();
24900 }
24901 }
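/* For a HImode dispatch table the sequence above amounts to roughly

	cmp	%0, %1
	bhi	%l3
	tbh	[pc, %0, lsl #1]

   i.e. an unsigned bounds check followed by a table-branch-halfword.  */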
24902
24903 /* Most ARM cores are single issue, but some newer ones can dual issue.
24904 The scheduler descriptions rely on this being correct. */
24905 static int
24906 arm_issue_rate (void)
24907 {
24908 switch (arm_tune)
24909 {
24910 case cortexa15:
24911 return 3;
24912
24913 case cortexr4:
24914 case cortexr4f:
24915 case cortexr5:
24916 case genericv7a:
24917 case cortexa5:
24918 case cortexa8:
24919 case cortexa9:
24920 case fa726te:
24921 return 2;
24922
24923 default:
24924 return 1;
24925 }
24926 }
24927
24928 /* A table and a function to perform ARM-specific name mangling for
24929 NEON vector types in order to conform to the AAPCS (see "Procedure
24930 Call Standard for the ARM Architecture", Appendix A). To qualify
24931 for emission with the mangled names defined in that document, a
24932 vector type must not only be of the correct mode but also be
24933 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24934 typedef struct
24935 {
24936 enum machine_mode mode;
24937 const char *element_type_name;
24938 const char *aapcs_name;
24939 } arm_mangle_map_entry;
24940
24941 static arm_mangle_map_entry arm_mangle_map[] = {
24942 /* 64-bit containerized types. */
24943 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24944 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24945 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24946 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24947 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24948 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24949 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24950 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24951 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24952 /* 128-bit containerized types. */
24953 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24954 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24955 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24956 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24957 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24958 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24959 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24960 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24961 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24962 { VOIDmode, NULL, NULL }
24963 };
24964
24965 const char *
24966 arm_mangle_type (const_tree type)
24967 {
24968 arm_mangle_map_entry *pos = arm_mangle_map;
24969
24970 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24971 has to be mangled as if it is in the "std" namespace. */
24972 if (TARGET_AAPCS_BASED
24973 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24974 {
24975 static bool warned;
24976 if (!warned && warn_psabi && !in_system_header)
24977 {
24978 warned = true;
24979 inform (input_location,
24980 "the mangling of %<va_list%> has changed in GCC 4.4");
24981 }
24982 return "St9__va_list";
24983 }
24984
24985 /* Half-precision float. */
24986 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24987 return "Dh";
24988
24989 if (TREE_CODE (type) != VECTOR_TYPE)
24990 return NULL;
24991
24992 /* Check the mode of the vector type, and the name of the vector
24993 element type, against the table. */
24994 while (pos->mode != VOIDmode)
24995 {
24996 tree elt_type = TREE_TYPE (type);
24997
24998 if (pos->mode == TYPE_MODE (type)
24999 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25000 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25001 pos->element_type_name))
25002 return pos->aapcs_name;
25003
25004 pos++;
25005 }
25006
25007 /* Use the default mangling for unrecognized (possibly user-defined)
25008 vector types. */
25009 return NULL;
25010 }
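/* For example, assuming int8x8_t is defined in terms of
   __builtin_neon_qi (as in arm_neon.h), a C++ parameter of that type
   mangles as "15__simd64_int8_t" by the table above, so
   "void f (int8x8_t);" becomes _Z1f15__simd64_int8_t.  */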
25011
25012 /* Order of allocation of core registers for Thumb: this allocation is
25013 written over the corresponding initial entries of the array
25014 initialized with REG_ALLOC_ORDER. We allocate all low registers
25015 first. Saving and restoring a low register is usually cheaper than
25016 using a call-clobbered high register. */
25017
25018 static const int thumb_core_reg_alloc_order[] =
25019 {
25020 3, 2, 1, 0, 4, 5, 6, 7,
25021 14, 12, 8, 9, 10, 11
25022 };
25023
25024 /* Adjust register allocation order when compiling for Thumb. */
25025
25026 void
25027 arm_order_regs_for_local_alloc (void)
25028 {
25029 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25030 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25031 if (TARGET_THUMB)
25032 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25033 sizeof (thumb_core_reg_alloc_order));
25034 }
25035
25036 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25037
25038 bool
25039 arm_frame_pointer_required (void)
25040 {
25041 return (cfun->has_nonlocal_label
25042 || SUBTARGET_FRAME_POINTER_REQUIRED
25043 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25044 }
25045
25046 /* Thumb-1 is the only target without support for conditional execution,
25047 so return true if the target is not thumb1. */
25048 static bool
25049 arm_have_conditional_execution (void)
25050 {
25051 return !TARGET_THUMB1;
25052 }
25053
25054 static unsigned int
25055 arm_autovectorize_vector_sizes (void)
25056 {
25057 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25058 }
25059
25060 static bool
25061 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25062 {
25063 /* Vectors which aren't in packed structures will not be less aligned than
25064 the natural alignment of their element type, so this is safe. */
25065 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25066 return !is_packed;
25067
25068 return default_builtin_vector_alignment_reachable (type, is_packed);
25069 }
25070
25071 static bool
25072 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25073 const_tree type, int misalignment,
25074 bool is_packed)
25075 {
25076 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25077 {
25078 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25079
25080 if (is_packed)
25081 return align == 1;
25082
25083 /* If the misalignment is unknown, we should be able to handle the access
25084 so long as it is not to a member of a packed data structure. */
25085 if (misalignment == -1)
25086 return true;
25087
25088 /* Return true if the misalignment is a multiple of the natural alignment
25089 of the vector's element type. This is probably always going to be
25090 true in practice, since we've already established that this isn't a
25091 packed access. */
25092 return ((misalignment % align) == 0);
25093 }
25094
25095 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25096 is_packed);
25097 }
25098
25099 static void
25100 arm_conditional_register_usage (void)
25101 {
25102 int regno;
25103
25104 if (TARGET_THUMB1 && optimize_size)
25105 {
25106 /* When optimizing for size on Thumb-1, it's better not
25107 to use the HI regs, because of the overhead of
25108 stacking them. */
25109 for (regno = FIRST_HI_REGNUM;
25110 regno <= LAST_HI_REGNUM; ++regno)
25111 fixed_regs[regno] = call_used_regs[regno] = 1;
25112 }
25113
25114 /* The link register can be clobbered by any branch insn,
25115 but we have no way to track that at present, so mark
25116 it as unavailable. */
25117 if (TARGET_THUMB1)
25118 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25119
25120 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25121 {
25122 /* VFPv3 registers are disabled when earlier VFP
25123 versions are selected due to the definition of
25124 LAST_VFP_REGNUM. */
25125 for (regno = FIRST_VFP_REGNUM;
25126 regno <= LAST_VFP_REGNUM; ++ regno)
25127 {
25128 fixed_regs[regno] = 0;
25129 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25130 || regno >= FIRST_VFP_REGNUM + 32;
25131 }
25132 }
25133
25134 if (TARGET_REALLY_IWMMXT)
25135 {
25136 regno = FIRST_IWMMXT_GR_REGNUM;
25137 /* The 2002/10/09 revision of the XScale ABI has wCG0
25138 and wCG1 as call-preserved registers. The 2002/11/21
25139 revision changed this so that all wCG registers are
25140 scratch registers. */
25141 for (regno = FIRST_IWMMXT_GR_REGNUM;
25142 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25143 fixed_regs[regno] = 0;
25144 /* The XScale ABI has wR0 - wR9 as scratch registers,
25145 the rest as call-preserved registers. */
25146 for (regno = FIRST_IWMMXT_REGNUM;
25147 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25148 {
25149 fixed_regs[regno] = 0;
25150 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25151 }
25152 }
25153
25154 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25155 {
25156 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25157 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25158 }
25159 else if (TARGET_APCS_STACK)
25160 {
25161 fixed_regs[10] = 1;
25162 call_used_regs[10] = 1;
25163 }
25164 /* -mcaller-super-interworking reserves r11 for calls to
25165 _interwork_r11_call_via_rN(). Making the register global
25166 is an easy way of ensuring that it remains valid for all
25167 calls. */
25168 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25169 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25170 {
25171 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25172 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25173 if (TARGET_CALLER_INTERWORKING)
25174 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25175 }
25176 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25177 }
25178
25179 static reg_class_t
25180 arm_preferred_rename_class (reg_class_t rclass)
25181 {
25182 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25183 using GENERAL_REGS. During the register rename pass we therefore
25184 prefer LO_REGS, which can reduce code size. */
25185 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25186 return LO_REGS;
25187 else
25188 return NO_REGS;
25189 }
25190
25191 /* Compute the attribute "length" of insn "*push_multi".
25192 This function MUST be kept in sync with that insn pattern. */
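/* As an illustrative example (not derived from the insn pattern itself):
   in Thumb-2, "push {r4, r5, lr}" can use the 16-bit encoding, so the
   length is 2, whereas "push {r4, r8}" involves a high register other
   than LR and needs the 32-bit encoding, so the length is 4.  */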
25193 int
25194 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25195 {
25196 int i, regno, hi_reg;
25197 int num_saves = XVECLEN (parallel_op, 0);
25198
25199 /* ARM mode. */
25200 if (TARGET_ARM)
25201 return 4;
25202 /* Thumb1 mode. */
25203 if (TARGET_THUMB1)
25204 return 2;
25205
25206 /* Thumb2 mode. */
25207 regno = REGNO (first_op);
25208 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25209 for (i = 1; i < num_saves && !hi_reg; i++)
25210 {
25211 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25212 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25213 }
25214
25215 if (!hi_reg)
25216 return 2;
25217 return 4;
25218 }
25219
25220 /* Compute the number of instructions emitted by output_move_double. */
25221 int
25222 arm_count_output_move_double_insns (rtx *operands)
25223 {
25224 int count;
25225 rtx ops[2];
25226 /* output_move_double may modify the operands array, so call it
25227 here on a copy of the array. */
25228 ops[0] = operands[0];
25229 ops[1] = operands[1];
25230 output_move_double (ops, false, &count);
25231 return count;
25232 }
25233
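/* Return the number of fraction bits if OPERAND is a CONST_DOUBLE whose
   exact reciprocal is a power of two that fits in 32 bits (for example,
   0.125 yields 3); return 0 otherwise.  */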
25234 int
25235 vfp3_const_double_for_fract_bits (rtx operand)
25236 {
25237 REAL_VALUE_TYPE r0;
25238
25239 if (GET_CODE (operand) != CONST_DOUBLE)
25240 return 0;
25241
25242 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25243 if (exact_real_inverse (DFmode, &r0))
25244 {
25245 if (exact_real_truncate (DFmode, &r0))
25246 {
25247 HOST_WIDE_INT value = real_to_integer (&r0);
25248 value = value & 0xffffffff;
25249 if ((value != 0) && ( (value & (value - 1)) == 0))
25250 return int_log2 (value);
25251 }
25252 }
25253 return 0;
25254 }
25255 \f
25256 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25257
25258 static void
25259 arm_pre_atomic_barrier (enum memmodel model)
25260 {
25261 if (need_atomic_barrier_p (model, true))
25262 emit_insn (gen_memory_barrier ());
25263 }
25264
25265 static void
25266 arm_post_atomic_barrier (enum memmodel model)
25267 {
25268 if (need_atomic_barrier_p (model, false))
25269 emit_insn (gen_memory_barrier ());
25270 }
25271
25272 /* Emit the load-exclusive and store-exclusive instructions. */
25273
25274 static void
25275 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25276 {
25277 rtx (*gen) (rtx, rtx);
25278
25279 switch (mode)
25280 {
25281 case QImode: gen = gen_arm_load_exclusiveqi; break;
25282 case HImode: gen = gen_arm_load_exclusivehi; break;
25283 case SImode: gen = gen_arm_load_exclusivesi; break;
25284 case DImode: gen = gen_arm_load_exclusivedi; break;
25285 default:
25286 gcc_unreachable ();
25287 }
25288
25289 emit_insn (gen (rval, mem));
25290 }
25291
25292 static void
25293 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25294 {
25295 rtx (*gen) (rtx, rtx, rtx);
25296
25297 switch (mode)
25298 {
25299 case QImode: gen = gen_arm_store_exclusiveqi; break;
25300 case HImode: gen = gen_arm_store_exclusivehi; break;
25301 case SImode: gen = gen_arm_store_exclusivesi; break;
25302 case DImode: gen = gen_arm_store_exclusivedi; break;
25303 default:
25304 gcc_unreachable ();
25305 }
25306
25307 emit_insn (gen (bval, rval, mem));
25308 }
25309
25310 /* Emit INSN as a jump and mark it as very unlikely to be taken. */
25311
25312 static void
25313 emit_unlikely_jump (rtx insn)
25314 {
25315 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25316
25317 insn = emit_jump_insn (insn);
25318 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25319 }
25320
25321 /* Expand a compare and swap pattern. */
25322
25323 void
25324 arm_expand_compare_and_swap (rtx operands[])
25325 {
25326 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25327 enum machine_mode mode;
25328 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25329
25330 bval = operands[0];
25331 rval = operands[1];
25332 mem = operands[2];
25333 oldval = operands[3];
25334 newval = operands[4];
25335 is_weak = operands[5];
25336 mod_s = operands[6];
25337 mod_f = operands[7];
25338 mode = GET_MODE (mem);
25339
25340 switch (mode)
25341 {
25342 case QImode:
25343 case HImode:
25344 /* For narrow modes, we're going to perform the comparison in SImode,
25345 so do the zero-extension now. */
25346 rval = gen_reg_rtx (SImode);
25347 oldval = convert_modes (SImode, mode, oldval, true);
25348 /* FALLTHRU */
25349
25350 case SImode:
25351 /* Force the value into a register if needed. We waited until after
25352 the zero-extension above to do this properly. */
25353 if (!arm_add_operand (oldval, mode))
25354 oldval = force_reg (mode, oldval);
25355 break;
25356
25357 case DImode:
25358 if (!cmpdi_operand (oldval, mode))
25359 oldval = force_reg (mode, oldval);
25360 break;
25361
25362 default:
25363 gcc_unreachable ();
25364 }
25365
25366 switch (mode)
25367 {
25368 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25369 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25370 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25371 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25372 default:
25373 gcc_unreachable ();
25374 }
25375
25376 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25377
25378 if (mode == QImode || mode == HImode)
25379 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25380
25381 /* In all cases, we arrange for success to be signaled by Z set.
25382 This arrangement allows for the boolean result to be used directly
25383 in a subsequent branch, post optimization. */
25384 x = gen_rtx_REG (CCmode, CC_REGNUM);
25385 x = gen_rtx_EQ (SImode, x, const0_rtx);
25386 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25387 }
25388
25389 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25390 another memory store between the load-exclusive and store-exclusive can
25391 reset the monitor from Exclusive to Open state. This means we must wait
25392 until after reload to split the pattern, lest we get a register spill in
25393 the middle of the atomic sequence. */
25394
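/* A rough, illustrative sketch of the sequence produced for a strong
   SImode compare-and-swap (register names are hypothetical):

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	scratch, newval, [mem]
		cmp	scratch, #0
		bne	1b
	2:

   with barriers emitted around the sequence as the memory model requires.  */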
25395 void
25396 arm_split_compare_and_swap (rtx operands[])
25397 {
25398 rtx rval, mem, oldval, newval, scratch;
25399 enum machine_mode mode;
25400 enum memmodel mod_s, mod_f;
25401 bool is_weak;
25402 rtx label1, label2, x, cond;
25403
25404 rval = operands[0];
25405 mem = operands[1];
25406 oldval = operands[2];
25407 newval = operands[3];
25408 is_weak = (operands[4] != const0_rtx);
25409 mod_s = (enum memmodel) INTVAL (operands[5]);
25410 mod_f = (enum memmodel) INTVAL (operands[6]);
25411 scratch = operands[7];
25412 mode = GET_MODE (mem);
25413
25414 arm_pre_atomic_barrier (mod_s);
25415
25416 label1 = NULL_RTX;
25417 if (!is_weak)
25418 {
25419 label1 = gen_label_rtx ();
25420 emit_label (label1);
25421 }
25422 label2 = gen_label_rtx ();
25423
25424 arm_emit_load_exclusive (mode, rval, mem);
25425
25426 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25427 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25428 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25429 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25430 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25431
25432 arm_emit_store_exclusive (mode, scratch, mem, newval);
25433
25434 /* Weak or strong, we want EQ to be true for success, so that we
25435 match the flags that we got from the compare above. */
25436 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25437 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25438 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25439
25440 if (!is_weak)
25441 {
25442 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25443 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25444 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25445 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25446 }
25447
25448 if (mod_f != MEMMODEL_RELAXED)
25449 emit_label (label2);
25450
25451 arm_post_atomic_barrier (mod_s);
25452
25453 if (mod_f == MEMMODEL_RELAXED)
25454 emit_label (label2);
25455 }
25456
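/* Split an atomic read-modify-write of MEM with operand VALUE into a
   load-exclusive/store-exclusive retry loop.  If non-null, OLD_OUT
   receives the value loaded before the operation and NEW_OUT the value
   stored back.  Roughly (an illustrative sketch, not the exact RTL):

	1:	ldrex	old, [mem]
		<code>	new, old, value
		strex	cond, new, [mem]
		cmp	cond, #0
		bne	1b

   with barriers emitted according to MODEL_RTX.  */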
25457 void
25458 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25459 rtx value, rtx model_rtx, rtx cond)
25460 {
25461 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25462 enum machine_mode mode = GET_MODE (mem);
25463 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25464 rtx label, x;
25465
25466 arm_pre_atomic_barrier (model);
25467
25468 label = gen_label_rtx ();
25469 emit_label (label);
25470
25471 if (new_out)
25472 new_out = gen_lowpart (wmode, new_out);
25473 if (old_out)
25474 old_out = gen_lowpart (wmode, old_out);
25475 else
25476 old_out = new_out;
25477 value = simplify_gen_subreg (wmode, value, mode, 0);
25478
25479 arm_emit_load_exclusive (mode, old_out, mem);
25480
25481 switch (code)
25482 {
25483 case SET:
25484 new_out = value;
25485 break;
25486
25487 case NOT:
25488 x = gen_rtx_AND (wmode, old_out, value);
25489 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25490 x = gen_rtx_NOT (wmode, new_out);
25491 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25492 break;
25493
25494 case MINUS:
25495 if (CONST_INT_P (value))
25496 {
25497 value = GEN_INT (-INTVAL (value));
25498 code = PLUS;
25499 }
25500 /* FALLTHRU */
25501
25502 case PLUS:
25503 if (mode == DImode)
25504 {
25505 /* DImode plus/minus need to clobber flags. */
25506 /* The adddi3 and subdi3 patterns are incorrectly written so that
25507 they require matching operands, even when we could easily support
25508 three operands. Thankfully, this can be fixed up post-splitting,
25509 as the individual add+adc patterns do accept three operands and
25510 post-reload cprop can make these moves go away. */
25511 emit_move_insn (new_out, old_out);
25512 if (code == PLUS)
25513 x = gen_adddi3 (new_out, new_out, value);
25514 else
25515 x = gen_subdi3 (new_out, new_out, value);
25516 emit_insn (x);
25517 break;
25518 }
25519 /* FALLTHRU */
25520
25521 default:
25522 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25523 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25524 break;
25525 }
25526
25527 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25528
25529 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25530 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25531
25532 arm_post_atomic_barrier (model);
25533 }
25534 \f
25535 #define MAX_VECT_LEN 16
25536
25537 struct expand_vec_perm_d
25538 {
25539 rtx target, op0, op1;
25540 unsigned char perm[MAX_VECT_LEN];
25541 enum machine_mode vmode;
25542 unsigned char nelt;
25543 bool one_vector_p;
25544 bool testing_p;
25545 };
25546
25547 /* Generate a variable permutation. */
25548
25549 static void
25550 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25551 {
25552 enum machine_mode vmode = GET_MODE (target);
25553 bool one_vector_p = rtx_equal_p (op0, op1);
25554
25555 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25556 gcc_checking_assert (GET_MODE (op0) == vmode);
25557 gcc_checking_assert (GET_MODE (op1) == vmode);
25558 gcc_checking_assert (GET_MODE (sel) == vmode);
25559 gcc_checking_assert (TARGET_NEON);
25560
25561 if (one_vector_p)
25562 {
25563 if (vmode == V8QImode)
25564 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25565 else
25566 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25567 }
25568 else
25569 {
25570 rtx pair;
25571
25572 if (vmode == V8QImode)
25573 {
25574 pair = gen_reg_rtx (V16QImode);
25575 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25576 pair = gen_lowpart (TImode, pair);
25577 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25578 }
25579 else
25580 {
25581 pair = gen_reg_rtx (OImode);
25582 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25583 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25584 }
25585 }
25586 }
25587
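/* Expand a VEC_PERM_EXPR with a variable selector SEL.  VTBL does not
   reduce out-of-range indices modulo the table size (it writes zero for
   them instead), so mask SEL down to the valid index range first and
   then emit the table lookup.  */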
25588 void
25589 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25590 {
25591 enum machine_mode vmode = GET_MODE (target);
25592 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25593 bool one_vector_p = rtx_equal_p (op0, op1);
25594 rtx rmask[MAX_VECT_LEN], mask;
25595
25596 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25597 numbering of elements for big-endian, we must reverse the order. */
25598 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25599
25600 /* The VTBL instruction does not use a modulo index, so we must take care
25601 of that ourselves. */
25602 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25603 for (i = 0; i < nelt; ++i)
25604 rmask[i] = mask;
25605 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25606 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25607
25608 arm_expand_vec_perm_1 (target, op0, op1, sel);
25609 }
25610
25611 /* Generate or test for an insn that supports a constant permutation. */
25612
25613 /* Recognize patterns for the VUZP insns. */
25614
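/* For example, with two V8QI operands an "unzip" selects the even-indexed
   or the odd-indexed elements of the concatenated input, i.e. a selector
   of { 0, 2, 4, 6, 8, 10, 12, 14 } or { 1, 3, 5, 7, 9, 11, 13, 15 }.  */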
25615 static bool
25616 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25617 {
25618 unsigned int i, odd, mask, nelt = d->nelt;
25619 rtx out0, out1, in0, in1, x;
25620 rtx (*gen)(rtx, rtx, rtx, rtx);
25621
25622 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25623 return false;
25624
25625 /* Note that these are little-endian tests. Adjust for big-endian later. */
25626 if (d->perm[0] == 0)
25627 odd = 0;
25628 else if (d->perm[0] == 1)
25629 odd = 1;
25630 else
25631 return false;
25632 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25633
25634 for (i = 0; i < nelt; i++)
25635 {
25636 unsigned elt = (i * 2 + odd) & mask;
25637 if (d->perm[i] != elt)
25638 return false;
25639 }
25640
25641 /* Success! */
25642 if (d->testing_p)
25643 return true;
25644
25645 switch (d->vmode)
25646 {
25647 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25648 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25649 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25650 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25651 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25652 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25653 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25654 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25655 default:
25656 gcc_unreachable ();
25657 }
25658
25659 in0 = d->op0;
25660 in1 = d->op1;
25661 if (BYTES_BIG_ENDIAN)
25662 {
25663 x = in0, in0 = in1, in1 = x;
25664 odd = !odd;
25665 }
25666
25667 out0 = d->target;
25668 out1 = gen_reg_rtx (d->vmode);
25669 if (odd)
25670 x = out0, out0 = out1, out1 = x;
25671
25672 emit_insn (gen (out0, in0, in1, out1));
25673 return true;
25674 }
25675
25676 /* Recognize patterns for the VZIP insns. */
25677
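/* For example, with two V8QI operands a "zip" interleaves the low or the
   high halves of the inputs, i.e. a selector of
   { 0, 8, 1, 9, 2, 10, 3, 11 } or { 4, 12, 5, 13, 6, 14, 7, 15 }.  */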
25678 static bool
25679 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25680 {
25681 unsigned int i, high, mask, nelt = d->nelt;
25682 rtx out0, out1, in0, in1, x;
25683 rtx (*gen)(rtx, rtx, rtx, rtx);
25684
25685 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25686 return false;
25687
25688 /* Note that these are little-endian tests. Adjust for big-endian later. */
25689 high = nelt / 2;
25690 if (d->perm[0] == high)
25691 ;
25692 else if (d->perm[0] == 0)
25693 high = 0;
25694 else
25695 return false;
25696 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25697
25698 for (i = 0; i < nelt / 2; i++)
25699 {
25700 unsigned elt = (i + high) & mask;
25701 if (d->perm[i * 2] != elt)
25702 return false;
25703 elt = (elt + nelt) & mask;
25704 if (d->perm[i * 2 + 1] != elt)
25705 return false;
25706 }
25707
25708 /* Success! */
25709 if (d->testing_p)
25710 return true;
25711
25712 switch (d->vmode)
25713 {
25714 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25715 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25716 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25717 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25718 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25719 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25720 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25721 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25722 default:
25723 gcc_unreachable ();
25724 }
25725
25726 in0 = d->op0;
25727 in1 = d->op1;
25728 if (BYTES_BIG_ENDIAN)
25729 {
25730 x = in0, in0 = in1, in1 = x;
25731 high = !high;
25732 }
25733
25734 out0 = d->target;
25735 out1 = gen_reg_rtx (d->vmode);
25736 if (high)
25737 x = out0, out0 = out1, out1 = x;
25738
25739 emit_insn (gen (out0, in0, in1, out1));
25740 return true;
25741 }
25742
25743 /* Recognize patterns for the VREV insns. */
25744
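/* For example, a single-operand V8QI selector of { 7, 6, 5, 4, 3, 2, 1, 0 }
   (diff == 7) reverses the bytes within each 64-bit doubleword and is
   matched to VREV64.8.  */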
25745 static bool
25746 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25747 {
25748 unsigned int i, j, diff, nelt = d->nelt;
25749 rtx (*gen)(rtx, rtx, rtx);
25750
25751 if (!d->one_vector_p)
25752 return false;
25753
25754 diff = d->perm[0];
25755 switch (diff)
25756 {
25757 case 7:
25758 switch (d->vmode)
25759 {
25760 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25761 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25762 default:
25763 return false;
25764 }
25765 break;
25766 case 3:
25767 switch (d->vmode)
25768 {
25769 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25770 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25771 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25772 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25773 default:
25774 return false;
25775 }
25776 break;
25777 case 1:
25778 switch (d->vmode)
25779 {
25780 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25781 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25782 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25783 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25784 case V4SImode: gen = gen_neon_vrev64v4si; break;
25785 case V2SImode: gen = gen_neon_vrev64v2si; break;
25786 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25787 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25788 default:
25789 return false;
25790 }
25791 break;
25792 default:
25793 return false;
25794 }
25795
25796 for (i = 0; i < nelt ; i += diff + 1)
25797 for (j = 0; j <= diff; j += 1)
25798 {
25799 /* This is guaranteed to hold, as the value of diff is
25800 7, 3 or 1 and we should have enough elements in the
25801 queue to generate this. Getting a vector mask with a
25802 value of diff other than these implies that something
25803 is wrong by the time we get here. */
25804 gcc_assert (i + j < nelt);
25805 if (d->perm[i + j] != i + diff - j)
25806 return false;
25807 }
25808
25809 /* Success! */
25810 if (d->testing_p)
25811 return true;
25812
25813 /* ??? The third operand is an artifact of the builtin infrastructure
25814 and is ignored by the actual instruction. */
25815 emit_insn (gen (d->target, d->op0, const0_rtx));
25816 return true;
25817 }
25818
25819 /* Recognize patterns for the VTRN insns. */
25820
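/* For example, with two V8QI operands a "transpose" step uses a selector
   of { 0, 8, 2, 10, 4, 12, 6, 14 } or { 1, 9, 3, 11, 5, 13, 7, 15 }.  */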
25821 static bool
25822 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25823 {
25824 unsigned int i, odd, mask, nelt = d->nelt;
25825 rtx out0, out1, in0, in1, x;
25826 rtx (*gen)(rtx, rtx, rtx, rtx);
25827
25828 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25829 return false;
25830
25831 /* Note that these are little-endian tests. Adjust for big-endian later. */
25832 if (d->perm[0] == 0)
25833 odd = 0;
25834 else if (d->perm[0] == 1)
25835 odd = 1;
25836 else
25837 return false;
25838 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25839
25840 for (i = 0; i < nelt; i += 2)
25841 {
25842 if (d->perm[i] != i + odd)
25843 return false;
25844 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25845 return false;
25846 }
25847
25848 /* Success! */
25849 if (d->testing_p)
25850 return true;
25851
25852 switch (d->vmode)
25853 {
25854 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25855 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25856 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25857 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25858 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25859 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25860 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25861 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25862 default:
25863 gcc_unreachable ();
25864 }
25865
25866 in0 = d->op0;
25867 in1 = d->op1;
25868 if (BYTES_BIG_ENDIAN)
25869 {
25870 x = in0, in0 = in1, in1 = x;
25871 odd = !odd;
25872 }
25873
25874 out0 = d->target;
25875 out1 = gen_reg_rtx (d->vmode);
25876 if (odd)
25877 x = out0, out0 = out1, out1 = x;
25878
25879 emit_insn (gen (out0, in0, in1, out1));
25880 return true;
25881 }
25882
25883 /* The NEON VTBL instruction is a fully variable permutation that's even
25884 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25885 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25886 can do slightly better by expanding this as a constant, where we don't
25887 have to apply a mask. */
25888
25889 static bool
25890 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25891 {
25892 rtx rperm[MAX_VECT_LEN], sel;
25893 enum machine_mode vmode = d->vmode;
25894 unsigned int i, nelt = d->nelt;
25895
25896 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25897 numbering of elements for big-endian, we must reverse the order. */
25898 if (BYTES_BIG_ENDIAN)
25899 return false;
25900
25901 if (d->testing_p)
25902 return true;
25903
25904 /* Generic code will try constant permutation twice: once with the
25905 original mode and again with the elements lowered to QImode.
25906 So wait, and don't do the selector expansion ourselves. */
25907 if (vmode != V8QImode && vmode != V16QImode)
25908 return false;
25909
25910 for (i = 0; i < nelt; ++i)
25911 rperm[i] = GEN_INT (d->perm[i]);
25912 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25913 sel = force_reg (vmode, sel);
25914
25915 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25916 return true;
25917 }
25918
25919 static bool
25920 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25921 {
25922 /* The pattern matching functions above are written to look for a small
25923 number to begin the sequence (0, 1, N/2). If we begin with an index
25924 from the second operand, we can swap the operands. */
25925 if (d->perm[0] >= d->nelt)
25926 {
25927 unsigned i, nelt = d->nelt;
25928 rtx x;
25929
25930 for (i = 0; i < nelt; ++i)
25931 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25932
25933 x = d->op0;
25934 d->op0 = d->op1;
25935 d->op1 = x;
25936 }
25937
25938 if (TARGET_NEON)
25939 {
25940 if (arm_evpc_neon_vuzp (d))
25941 return true;
25942 if (arm_evpc_neon_vzip (d))
25943 return true;
25944 if (arm_evpc_neon_vrev (d))
25945 return true;
25946 if (arm_evpc_neon_vtrn (d))
25947 return true;
25948 return arm_evpc_neon_vtbl (d);
25949 }
25950 return false;
25951 }
25952
25953 /* Expand a vec_perm_const pattern. */
25954
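/* An illustrative note: for V4SI a selector of { 0, 4, 1, 5 } draws
   elements from both operands, whereas { 4, 5, 6, 7 } only uses the second
   operand and is folded below into a single-operand permutation of
   { 0, 1, 2, 3 }.  */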
25955 bool
25956 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25957 {
25958 struct expand_vec_perm_d d;
25959 int i, nelt, which;
25960
25961 d.target = target;
25962 d.op0 = op0;
25963 d.op1 = op1;
25964
25965 d.vmode = GET_MODE (target);
25966 gcc_assert (VECTOR_MODE_P (d.vmode));
25967 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25968 d.testing_p = false;
25969
25970 for (i = which = 0; i < nelt; ++i)
25971 {
25972 rtx e = XVECEXP (sel, 0, i);
25973 int ei = INTVAL (e) & (2 * nelt - 1);
25974 which |= (ei < nelt ? 1 : 2);
25975 d.perm[i] = ei;
25976 }
25977
25978 switch (which)
25979 {
25980 default:
25981 gcc_unreachable();
25982
25983 case 3:
25984 d.one_vector_p = false;
25985 if (!rtx_equal_p (op0, op1))
25986 break;
25987
25988 /* The elements of PERM do not suggest that only the first operand
25989 is used, but both operands are identical. Allow easier matching
25990 of the permutation by folding the permutation into the single
25991 input vector. */
25992 /* FALLTHRU */
25993 case 2:
25994 for (i = 0; i < nelt; ++i)
25995 d.perm[i] &= nelt - 1;
25996 d.op0 = op1;
25997 d.one_vector_p = true;
25998 break;
25999
26000 case 1:
26001 d.op1 = op0;
26002 d.one_vector_p = true;
26003 break;
26004 }
26005
26006 return arm_expand_vec_perm_const_1 (&d);
26007 }
26008
26009 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
26010
26011 static bool
26012 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
26013 const unsigned char *sel)
26014 {
26015 struct expand_vec_perm_d d;
26016 unsigned int i, nelt, which;
26017 bool ret;
26018
26019 d.vmode = vmode;
26020 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26021 d.testing_p = true;
26022 memcpy (d.perm, sel, nelt);
26023
26024 /* Categorize the set of elements in the selector. */
26025 for (i = which = 0; i < nelt; ++i)
26026 {
26027 unsigned char e = d.perm[i];
26028 gcc_assert (e < 2 * nelt);
26029 which |= (e < nelt ? 1 : 2);
26030 }
26031
26032 /* If all elements are from the second vector, fold them to the first. */
26033 if (which == 2)
26034 for (i = 0; i < nelt; ++i)
26035 d.perm[i] -= nelt;
26036
26037 /* Check whether the mask can be applied to the vector type. */
26038 d.one_vector_p = (which != 3);
26039
26040 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
26041 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
26042 if (!d.one_vector_p)
26043 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
26044
26045 start_sequence ();
26046 ret = arm_expand_vec_perm_const_1 (&d);
26047 end_sequence ();
26048
26049 return ret;
26050 }
26051
26052 bool
26053 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
26054 {
26055 /* If we are soft float and either have LDRD or the mode fits in a
26056 single word, then all auto-increment forms are OK. */
26057 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26058 return true;
26059
26060 switch (code)
26061 {
26062 /* Post-increment and pre-decrement are supported for all instruction
26063 forms, except that pre-decrement is not supported for vector modes. */
26064 case ARM_POST_INC:
26065 case ARM_PRE_DEC:
26066 if (VECTOR_MODE_P (mode))
26067 {
26068 if (code != ARM_PRE_DEC)
26069 return true;
26070 else
26071 return false;
26072 }
26073
26074 return true;
26075
26076 case ARM_POST_DEC:
26077 case ARM_PRE_INC:
26078 /* Without LDRD, and with a mode size greater than the
26079 word size, there is no point in auto-incrementing
26080 because ldm and stm do not have these forms. */
26081 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26082 return false;
26083
26084 /* Vector and floating-point modes do not support
26085 these auto-increment forms. */
26086 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26087 return false;
26088
26089 return true;
26090
26091 default:
26092 return false;
26093
26094 }
26095
26096 return false;
26097 }
26098
26099 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26100 on ARM, since we know that shifts by negative amounts are no-ops.
26101 Additionally, the default expansion code is not available or suitable
26102 for post-reload insn splits (this can occur when the register allocator
26103 chooses not to do a shift in NEON).
26104
26105 This function is used in both initial expand and post-reload splits, and
26106 handles all kinds of 64-bit shifts.
26107
26108 Input requirements:
26109 - It is safe for the input and output to be the same register, but
26110 early-clobber rules apply for the shift amount and scratch registers.
26111 - Shift by register requires both scratch registers. Shift by a constant
26112 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26113 the scratch registers may be NULL.
26114 - Ashiftrt by a register also clobbers the CC register. */
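/* As an illustrative example of the shift-by-constant case: a left shift
   of a DImode value by 40 becomes, roughly, "out_high = in_low << 8"
   followed by "out_low = 0".  */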
26115 void
26116 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26117 rtx amount, rtx scratch1, rtx scratch2)
26118 {
26119 rtx out_high = gen_highpart (SImode, out);
26120 rtx out_low = gen_lowpart (SImode, out);
26121 rtx in_high = gen_highpart (SImode, in);
26122 rtx in_low = gen_lowpart (SImode, in);
26123
26124 /* Terminology:
26125 in = the register pair containing the input value.
26126 out = the destination register pair.
26127 up = the high- or low-part of each pair.
26128 down = the opposite part to "up".
26129 In a shift, we can consider bits to shift from "up"-stream to
26130 "down"-stream, so in a left-shift "up" is the low-part and "down"
26131 is the high-part of each register pair. */
26132
26133 rtx out_up = code == ASHIFT ? out_low : out_high;
26134 rtx out_down = code == ASHIFT ? out_high : out_low;
26135 rtx in_up = code == ASHIFT ? in_low : in_high;
26136 rtx in_down = code == ASHIFT ? in_high : in_low;
26137
26138 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26139 gcc_assert (out
26140 && (REG_P (out) || GET_CODE (out) == SUBREG)
26141 && GET_MODE (out) == DImode);
26142 gcc_assert (in
26143 && (REG_P (in) || GET_CODE (in) == SUBREG)
26144 && GET_MODE (in) == DImode);
26145 gcc_assert (amount
26146 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26147 && GET_MODE (amount) == SImode)
26148 || CONST_INT_P (amount)));
26149 gcc_assert (scratch1 == NULL
26150 || (GET_CODE (scratch1) == SCRATCH)
26151 || (GET_MODE (scratch1) == SImode
26152 && REG_P (scratch1)));
26153 gcc_assert (scratch2 == NULL
26154 || (GET_CODE (scratch2) == SCRATCH)
26155 || (GET_MODE (scratch2) == SImode
26156 && REG_P (scratch2)));
26157 gcc_assert (!REG_P (out) || !REG_P (amount)
26158 || !HARD_REGISTER_P (out)
26159 || (REGNO (out) != REGNO (amount)
26160 && REGNO (out) + 1 != REGNO (amount)));
26161
26162 /* Macros to make the following code more readable. */
26163 #define SUB_32(DEST,SRC) \
26164 gen_addsi3 ((DEST), (SRC), gen_rtx_CONST_INT (VOIDmode, -32))
26165 #define RSB_32(DEST,SRC) \
26166 gen_subsi3 ((DEST), gen_rtx_CONST_INT (VOIDmode, 32), (SRC))
26167 #define SUB_S_32(DEST,SRC) \
26168 gen_addsi3_compare0 ((DEST), (SRC), \
26169 gen_rtx_CONST_INT (VOIDmode, -32))
26170 #define SET(DEST,SRC) \
26171 gen_rtx_SET (SImode, (DEST), (SRC))
26172 #define SHIFT(CODE,SRC,AMOUNT) \
26173 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26174 #define LSHIFT(CODE,SRC,AMOUNT) \
26175 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26176 SImode, (SRC), (AMOUNT))
26177 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26178 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26179 SImode, (SRC), (AMOUNT))
26180 #define ORR(A,B) \
26181 gen_rtx_IOR (SImode, (A), (B))
26182 #define BRANCH(COND,LABEL) \
26183 gen_arm_cond_branch ((LABEL), \
26184 gen_rtx_ ## COND (CCmode, cc_reg, \
26185 const0_rtx), \
26186 cc_reg)
26187
26188 /* Shifts by register and shifts by constant are handled separately. */
26189 if (CONST_INT_P (amount))
26190 {
26191 /* We have a shift-by-constant. */
26192
26193 /* First, handle out-of-range shift amounts.
26194 In both cases we try to match the result that an ARM instruction
26195 in a shift-by-register would give. This helps reduce execution
26196 differences between optimization levels, but it won't stop other
26197 parts of the compiler doing different things. This is "undefined
26198 behaviour", in any case. */
26199 if (INTVAL (amount) <= 0)
26200 emit_insn (gen_movdi (out, in));
26201 else if (INTVAL (amount) >= 64)
26202 {
26203 if (code == ASHIFTRT)
26204 {
26205 rtx const31_rtx = gen_rtx_CONST_INT (VOIDmode, 31);
26206 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26207 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26208 }
26209 else
26210 emit_insn (gen_movdi (out, const0_rtx));
26211 }
26212
26213 /* Now handle valid shifts. */
26214 else if (INTVAL (amount) < 32)
26215 {
26216 /* Shifts by a constant less than 32. */
26217 rtx reverse_amount = gen_rtx_CONST_INT (VOIDmode,
26218 32 - INTVAL (amount));
26219
26220 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26221 emit_insn (SET (out_down,
26222 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26223 out_down)));
26224 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26225 }
26226 else
26227 {
26228 /* Shifts by a constant greater than 31. */
26229 rtx adj_amount = gen_rtx_CONST_INT (VOIDmode, INTVAL (amount) - 32);
26230
26231 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26232 if (code == ASHIFTRT)
26233 emit_insn (gen_ashrsi3 (out_up, in_up,
26234 gen_rtx_CONST_INT (VOIDmode, 31)));
26235 else
26236 emit_insn (SET (out_up, const0_rtx));
26237 }
26238 }
26239 else
26240 {
26241 /* We have a shift-by-register. */
26242 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26243
26244 /* This alternative requires the scratch registers. */
26245 gcc_assert (scratch1 && REG_P (scratch1));
26246 gcc_assert (scratch2 && REG_P (scratch2));
26247
26248 /* We will need the values "amount-32" and "32-amount" later.
26249 Swapping them around now allows the later code to be more general. */
26250 switch (code)
26251 {
26252 case ASHIFT:
26253 emit_insn (SUB_32 (scratch1, amount));
26254 emit_insn (RSB_32 (scratch2, amount));
26255 break;
26256 case ASHIFTRT:
26257 emit_insn (RSB_32 (scratch1, amount));
26258 /* Also set CC = amount > 32. */
26259 emit_insn (SUB_S_32 (scratch2, amount));
26260 break;
26261 case LSHIFTRT:
26262 emit_insn (RSB_32 (scratch1, amount));
26263 emit_insn (SUB_32 (scratch2, amount));
26264 break;
26265 default:
26266 gcc_unreachable ();
26267 }
26268
26269 /* Emit code like this:
26270
26271 arithmetic-left:
26272 out_down = in_down << amount;
26273 out_down = (in_up << (amount - 32)) | out_down;
26274 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26275 out_up = in_up << amount;
26276
26277 arithmetic-right:
26278 out_down = in_down >> amount;
26279 out_down = (in_up << (32 - amount)) | out_down;
26280 if (amount >= 32)
26281 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26282 out_up = in_up >> amount;
26283 
26284 logical-right:
26285 out_down = in_down >> amount;
26286 out_down = (in_up << (32 - amount)) | out_down;
26287 if (amount >= 32)
26288 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26289 out_up = in_up >> amount;
26290
26291 The ARM and Thumb2 variants are the same but implemented slightly
26292 differently. If this were only called during expand we could just
26293 use the Thumb2 case and let combine do the right thing, but this
26294 can also be called from post-reload splitters. */
26295
26296 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26297
26298 if (!TARGET_THUMB2)
26299 {
26300 /* Emit code for ARM mode. */
26301 emit_insn (SET (out_down,
26302 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26303 if (code == ASHIFTRT)
26304 {
26305 rtx done_label = gen_label_rtx ();
26306 emit_jump_insn (BRANCH (LT, done_label));
26307 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26308 out_down)));
26309 emit_label (done_label);
26310 }
26311 else
26312 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26313 out_down)));
26314 }
26315 else
26316 {
26317 /* Emit code for Thumb2 mode.
26318 Thumb2 can't do shift and or in one insn. */
26319 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26320 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26321
26322 if (code == ASHIFTRT)
26323 {
26324 rtx done_label = gen_label_rtx ();
26325 emit_jump_insn (BRANCH (LT, done_label));
26326 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26327 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26328 emit_label (done_label);
26329 }
26330 else
26331 {
26332 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26333 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26334 }
26335 }
26336
26337 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26338 }
26339
26340 #undef SUB_32
26341 #undef RSB_32
26342 #undef SUB_S_32
26343 #undef SET
26344 #undef SHIFT
26345 #undef LSHIFT
26346 #undef REV_LSHIFT
26347 #undef ORR
26348 #undef BRANCH
26349 }
26350
26351
26352 /* Returns true if COMPARISON is a valid comparison operation, and
26353 forces the operands into a form that is valid for it. */
26354 bool
26355 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26356 {
26357 enum rtx_code code = GET_CODE (*comparison);
26358 enum rtx_code canonical_code;
26359 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26360 ? GET_MODE (*op2) : GET_MODE (*op1);
26361
26362 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26363
26364 if (code == UNEQ || code == LTGT)
26365 return false;
26366
26367 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26368 PUT_CODE (*comparison, canonical_code);
26369
26370 switch (mode)
26371 {
26372 case SImode:
26373 if (!arm_add_operand (*op1, mode))
26374 *op1 = force_reg (mode, *op1);
26375 if (!arm_add_operand (*op2, mode))
26376 *op2 = force_reg (mode, *op2);
26377 return true;
26378
26379 case DImode:
26380 if (!cmpdi_operand (*op1, mode))
26381 *op1 = force_reg (mode, *op1);
26382 if (!cmpdi_operand (*op2, mode))
26383 *op2 = force_reg (mode, *op2);
26384 return true;
26385
26386 case SFmode:
26387 case DFmode:
26388 if (!arm_float_compare_operand (*op1, mode))
26389 *op1 = force_reg (mode, *op1);
26390 if (!arm_float_compare_operand (*op2, mode))
26391 *op2 = force_reg (mode, *op2);
26392 return true;
26393 default:
26394 break;
26395 }
26396
26397 return false;
26398
26399 }
26400
26401 #include "gt-arm.h"