gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "real.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "recog.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "c-pragma.h"
48 #include "integrate.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "debug.h"
53 #include "langhooks.h"
54 #include "df.h"
55
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
59
60 const struct attribute_spec arm_attribute_table[];
61
62 void (*arm_lang_output_object_attributes_hook)(void);
63
64 /* Forward function declarations. */
65 static arm_stack_offsets *arm_get_frame_offsets (void);
66 static void arm_add_gc_roots (void);
67 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
68 HOST_WIDE_INT, rtx, rtx, int, int);
69 static unsigned bit_count (unsigned long);
70 static int arm_address_register_rtx_p (rtx, int);
71 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
72 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
73 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
74 inline static int thumb1_index_register_rtx_p (rtx, int);
75 static int thumb_far_jump_used_p (void);
76 static bool thumb_force_lr_save (void);
77 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
78 static rtx emit_sfm (int, int);
79 static unsigned arm_size_return_regs (void);
80 static bool arm_assemble_integer (rtx, unsigned int, int);
81 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
82 static arm_cc get_arm_condition_code (rtx);
83 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
84 static rtx is_jump_table (rtx);
85 static const char *output_multi_immediate (rtx *, const char *, const char *,
86 int, HOST_WIDE_INT);
87 static const char *shift_op (rtx, HOST_WIDE_INT *);
88 static struct machine_function *arm_init_machine_status (void);
89 static void thumb_exit (FILE *, int);
91 static HOST_WIDE_INT get_jump_table_size (rtx);
92 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
93 static Mnode *add_minipool_forward_ref (Mfix *);
94 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_backward_ref (Mfix *);
96 static void assign_minipool_offsets (Mfix *);
97 static void arm_print_value (FILE *, rtx);
98 static void dump_minipool (rtx);
99 static int arm_barrier_cost (rtx);
100 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
101 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
102 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
103 rtx);
104 static void arm_reorg (void);
105 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree);
109 static unsigned long arm_compute_func_type (void);
110 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
111 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
114 #endif
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static int arm_comp_type_attributes (const_tree, const_tree);
119 static void arm_set_default_type_attributes (tree);
120 static int arm_adjust_cost (rtx, rtx, rtx, int);
121 static int count_insns_for_constant (HOST_WIDE_INT, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree, tree);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
126 tree);
127 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
128 static bool arm_size_rtx_costs (rtx, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx, int, int, int *);
133 static int arm_address_cost (rtx);
134 static bool arm_memory_load_p (rtx);
135 static bool arm_cirrus_insn_p (rtx);
136 static void cirrus_reorg (rtx);
137 static void arm_init_builtins (void);
138 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx safe_vector_operand (rtx, enum machine_mode);
141 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
142 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
144 static void emit_constant_insn (rtx cond, rtx pattern);
145 static rtx emit_set_insn (rtx, rtx);
146 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
147 tree, bool);
148
149 #ifdef OBJECT_FORMAT_ELF
150 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
151 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
152 #endif
153 #ifndef ARM_PE
154 static void arm_encode_section_info (tree, rtx, int);
155 #endif
156
157 static void arm_file_end (void);
158 static void arm_file_start (void);
159
160 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
161 tree, int *, int);
162 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
163 enum machine_mode, const_tree, bool);
164 static bool arm_promote_prototypes (const_tree);
165 static bool arm_default_short_enums (void);
166 static bool arm_align_anon_bitfield (void);
167 static bool arm_return_in_msb (const_tree);
168 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
169 #ifdef TARGET_UNWIND_INFO
170 static void arm_unwind_emit (FILE *, rtx);
171 static bool arm_output_ttype (rtx);
172 #endif
173 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
174
175 static tree arm_cxx_guard_type (void);
176 static bool arm_cxx_guard_mask_bit (void);
177 static tree arm_get_cookie_size (tree);
178 static bool arm_cookie_has_size (void);
179 static bool arm_cxx_cdtor_returns_this (void);
180 static bool arm_cxx_key_method_may_be_inline (void);
181 static void arm_cxx_determine_class_data_visibility (tree);
182 static bool arm_cxx_class_data_always_comdat (void);
183 static bool arm_cxx_use_aeabi_atexit (void);
184 static void arm_init_libfuncs (void);
185 static bool arm_handle_option (size_t, const char *, int);
186 static void arm_target_help (void);
187 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
188 static bool arm_cannot_copy_insn_p (rtx);
189 static bool arm_tls_symbol_p (rtx x);
190 static int arm_issue_rate (void);
191 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
192
193 \f
194 /* Initialize the GCC target structure. */
195 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
196 #undef TARGET_MERGE_DECL_ATTRIBUTES
197 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
198 #endif
199
200 #undef TARGET_ATTRIBUTE_TABLE
201 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
202
203 #undef TARGET_ASM_FILE_START
204 #define TARGET_ASM_FILE_START arm_file_start
205 #undef TARGET_ASM_FILE_END
206 #define TARGET_ASM_FILE_END arm_file_end
207
208 #undef TARGET_ASM_ALIGNED_SI_OP
209 #define TARGET_ASM_ALIGNED_SI_OP NULL
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER arm_assemble_integer
212
213 #undef TARGET_ASM_FUNCTION_PROLOGUE
214 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
215
216 #undef TARGET_ASM_FUNCTION_EPILOGUE
217 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
218
219 #undef TARGET_DEFAULT_TARGET_FLAGS
220 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
221 #undef TARGET_HANDLE_OPTION
222 #define TARGET_HANDLE_OPTION arm_handle_option
223 #undef TARGET_HELP
224 #define TARGET_HELP arm_target_help
225
226 #undef TARGET_COMP_TYPE_ATTRIBUTES
227 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
228
229 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
230 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
231
232 #undef TARGET_SCHED_ADJUST_COST
233 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
234
235 #undef TARGET_ENCODE_SECTION_INFO
236 #ifdef ARM_PE
237 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
238 #else
239 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
240 #endif
241
242 #undef TARGET_STRIP_NAME_ENCODING
243 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
244
245 #undef TARGET_ASM_INTERNAL_LABEL
246 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
247
248 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
249 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
250
251 #undef TARGET_ASM_OUTPUT_MI_THUNK
252 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
253 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
254 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
255
256 /* This will be overridden in arm_override_options. */
257 #undef TARGET_RTX_COSTS
258 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
259 #undef TARGET_ADDRESS_COST
260 #define TARGET_ADDRESS_COST arm_address_cost
261
262 #undef TARGET_SHIFT_TRUNCATION_MASK
263 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
264 #undef TARGET_VECTOR_MODE_SUPPORTED_P
265 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
266
267 #undef TARGET_MACHINE_DEPENDENT_REORG
268 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
269
270 #undef TARGET_INIT_BUILTINS
271 #define TARGET_INIT_BUILTINS arm_init_builtins
272 #undef TARGET_EXPAND_BUILTIN
273 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
274
275 #undef TARGET_INIT_LIBFUNCS
276 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
277
278 #undef TARGET_PROMOTE_FUNCTION_ARGS
279 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
280 #undef TARGET_PROMOTE_FUNCTION_RETURN
281 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
282 #undef TARGET_PROMOTE_PROTOTYPES
283 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
284 #undef TARGET_PASS_BY_REFERENCE
285 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
286 #undef TARGET_ARG_PARTIAL_BYTES
287 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
288
289 #undef TARGET_SETUP_INCOMING_VARARGS
290 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
291
292 #undef TARGET_DEFAULT_SHORT_ENUMS
293 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
294
295 #undef TARGET_ALIGN_ANON_BITFIELD
296 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
297
298 #undef TARGET_NARROW_VOLATILE_BITFIELD
299 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
300
301 #undef TARGET_CXX_GUARD_TYPE
302 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
303
304 #undef TARGET_CXX_GUARD_MASK_BIT
305 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
306
307 #undef TARGET_CXX_GET_COOKIE_SIZE
308 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
309
310 #undef TARGET_CXX_COOKIE_HAS_SIZE
311 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
312
313 #undef TARGET_CXX_CDTOR_RETURNS_THIS
314 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
315
316 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
317 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
318
319 #undef TARGET_CXX_USE_AEABI_ATEXIT
320 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
321
322 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
323 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
324 arm_cxx_determine_class_data_visibility
325
326 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
327 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
328
329 #undef TARGET_RETURN_IN_MSB
330 #define TARGET_RETURN_IN_MSB arm_return_in_msb
331
332 #undef TARGET_MUST_PASS_IN_STACK
333 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
334
335 #ifdef TARGET_UNWIND_INFO
336 #undef TARGET_UNWIND_EMIT
337 #define TARGET_UNWIND_EMIT arm_unwind_emit
338
339 /* EABI unwinding tables use a different format for the typeinfo tables. */
340 #undef TARGET_ASM_TTYPE
341 #define TARGET_ASM_TTYPE arm_output_ttype
342
343 #undef TARGET_ARM_EABI_UNWINDER
344 #define TARGET_ARM_EABI_UNWINDER true
345 #endif /* TARGET_UNWIND_INFO */
346
347 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
348 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
349
350 #undef TARGET_CANNOT_COPY_INSN_P
351 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
352
353 #ifdef HAVE_AS_TLS
354 #undef TARGET_HAVE_TLS
355 #define TARGET_HAVE_TLS true
356 #endif
357
358 #undef TARGET_CANNOT_FORCE_CONST_MEM
359 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
360
361 #undef TARGET_SCHED_ISSUE_RATE
362 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
363
364 #undef TARGET_MANGLE_TYPE
365 #define TARGET_MANGLE_TYPE arm_mangle_type
366
367 #ifdef HAVE_AS_TLS
368 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
369 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
370 #endif
371
372 struct gcc_target targetm = TARGET_INITIALIZER;
373 \f
374 /* Obstack for minipool constant handling. */
375 static struct obstack minipool_obstack;
376 static char * minipool_startobj;
377
378 /* The maximum number of insns skipped which
379 will be conditionalised if possible. */
380 static int max_insns_skipped = 5;
381
382 extern FILE * asm_out_file;
383
384 /* True if we are currently building a constant table. */
385 int making_const_table;
386
387 /* Define the information needed to generate branch insns. This is
388 stored from the compare operation. */
389 rtx arm_compare_op0, arm_compare_op1;
390
391 /* The processor for which instructions should be scheduled. */
392 enum processor_type arm_tune = arm_none;
393
394 /* The default processor used if not overridden by commandline. */
395 static enum processor_type arm_default_cpu = arm_none;
396
397 /* Which floating point model to use. */
398 enum arm_fp_model arm_fp_model;
399
400 /* Which floating point hardware is available. */
401 enum fputype arm_fpu_arch;
402
403 /* Which floating point hardware to schedule for. */
404 enum fputype arm_fpu_tune;
405
406 /* Whether to use floating point hardware. */
407 enum float_abi_type arm_float_abi;
408
409 /* Which ABI to use. */
410 enum arm_abi_type arm_abi;
411
412 /* Which thread pointer model to use. */
413 enum arm_tp_type target_thread_pointer = TP_AUTO;
414
415 /* Used to parse the -mstructure-size-boundary command line option. */
416 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
417
418 /* Used for Thumb call_via trampolines. */
419 rtx thumb_call_via_label[14];
420 static int thumb_call_reg_needed;
421
422 /* Bit values used to identify processor capabilities. */
423 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
424 #define FL_ARCH3M (1 << 1) /* Extended multiply */
425 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
426 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
427 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
428 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
429 #define FL_THUMB (1 << 6) /* Thumb aware */
430 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
431 #define FL_STRONG (1 << 8) /* StrongARM */
432 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
433 #define FL_XSCALE (1 << 10) /* XScale */
434 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
435 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
436 media instructions. */
437 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
438 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
439 Note: ARM6 & 7 derivatives only. */
440 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
441 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
442 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
443 profile. */
444 #define FL_DIV (1 << 18) /* Hardware divide. */
445 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
446 #define FL_NEON (1 << 20) /* Neon instructions. */
447
448 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
449
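/* Each FL_FOR_ARCHxx macro below gives the complete set of capability
 flags implied by that architecture revision, generally built up from
 the flags of the architecture it extends. */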
450 #define FL_FOR_ARCH2 FL_NOTM
451 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
452 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
453 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
454 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
455 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
456 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
457 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
458 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
459 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
460 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
461 #define FL_FOR_ARCH6J FL_FOR_ARCH6
462 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
463 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
464 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
465 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
466 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
467 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 & ~FL_NOTM)
468 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
469 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
470 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
471
472 /* The bits in this mask specify which
473 instructions we are allowed to generate. */
474 static unsigned long insn_flags = 0;
475
476 /* The bits in this mask specify which instruction scheduling options should
477 be used. */
478 static unsigned long tune_flags = 0;
479
480 /* The following are used in the arm.md file as equivalents to bits
481 in the above two flag variables. */
482
483 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
484 int arm_arch3m = 0;
485
486 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
487 int arm_arch4 = 0;
488
489 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
490 int arm_arch4t = 0;
491
492 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
493 int arm_arch5 = 0;
494
495 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
496 int arm_arch5e = 0;
497
498 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
499 int arm_arch6 = 0;
500
501 /* Nonzero if this chip supports the ARM 6K extensions. */
502 int arm_arch6k = 0;
503
504 /* Nonzero if instructions not present in the 'M' profile can be used. */
505 int arm_arch_notm = 0;
506
507 /* Nonzero if this chip can benefit from load scheduling. */
508 int arm_ld_sched = 0;
509
510 /* Nonzero if this chip is a StrongARM. */
511 int arm_tune_strongarm = 0;
512
513 /* Nonzero if this chip is a Cirrus variant. */
514 int arm_arch_cirrus = 0;
515
516 /* Nonzero if this chip supports Intel Wireless MMX technology. */
517 int arm_arch_iwmmxt = 0;
518
519 /* Nonzero if this chip is an XScale. */
520 int arm_arch_xscale = 0;
521
522 /* Nonzero if tuning for XScale. */
523 int arm_tune_xscale = 0;
524
525 /* Nonzero if we want to tune for stores that access the write-buffer.
526 This typically means an ARM6 or ARM7 with MMU or MPU. */
527 int arm_tune_wbuf = 0;
528
529 /* Nonzero if generating Thumb instructions. */
530 int thumb_code = 0;
531
532 /* Nonzero if we should define __THUMB_INTERWORK__ in the
533 preprocessor.
534 XXX This is a bit of a hack, it's intended to help work around
535 problems in GLD which doesn't understand that armv5t code is
536 interworking clean. */
537 int arm_cpp_interwork = 0;
538
539 /* Nonzero if chip supports Thumb 2. */
540 int arm_arch_thumb2;
541
542 /* Nonzero if chip supports integer division instruction. */
543 int arm_arch_hwdiv;
544
545 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
546 must report the mode of the memory reference from PRINT_OPERAND to
547 PRINT_OPERAND_ADDRESS. */
548 enum machine_mode output_memory_reference_mode;
549
550 /* The register number to be used for the PIC offset register. */
551 unsigned arm_pic_register = INVALID_REGNUM;
552
553 /* Set to 1 when a return insn is output; this means that the epilogue
554 is not needed. */
555 int return_used_this_function;
556
557 /* Set to 1 after arm_reorg has started. Reset at the start of
558 the next function. */
559 static int after_arm_reorg = 0;
560
561 /* The maximum number of insns to be used when loading a constant. */
562 static int arm_constant_limit = 3;
563
564 /* For an explanation of these variables, see final_prescan_insn below. */
565 int arm_ccfsm_state;
566 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
567 enum arm_cond_code arm_current_cc;
568 rtx arm_target_insn;
569 int arm_target_label;
570 /* The number of conditionally executed insns, including the current insn. */
571 int arm_condexec_count = 0;
572 /* A bitmask specifying the patterns for the IT block.
573 Zero means do not output an IT block before this insn. */
574 int arm_condexec_mask = 0;
575 /* The number of bits used in arm_condexec_mask. */
576 int arm_condexec_masklen = 0;
577
578 /* The condition codes of the ARM, and the inverse function. */
579 static const char * const arm_condition_codes[] =
580 {
581 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
582 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
583 };
584
585 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
586 #define streq(string1, string2) (strcmp (string1, string2) == 0)
587
588 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
589 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
590 | (1 << PIC_OFFSET_TABLE_REGNUM)))
591 \f
592 /* Initialization code. */
593
594 struct processors
595 {
596 const char *const name;
597 enum processor_type core;
598 const char *arch;
599 const unsigned long flags;
600 bool (* rtx_costs) (rtx, int, int, int *);
601 };
602
603 /* Not all of these give usefully different compilation alternatives,
604 but there is no simple way of generalizing them. */
605 static const struct processors all_cores[] =
606 {
607 /* ARM Cores */
608 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
609 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
610 #include "arm-cores.def"
611 #undef ARM_CORE
612 {NULL, arm_none, NULL, 0, NULL}
613 };
614
615 static const struct processors all_architectures[] =
616 {
617 /* ARM Architectures */
618 /* We don't specify rtx_costs here as it will be figured out
619 from the core. */
620
621 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
622 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
623 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
624 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
625 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
626 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
627 implementations that support it, so we will leave it out for now. */
628 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
629 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
630 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
631 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
632 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
633 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
634 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
635 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
636 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
637 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
638 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
639 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
640 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
641 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
642 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
643 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
644 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
645 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
646 {NULL, arm_none, NULL, 0 , NULL}
647 };
648
649 struct arm_cpu_select
650 {
651 const char * string;
652 const char * name;
653 const struct processors * processors;
654 };
655
656 /* This is a magic structure. The 'string' field is filled in by
657 arm_handle_option with a pointer to the value specified by the user
658 on the command line, if the user has specified such a value. */
659
660 static struct arm_cpu_select arm_select[] =
661 {
662 /* string name processors */
663 { NULL, "-mcpu=", all_cores },
664 { NULL, "-march=", all_architectures },
665 { NULL, "-mtune=", all_cores }
666 };
667
668 /* Defines representing the indexes into the above table. */
669 #define ARM_OPT_SET_CPU 0
670 #define ARM_OPT_SET_ARCH 1
671 #define ARM_OPT_SET_TUNE 2
672
673 /* The name of the preprocessor macro to define for this architecture. */
674
675 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
676
677 struct fpu_desc
678 {
679 const char * name;
680 enum fputype fpu;
681 };
682
683
684 /* Available values for -mfpu=. */
685
686 static const struct fpu_desc all_fpus[] =
687 {
688 {"fpa", FPUTYPE_FPA},
689 {"fpe2", FPUTYPE_FPA_EMU2},
690 {"fpe3", FPUTYPE_FPA_EMU2},
691 {"maverick", FPUTYPE_MAVERICK},
692 {"vfp", FPUTYPE_VFP},
693 {"vfp3", FPUTYPE_VFP3},
694 {"neon", FPUTYPE_NEON}
695 };
696
697
698 /* Floating point models used by the different hardware.
699 See fputype in arm.h. */
700
701 static const enum fputype fp_model_for_fpu[] =
702 {
703 /* No FP hardware. */
704 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
705 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
706 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
707 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
708 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
709 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
710 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
711 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
712 };
713
714
715 struct float_abi
716 {
717 const char * name;
718 enum float_abi_type abi_type;
719 };
720
721
722 /* Available values for -mfloat-abi=. */
723
724 static const struct float_abi all_float_abis[] =
725 {
726 {"soft", ARM_FLOAT_ABI_SOFT},
727 {"softfp", ARM_FLOAT_ABI_SOFTFP},
728 {"hard", ARM_FLOAT_ABI_HARD}
729 };
730
731
732 struct abi_name
733 {
734 const char *name;
735 enum arm_abi_type abi_type;
736 };
737
738
739 /* Available values for -mabi=. */
740
741 static const struct abi_name arm_all_abis[] =
742 {
743 {"apcs-gnu", ARM_ABI_APCS},
744 {"atpcs", ARM_ABI_ATPCS},
745 {"aapcs", ARM_ABI_AAPCS},
746 {"iwmmxt", ARM_ABI_IWMMXT},
747 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
748 };
749
750 /* Supported TLS relocations. */
751
752 enum tls_reloc {
753 TLS_GD32,
754 TLS_LDM32,
755 TLS_LDO32,
756 TLS_IE32,
757 TLS_LE32
758 };
759
760 /* Emit an insn that's a simple single-set. Both the operands must be known
761 to be valid. */
762 inline static rtx
763 emit_set_insn (rtx x, rtx y)
764 {
765 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
766 }
767
768 /* Return the number of bits set in VALUE. */
769 static unsigned
770 bit_count (unsigned long value)
771 {
772 unsigned long count = 0;
773
774 while (value)
775 {
776 count++;
777 value &= value - 1; /* Clear the least-significant set bit. */
778 }
779
780 return count;
781 }
782
783 /* Set up library functions unique to ARM. */
784
785 static void
786 arm_init_libfuncs (void)
787 {
788 /* There are no special library functions unless we are using the
789 ARM BPABI. */
790 if (!TARGET_BPABI)
791 return;
792
793 /* The functions below are described in Section 4 of the "Run-Time
794 ABI for the ARM architecture", Version 1.0. */
795
796 /* Double-precision floating-point arithmetic. Table 2. */
797 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
798 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
799 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
800 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
801 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
802
803 /* Double-precision comparisons. Table 3. */
804 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
805 set_optab_libfunc (ne_optab, DFmode, NULL);
806 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
807 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
808 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
809 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
810 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
811
812 /* Single-precision floating-point arithmetic. Table 4. */
813 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
814 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
815 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
816 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
817 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
818
819 /* Single-precision comparisons. Table 5. */
820 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
821 set_optab_libfunc (ne_optab, SFmode, NULL);
822 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
823 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
824 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
825 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
826 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
827
828 /* Floating-point to integer conversions. Table 6. */
829 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
830 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
831 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
832 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
833 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
834 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
835 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
836 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
837
838 /* Conversions between floating types. Table 7. */
839 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
840 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
841
842 /* Integer to floating-point conversions. Table 8. */
843 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
844 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
845 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
846 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
847 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
848 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
849 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
850 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
851
852 /* Long long. Table 9. */
853 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
854 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
855 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
856 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
857 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
858 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
859 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
860 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
861
862 /* Integer (32/32->32) division. \S 4.3.1. */
863 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
864 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
865
866 /* The divmod functions are designed so that they can be used for
867 plain division, even though they return both the quotient and the
868 remainder. The quotient is returned in the usual location (i.e.,
869 r0 for SImode, {r0, r1} for DImode), just as would be expected
870 for an ordinary division routine. Because the AAPCS calling
871 conventions specify that all of { r0, r1, r2, r3 } are
872 call-clobbered registers, there is no need to tell the compiler
873 explicitly that those registers are clobbered by these
874 routines. */
875 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
876 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
877
878 /* For SImode division the ABI provides div-without-mod routines,
879 which are faster. */
880 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
881 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
882
883 /* We don't have mod libcalls. Fortunately gcc knows how to use the
884 divmod libcalls instead. */
885 set_optab_libfunc (smod_optab, DImode, NULL);
886 set_optab_libfunc (umod_optab, DImode, NULL);
887 set_optab_libfunc (smod_optab, SImode, NULL);
888 set_optab_libfunc (umod_optab, SImode, NULL);
889 }
890
891 /* Implement TARGET_HANDLE_OPTION. */
892
893 static bool
894 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
895 {
896 switch (code)
897 {
898 case OPT_march_:
899 arm_select[1].string = arg;
900 return true;
901
902 case OPT_mcpu_:
903 arm_select[0].string = arg;
904 return true;
905
906 case OPT_mhard_float:
907 target_float_abi_name = "hard";
908 return true;
909
910 case OPT_msoft_float:
911 target_float_abi_name = "soft";
912 return true;
913
914 case OPT_mtune_:
915 arm_select[2].string = arg;
916 return true;
917
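 /* All other options are handled by the generic option machinery;
 simply accept them here. */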
918 default:
919 return true;
920 }
921 }
922
923 static void
924 arm_target_help (void)
925 {
926 int i;
927 static int columns = 0;
928 int remaining;
929
930 /* If we have not done so already, obtain the desired maximum width of
931 the output. Note - this is a duplication of the code at the start of
932 gcc/opts.c:print_specific_help() - the two copies should probably be
933 replaced by a single function. */
934 if (columns == 0)
935 {
936 const char *p;
937
938 GET_ENVIRONMENT (p, "COLUMNS");
939 if (p != NULL)
940 {
941 int value = atoi (p);
942
943 if (value > 0)
944 columns = value;
945 }
946
947 if (columns == 0)
948 /* Use a reasonable default. */
949 columns = 80;
950 }
951
952 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
953
954 /* The - 2 is because we know that the last entry in the array is NULL. */
955 i = ARRAY_SIZE (all_cores) - 2;
956 gcc_assert (i > 0);
957 printf (" %s", all_cores[i].name);
958 remaining = columns - (strlen (all_cores[i].name) + 4);
959 gcc_assert (remaining >= 0);
960
961 while (i--)
962 {
963 int len = strlen (all_cores[i].name);
964
965 if (remaining > len + 2)
966 {
967 printf (", %s", all_cores[i].name);
968 remaining -= len + 2;
969 }
970 else
971 {
972 if (remaining > 0)
973 printf (",");
974 printf ("\n %s", all_cores[i].name);
975 remaining = columns - (len + 4);
976 }
977 }
978
979 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
980
981 i = ARRAY_SIZE (all_architectures) - 2;
982 gcc_assert (i > 0);
983
984 printf (" %s", all_architectures[i].name);
985 remaining = columns - (strlen (all_architectures[i].name) + 4);
986 gcc_assert (remaining >= 0);
987
988 while (i--)
989 {
990 int len = strlen (all_architectures[i].name);
991
992 if (remaining > len + 2)
993 {
994 printf (", %s", all_architectures[i].name);
995 remaining -= len + 2;
996 }
997 else
998 {
999 if (remaining > 0)
1000 printf (",");
1001 printf ("\n %s", all_architectures[i].name);
1002 remaining = columns - (len + 4);
1003 }
1004 }
1005 printf ("\n");
1006
1007 }
1008
1009 /* Fix up any incompatible options that the user has specified.
1010 This has now turned into a maze. */
1011 void
1012 arm_override_options (void)
1013 {
1014 unsigned i;
1015 enum processor_type target_arch_cpu = arm_none;
1016
1017 /* Set up the flags based on the cpu/architecture selected by the user. */
1018 for (i = ARRAY_SIZE (arm_select); i--;)
1019 {
1020 struct arm_cpu_select * ptr = arm_select + i;
1021
1022 if (ptr->string != NULL && ptr->string[0] != '\0')
1023 {
1024 const struct processors * sel;
1025
1026 for (sel = ptr->processors; sel->name != NULL; sel++)
1027 if (streq (ptr->string, sel->name))
1028 {
1029 /* Set the architecture define. */
1030 if (i != ARM_OPT_SET_TUNE)
1031 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1032
1033 /* Determine the processor core for which we should
1034 tune code-generation. */
1035 if (/* -mcpu= is a sensible default. */
1036 i == ARM_OPT_SET_CPU
1037 /* -mtune= overrides -mcpu= and -march=. */
1038 || i == ARM_OPT_SET_TUNE)
1039 arm_tune = (enum processor_type) (sel - ptr->processors);
1040
1041 /* Remember the CPU associated with this architecture.
1042 If no other option is used to set the CPU type,
1043 we'll use this to guess the most suitable tuning
1044 options. */
1045 if (i == ARM_OPT_SET_ARCH)
1046 target_arch_cpu = sel->core;
1047
1048 if (i != ARM_OPT_SET_TUNE)
1049 {
1050 /* If we have been given an architecture and a processor
1051 make sure that they are compatible. We only generate
1052 a warning though, and we prefer the CPU over the
1053 architecture. */
1054 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1055 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1056 ptr->string);
1057
1058 insn_flags = sel->flags;
1059 }
1060
1061 break;
1062 }
1063
1064 if (sel->name == NULL)
1065 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1066 }
1067 }
1068
1069 /* Guess the tuning options from the architecture if necessary. */
1070 if (arm_tune == arm_none)
1071 arm_tune = target_arch_cpu;
1072
1073 /* If the user did not specify a processor, choose one for them. */
1074 if (insn_flags == 0)
1075 {
1076 const struct processors * sel;
1077 unsigned int sought;
1078 enum processor_type cpu;
1079
1080 cpu = TARGET_CPU_DEFAULT;
1081 if (cpu == arm_none)
1082 {
1083 #ifdef SUBTARGET_CPU_DEFAULT
1084 /* Use the subtarget default CPU if none was specified by
1085 configure. */
1086 cpu = SUBTARGET_CPU_DEFAULT;
1087 #endif
1088 /* Default to ARM6. */
1089 if (cpu == arm_none)
1090 cpu = arm6;
1091 }
1092 sel = &all_cores[cpu];
1093
1094 insn_flags = sel->flags;
1095
1096 /* Now check to see if the user has specified some command line
1097 switch that requires certain abilities from the cpu. */
1098 sought = 0;
1099
1100 if (TARGET_INTERWORK || TARGET_THUMB)
1101 {
1102 sought |= (FL_THUMB | FL_MODE32);
1103
1104 /* There are no ARM processors that support both APCS-26 and
1105 interworking. Therefore we force FL_MODE26 to be removed
1106 from insn_flags here (if it was set), so that the search
1107 below will always be able to find a compatible processor. */
1108 insn_flags &= ~FL_MODE26;
1109 }
1110
1111 if (sought != 0 && ((sought & insn_flags) != sought))
1112 {
1113 /* Try to locate a CPU type that supports all of the abilities
1114 of the default CPU, plus the extra abilities requested by
1115 the user. */
1116 for (sel = all_cores; sel->name != NULL; sel++)
1117 if ((sel->flags & sought) == (sought | insn_flags))
1118 break;
1119
1120 if (sel->name == NULL)
1121 {
1122 unsigned current_bit_count = 0;
1123 const struct processors * best_fit = NULL;
1124
1125 /* Ideally we would like to issue an error message here
1126 saying that it was not possible to find a CPU compatible
1127 with the default CPU, but which also supports the command
1128 line options specified by the programmer, and so they
1129 ought to use the -mcpu=<name> command line option to
1130 override the default CPU type.
1131
1132 If we cannot find a cpu that has both the
1133 characteristics of the default cpu and the given
1134 command line options we scan the array again looking
1135 for a best match. */
1136 for (sel = all_cores; sel->name != NULL; sel++)
1137 if ((sel->flags & sought) == sought)
1138 {
1139 unsigned count;
1140
1141 count = bit_count (sel->flags & insn_flags);
1142
1143 if (count >= current_bit_count)
1144 {
1145 best_fit = sel;
1146 current_bit_count = count;
1147 }
1148 }
1149
1150 gcc_assert (best_fit);
1151 sel = best_fit;
1152 }
1153
1154 insn_flags = sel->flags;
1155 }
1156 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1157 arm_default_cpu = (enum processor_type) (sel - all_cores);
1158 if (arm_tune == arm_none)
1159 arm_tune = arm_default_cpu;
1160 }
1161
1162 /* The processor for which we should tune should now have been
1163 chosen. */
1164 gcc_assert (arm_tune != arm_none);
1165
1166 tune_flags = all_cores[(int)arm_tune].flags;
1167 if (optimize_size)
1168 targetm.rtx_costs = arm_size_rtx_costs;
1169 else
1170 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1171
1172 /* Make sure that the processor choice does not conflict with any of the
1173 other command line choices. */
1174 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1175 error ("target CPU does not support ARM mode");
1176
1177 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1178 {
1179 warning (0, "target CPU does not support interworking" );
1180 target_flags &= ~MASK_INTERWORK;
1181 }
1182
1183 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1184 {
1185 warning (0, "target CPU does not support THUMB instructions");
1186 target_flags &= ~MASK_THUMB;
1187 }
1188
1189 if (TARGET_APCS_FRAME && TARGET_THUMB)
1190 {
1191 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1192 target_flags &= ~MASK_APCS_FRAME;
1193 }
1194
1195 /* Callee super interworking implies thumb interworking. Adding
1196 this to the flags here simplifies the logic elsewhere. */
1197 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1198 target_flags |= MASK_INTERWORK;
1199
1200 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1201 from here where no function is being compiled currently. */
1202 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1203 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1204
1205 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1206 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1207
1208 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1209 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1210
1211 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1212 {
1213 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1214 target_flags |= MASK_APCS_FRAME;
1215 }
1216
1217 if (TARGET_POKE_FUNCTION_NAME)
1218 target_flags |= MASK_APCS_FRAME;
1219
1220 if (TARGET_APCS_REENT && flag_pic)
1221 error ("-fpic and -mapcs-reent are incompatible");
1222
1223 if (TARGET_APCS_REENT)
1224 warning (0, "APCS reentrant code not supported. Ignored");
1225
1226 /* If this target is normally configured to use APCS frames, warn if they
1227 are turned off and debugging is turned on. */
1228 if (TARGET_ARM
1229 && write_symbols != NO_DEBUG
1230 && !TARGET_APCS_FRAME
1231 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1232 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1233
1234 if (TARGET_APCS_FLOAT)
1235 warning (0, "passing floating point arguments in fp regs not yet supported");
1236
1237 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1238 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1239 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1240 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1241 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1242 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1243 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1244 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1245 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1246 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1247 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1248 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1249
1250 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1251 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1252 thumb_code = (TARGET_ARM == 0);
1253 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1254 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1255 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1256 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1257
1258 /* V5 code we generate is completely interworking capable, so we turn off
1259 TARGET_INTERWORK here to avoid many tests later on. */
1260
1261 /* XXX However, we must pass the right pre-processor defines to CPP
1262 or GLD can get confused. This is a hack. */
1263 if (TARGET_INTERWORK)
1264 arm_cpp_interwork = 1;
1265
1266 if (arm_arch5)
1267 target_flags &= ~MASK_INTERWORK;
1268
1269 if (target_abi_name)
1270 {
1271 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1272 {
1273 if (streq (arm_all_abis[i].name, target_abi_name))
1274 {
1275 arm_abi = arm_all_abis[i].abi_type;
1276 break;
1277 }
1278 }
1279 if (i == ARRAY_SIZE (arm_all_abis))
1280 error ("invalid ABI option: -mabi=%s", target_abi_name);
1281 }
1282 else
1283 arm_abi = ARM_DEFAULT_ABI;
1284
1285 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1286 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1287
1288 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1289 error ("iwmmxt abi requires an iwmmxt capable cpu");
1290
1291 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1292 if (target_fpu_name == NULL && target_fpe_name != NULL)
1293 {
1294 if (streq (target_fpe_name, "2"))
1295 target_fpu_name = "fpe2";
1296 else if (streq (target_fpe_name, "3"))
1297 target_fpu_name = "fpe3";
1298 else
1299 error ("invalid floating point emulation option: -mfpe=%s",
1300 target_fpe_name);
1301 }
1302 if (target_fpu_name != NULL)
1303 {
1304 /* The user specified a FPU. */
1305 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1306 {
1307 if (streq (all_fpus[i].name, target_fpu_name))
1308 {
1309 arm_fpu_arch = all_fpus[i].fpu;
1310 arm_fpu_tune = arm_fpu_arch;
1311 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1312 break;
1313 }
1314 }
1315 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1316 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1317 }
1318 else
1319 {
1320 #ifdef FPUTYPE_DEFAULT
1321 /* Use the default if it is specified for this platform. */
1322 arm_fpu_arch = FPUTYPE_DEFAULT;
1323 arm_fpu_tune = FPUTYPE_DEFAULT;
1324 #else
1325 /* Pick one based on CPU type. */
1326 /* ??? Some targets assume FPA is the default.
1327 if ((insn_flags & FL_VFP) != 0)
1328 arm_fpu_arch = FPUTYPE_VFP;
1329 else
1330 */
1331 if (arm_arch_cirrus)
1332 arm_fpu_arch = FPUTYPE_MAVERICK;
1333 else
1334 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1335 #endif
1336 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1337 arm_fpu_tune = FPUTYPE_FPA;
1338 else
1339 arm_fpu_tune = arm_fpu_arch;
1340 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1341 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1342 }
1343
1344 if (target_float_abi_name != NULL)
1345 {
1346 /* The user specified a FP ABI. */
1347 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1348 {
1349 if (streq (all_float_abis[i].name, target_float_abi_name))
1350 {
1351 arm_float_abi = all_float_abis[i].abi_type;
1352 break;
1353 }
1354 }
1355 if (i == ARRAY_SIZE (all_float_abis))
1356 error ("invalid floating point abi: -mfloat-abi=%s",
1357 target_float_abi_name);
1358 }
1359 else
1360 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1361
1362 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1363 sorry ("-mfloat-abi=hard and VFP");
1364
1365 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1366 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1367 will ever exist. GCC makes no attempt to support this combination. */
1368 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1369 sorry ("iWMMXt and hardware floating point");
1370
1371 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1372 if (TARGET_THUMB2 && TARGET_IWMMXT)
1373 sorry ("Thumb-2 iWMMXt");
1374
1375 /* If soft-float is specified then don't use FPU. */
1376 if (TARGET_SOFT_FLOAT)
1377 arm_fpu_arch = FPUTYPE_NONE;
1378
1379 /* For arm2/3 there is no need to do any scheduling if there is only
1380 a floating point emulator, or we are doing software floating-point. */
1381 if ((TARGET_SOFT_FLOAT
1382 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1383 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1384 && (tune_flags & FL_MODE32) == 0)
1385 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1386
1387 if (target_thread_switch)
1388 {
1389 if (strcmp (target_thread_switch, "soft") == 0)
1390 target_thread_pointer = TP_SOFT;
1391 else if (strcmp (target_thread_switch, "auto") == 0)
1392 target_thread_pointer = TP_AUTO;
1393 else if (strcmp (target_thread_switch, "cp15") == 0)
1394 target_thread_pointer = TP_CP15;
1395 else
1396 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1397 }
1398
1399 /* Use the cp15 method if it is available. */
1400 if (target_thread_pointer == TP_AUTO)
1401 {
1402 if (arm_arch6k && !TARGET_THUMB)
1403 target_thread_pointer = TP_CP15;
1404 else
1405 target_thread_pointer = TP_SOFT;
1406 }
1407
1408 if (TARGET_HARD_TP && TARGET_THUMB1)
1409 error ("can not use -mtp=cp15 with 16-bit Thumb");
1410
1411 /* Override the default structure alignment for AAPCS ABI. */
1412 if (TARGET_AAPCS_BASED)
1413 arm_structure_size_boundary = 8;
1414
1415 if (structure_size_string != NULL)
1416 {
1417 int size = strtol (structure_size_string, NULL, 0);
1418
1419 if (size == 8 || size == 32
1420 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1421 arm_structure_size_boundary = size;
1422 else
1423 warning (0, "structure size boundary can only be set to %s",
1424 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1425 }
1426
1427 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1428 {
1429 error ("RTP PIC is incompatible with Thumb");
1430 flag_pic = 0;
1431 }
1432
1433 /* If stack checking is disabled, we can use r10 as the PIC register,
1434 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1435 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1436 {
1437 if (TARGET_VXWORKS_RTP)
1438 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1439 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1440 }
1441
1442 if (flag_pic && TARGET_VXWORKS_RTP)
1443 arm_pic_register = 9;
1444
1445 if (arm_pic_register_string != NULL)
1446 {
1447 int pic_register = decode_reg_name (arm_pic_register_string);
1448
1449 if (!flag_pic)
1450 warning (0, "-mpic-register= is useless without -fpic");
1451
1452 /* Prevent the user from choosing an obviously stupid PIC register. */
1453 else if (pic_register < 0 || call_used_regs[pic_register]
1454 || pic_register == HARD_FRAME_POINTER_REGNUM
1455 || pic_register == STACK_POINTER_REGNUM
1456 || pic_register >= PC_REGNUM
1457 || (TARGET_VXWORKS_RTP
1458 && (unsigned int) pic_register != arm_pic_register))
1459 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1460 else
1461 arm_pic_register = pic_register;
1462 }
1463
1464 /* ??? We might want scheduling for thumb2. */
1465 if (TARGET_THUMB && flag_schedule_insns)
1466 {
1467 /* Don't warn since it's on by default in -O2. */
1468 flag_schedule_insns = 0;
1469 }
1470
1471 if (optimize_size)
1472 {
1473 arm_constant_limit = 1;
1474
1475 /* If optimizing for size, bump the number of instructions that we
1476 are prepared to conditionally execute (even on a StrongARM). */
1477 max_insns_skipped = 6;
1478 }
1479 else
1480 {
1481 /* For processors with load scheduling, it never costs more than
1482 2 cycles to load a constant, and the load scheduler may well
1483 reduce that to 1. */
1484 if (arm_ld_sched)
1485 arm_constant_limit = 1;
1486
1487 /* On XScale the longer latency of a load makes it more difficult
1488 to achieve a good schedule, so it's faster to synthesize
1489 constants that can be done in two insns. */
1490 if (arm_tune_xscale)
1491 arm_constant_limit = 2;
1492
1493 /* StrongARM has early execution of branches, so a sequence
1494 that is worth skipping is shorter. */
1495 if (arm_tune_strongarm)
1496 max_insns_skipped = 3;
1497 }
1498
1499 /* Register global variables with the garbage collector. */
1500 arm_add_gc_roots ();
1501 }
1502
1503 static void
1504 arm_add_gc_roots (void)
1505 {
1506 gcc_obstack_init(&minipool_obstack);
1507 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1508 }
1509 \f
1510 /* A table of known ARM exception types.
1511 For use with the interrupt function attribute. */
1512
1513 typedef struct
1514 {
1515 const char *const arg;
1516 const unsigned long return_value;
1517 }
1518 isr_attribute_arg;
1519
1520 static const isr_attribute_arg isr_attribute_args [] =
1521 {
1522 { "IRQ", ARM_FT_ISR },
1523 { "irq", ARM_FT_ISR },
1524 { "FIQ", ARM_FT_FIQ },
1525 { "fiq", ARM_FT_FIQ },
1526 { "ABORT", ARM_FT_ISR },
1527 { "abort", ARM_FT_ISR },
1528 { "ABORT", ARM_FT_ISR },
1529 { "abort", ARM_FT_ISR },
1530 { "UNDEF", ARM_FT_EXCEPTION },
1531 { "undef", ARM_FT_EXCEPTION },
1532 { "SWI", ARM_FT_EXCEPTION },
1533 { "swi", ARM_FT_EXCEPTION },
1534 { NULL, ARM_FT_NORMAL }
1535 };
1536
1537 /* Returns the (interrupt) function type of the current
1538 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1539
1540 static unsigned long
1541 arm_isr_value (tree argument)
1542 {
1543 const isr_attribute_arg * ptr;
1544 const char * arg;
1545
1546 if (!arm_arch_notm)
1547 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1548
1549 /* No argument - default to IRQ. */
1550 if (argument == NULL_TREE)
1551 return ARM_FT_ISR;
1552
1553 /* Get the value of the argument. */
1554 if (TREE_VALUE (argument) == NULL_TREE
1555 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1556 return ARM_FT_UNKNOWN;
1557
1558 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1559
1560 /* Check it against the list of known arguments. */
1561 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1562 if (streq (arg, ptr->arg))
1563 return ptr->return_value;
1564
1565 /* An unrecognized interrupt type. */
1566 return ARM_FT_UNKNOWN;
1567 }
1568
1569 /* Computes the type of the current function. */
1570
1571 static unsigned long
1572 arm_compute_func_type (void)
1573 {
1574 unsigned long type = ARM_FT_UNKNOWN;
1575 tree a;
1576 tree attr;
1577
1578 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1579
1580 /* Decide if the current function is volatile. Such functions
1581 never return, and many memory cycles can be saved by not storing
1582 register values that will never be needed again. This optimization
1583 was added to speed up context switching in a kernel application. */
1584 if (optimize > 0
1585 && (TREE_NOTHROW (current_function_decl)
1586 || !(flag_unwind_tables
1587 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1588 && TREE_THIS_VOLATILE (current_function_decl))
1589 type |= ARM_FT_VOLATILE;
1590
1591 if (cfun->static_chain_decl != NULL)
1592 type |= ARM_FT_NESTED;
1593
1594 attr = DECL_ATTRIBUTES (current_function_decl);
1595
1596 a = lookup_attribute ("naked", attr);
1597 if (a != NULL_TREE)
1598 type |= ARM_FT_NAKED;
1599
1600 a = lookup_attribute ("isr", attr);
1601 if (a == NULL_TREE)
1602 a = lookup_attribute ("interrupt", attr);
1603
1604 if (a == NULL_TREE)
1605 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1606 else
1607 type |= arm_isr_value (TREE_VALUE (a));
1608
1609 return type;
1610 }
1611
1612 /* Returns the type of the current function. */
1613
1614 unsigned long
1615 arm_current_func_type (void)
1616 {
1617 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1618 cfun->machine->func_type = arm_compute_func_type ();
1619
1620 return cfun->machine->func_type;
1621 }
1622 \f
1623 /* Return 1 if it is possible to return using a single instruction.
1624 If SIBLING is non-null, this is a test for a return before a sibling
1625 call. SIBLING is the call insn, so we can examine its register usage. */
1626
1627 int
1628 use_return_insn (int iscond, rtx sibling)
1629 {
1630 int regno;
1631 unsigned int func_type;
1632 unsigned long saved_int_regs;
1633 unsigned HOST_WIDE_INT stack_adjust;
1634 arm_stack_offsets *offsets;
1635
1636 /* Never use a return instruction before reload has run. */
1637 if (!reload_completed)
1638 return 0;
1639
1640 func_type = arm_current_func_type ();
1641
1642 /* Naked, volatile and stack alignment functions need special
1643 consideration. */
1644 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1645 return 0;
1646
1647 /* So do interrupt functions that use the frame pointer and Thumb
1648 interrupt functions. */
1649 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1650 return 0;
1651
1652 offsets = arm_get_frame_offsets ();
1653 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1654
1655 /* As do variadic functions. */
1656 if (current_function_pretend_args_size
1657 || cfun->machine->uses_anonymous_args
1658 /* Or if the function calls __builtin_eh_return () */
1659 || current_function_calls_eh_return
1660 /* Or if the function calls alloca */
1661 || current_function_calls_alloca
1662 /* Or if there is a stack adjustment. However, if the stack pointer
1663 is saved on the stack, we can use a pre-incrementing stack load. */
1664 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1665 && stack_adjust == 4)))
1666 return 0;
1667
1668 saved_int_regs = offsets->saved_regs_mask;
1669
1670 /* Unfortunately, the insn
1671
1672 ldmib sp, {..., sp, ...}
1673
1674 triggers a bug on most SA-110 based devices, such that the stack
1675 pointer won't be correctly restored if the instruction takes a
1676 page fault. We work around this problem by popping r3 along with
1677 the other registers, since that is never slower than executing
1678 another instruction.
1679
1680 We test for !arm_arch5 here, because code for any architecture
1681 less than this could potentially be run on one of the buggy
1682 chips. */
1683 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1684 {
1685 /* Validate that r3 is a call-clobbered register (always true in
1686 	 the default ABI) ... */
1687 if (!call_used_regs[3])
1688 return 0;
1689
1690 /* ... that it isn't being used for a return value ... */
1691 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1692 return 0;
1693
1694 /* ... or for a tail-call argument ... */
1695 if (sibling)
1696 {
1697 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1698
1699 if (find_regno_fusage (sibling, USE, 3))
1700 return 0;
1701 }
1702
1703 /* ... and that there are no call-saved registers in r0-r2
1704 (always true in the default ABI). */
1705 if (saved_int_regs & 0x7)
1706 return 0;
1707 }
1708
1709 /* Can't be done if interworking with Thumb, and any registers have been
1710 stacked. */
1711 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1712 return 0;
1713
1714 /* On StrongARM, conditional returns are expensive if they aren't
1715 taken and multiple registers have been stacked. */
1716 if (iscond && arm_tune_strongarm)
1717 {
1718 /* Conditional return when just the LR is stored is a simple
1719 conditional-load instruction, that's not expensive. */
1720 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1721 return 0;
1722
1723 if (flag_pic
1724 && arm_pic_register != INVALID_REGNUM
1725 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1726 return 0;
1727 }
1728
1729 /* If there are saved registers but the LR isn't saved, then we need
1730 two instructions for the return. */
1731 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1732 return 0;
1733
1734 /* Can't be done if any of the FPA regs are pushed,
1735 since this also requires an insn. */
1736 if (TARGET_HARD_FLOAT && TARGET_FPA)
1737 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1739 return 0;
1740
1741 /* Likewise VFP regs. */
1742 if (TARGET_HARD_FLOAT && TARGET_VFP)
1743 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1744 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1745 return 0;
1746
1747 if (TARGET_REALLY_IWMMXT)
1748 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1750 return 0;
1751
1752 return 1;
1753 }
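
/* Informal illustration of the test above: a frameless leaf function that
   pushed only the link register can normally return with the single
   instruction "ldmfd sp!, {pc}" (or "bx lr" when nothing was pushed at
   all), whereas a function that must also unwind a local stack frame or
   restore VFP/FPA registers needs additional epilogue instructions and
   therefore fails this test.  */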
1754
1755 /* Return TRUE if int I is a valid immediate ARM constant. */
1756
1757 int
1758 const_ok_for_arm (HOST_WIDE_INT i)
1759 {
1760 int lowbit;
1761
1762 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1763 be all zero, or all one. */
1764 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1765 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1766 != ((~(unsigned HOST_WIDE_INT) 0)
1767 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1768 return FALSE;
1769
1770 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1771
1772 /* Fast return for 0 and small values. We must do this for zero, since
1773 the code below can't handle that one case. */
1774 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1775 return TRUE;
1776
1777 /* Get the number of trailing zeros. */
1778 lowbit = ffs((int) i) - 1;
1779
1780 /* Only even shifts are allowed in ARM mode so round down to the
1781 nearest even number. */
1782 if (TARGET_ARM)
1783 lowbit &= ~1;
1784
1785 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1786 return TRUE;
1787
1788 if (TARGET_ARM)
1789 {
1790 /* Allow rotated constants in ARM mode. */
1791 if (lowbit <= 4
1792 && ((i & ~0xc000003f) == 0
1793 || (i & ~0xf000000f) == 0
1794 || (i & ~0xfc000003) == 0))
1795 return TRUE;
1796 }
1797 else
1798 {
1799 HOST_WIDE_INT v;
1800
1801 /* Allow repeated pattern. */
1802 v = i & 0xff;
1803 v |= v << 16;
1804 if (i == v || i == (v | (v << 8)))
1805 return TRUE;
1806 }
1807
1808 return FALSE;
1809 }
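
/* Informally: an ARM-mode immediate is an 8-bit value rotated right by an
   even amount within the 32-bit word.  So, for example, 0x000000ff,
   0x00ff0000, 0xff000000 and 0xf000000f (0xff rotated right by four) are
   all valid, whereas 0x00000101 and 0x00ffff00 are not and have to be
   synthesized.  In Thumb-2 mode the replicated byte patterns checked
   above, such as 0x00ff00ff and 0x01010101, are also accepted.  */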
1810
1811 /* Return true if I is a valid constant for the operation CODE. */
1812 static int
1813 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1814 {
1815 if (const_ok_for_arm (i))
1816 return 1;
1817
1818 switch (code)
1819 {
1820 case PLUS:
1821 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1822
1823 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1824 case XOR:
1825 case IOR:
1826 return 0;
1827
1828 case AND:
1829 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1830
1831 default:
1832 gcc_unreachable ();
1833 }
1834 }
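
/* By way of (hypothetical) example: an addition of -1, whose value is not
   representable directly, is accepted here because const_ok_for_arm (1)
   holds and the operation can be emitted as a subtraction of 1; likewise
   an AND with 0xffffff00 is accepted because the inverted value 0xff is
   representable and the operation can become a BIC.  */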
1835
1836 /* Emit a sequence of insns to handle a large constant.
1837 CODE is the code of the operation required, it can be any of SET, PLUS,
1838 IOR, AND, XOR, MINUS;
1839 MODE is the mode in which the operation is being performed;
1840 VAL is the integer to operate on;
1841 SOURCE is the other operand (a register, or a null-pointer for SET);
1842 SUBTARGETS means it is safe to create scratch registers if that will
1843 either produce a simpler sequence, or we will want to cse the values.
1844 Return value is the number of insns emitted. */
1845
1846 /* ??? Tweak this for thumb2. */
1847 int
1848 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1849 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1850 {
1851 rtx cond;
1852
1853 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1854 cond = COND_EXEC_TEST (PATTERN (insn));
1855 else
1856 cond = NULL_RTX;
1857
1858 if (subtargets || code == SET
1859 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1860 && REGNO (target) != REGNO (source)))
1861 {
1862 /* After arm_reorg has been called, we can't fix up expensive
1863 constants by pushing them into memory so we must synthesize
1864 them in-line, regardless of the cost. This is only likely to
1865 be more costly on chips that have load delay slots and we are
1866 compiling without running the scheduler (so no splitting
1867 occurred before the final instruction emission).
1868
1869 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1870 */
1871 if (!after_arm_reorg
1872 && !cond
1873 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1874 1, 0)
1875 > arm_constant_limit + (code != SET)))
1876 {
1877 if (code == SET)
1878 {
1879 		  /* Currently SET is the only monadic value for CODE; all
1880 		     the rest are dyadic.  */
1881 emit_set_insn (target, GEN_INT (val));
1882 return 1;
1883 }
1884 else
1885 {
1886 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1887
1888 emit_set_insn (temp, GEN_INT (val));
1889 /* For MINUS, the value is subtracted from, since we never
1890 have subtraction of a constant. */
1891 if (code == MINUS)
1892 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1893 else
1894 emit_set_insn (target,
1895 gen_rtx_fmt_ee (code, mode, source, temp));
1896 return 2;
1897 }
1898 }
1899 }
1900
1901 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1902 1);
1903 }
1904
1905 /* Return the number of ARM instructions required to synthesize the given
1906 constant. */
1907 static int
1908 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1909 {
1910 HOST_WIDE_INT temp1;
1911 int num_insns = 0;
1912 do
1913 {
1914 int end;
1915
1916 if (i <= 0)
1917 i += 32;
1918 if (remainder & (3 << (i - 2)))
1919 {
1920 end = i - 8;
1921 if (end < 0)
1922 end += 32;
1923 temp1 = remainder & ((0x0ff << end)
1924 | ((i < end) ? (0xff >> (32 - end)) : 0));
1925 remainder &= ~temp1;
1926 num_insns++;
1927 i -= 6;
1928 }
1929 i -= 2;
1930 } while (remainder);
1931 return num_insns;
1932 }
1933
1934 /* Emit an instruction with the indicated PATTERN. If COND is
1935 non-NULL, conditionalize the execution of the instruction on COND
1936 being true. */
1937
1938 static void
1939 emit_constant_insn (rtx cond, rtx pattern)
1940 {
1941 if (cond)
1942 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1943 emit_insn (pattern);
1944 }
1945
1946 /* As above, but extra parameter GENERATE which, if clear, suppresses
1947 RTL generation. */
1948 /* ??? This needs more work for thumb2. */
1949
1950 static int
1951 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1952 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1953 int generate)
1954 {
1955 int can_invert = 0;
1956 int can_negate = 0;
1957 int can_negate_initial = 0;
1958 int can_shift = 0;
1959 int i;
1960 int num_bits_set = 0;
1961 int set_sign_bit_copies = 0;
1962 int clear_sign_bit_copies = 0;
1963 int clear_zero_bit_copies = 0;
1964 int set_zero_bit_copies = 0;
1965 int insns = 0;
1966 unsigned HOST_WIDE_INT temp1, temp2;
1967 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1968
1969 /* Find out which operations are safe for a given CODE. Also do a quick
1970 check for degenerate cases; these can occur when DImode operations
1971 are split. */
1972 switch (code)
1973 {
1974 case SET:
1975 can_invert = 1;
1976 can_shift = 1;
1977 can_negate = 1;
1978 break;
1979
1980 case PLUS:
1981 can_negate = 1;
1982 can_negate_initial = 1;
1983 break;
1984
1985 case IOR:
1986 if (remainder == 0xffffffff)
1987 {
1988 if (generate)
1989 emit_constant_insn (cond,
1990 gen_rtx_SET (VOIDmode, target,
1991 GEN_INT (ARM_SIGN_EXTEND (val))));
1992 return 1;
1993 }
1994 if (remainder == 0)
1995 {
1996 if (reload_completed && rtx_equal_p (target, source))
1997 return 0;
1998 if (generate)
1999 emit_constant_insn (cond,
2000 gen_rtx_SET (VOIDmode, target, source));
2001 return 1;
2002 }
2003 break;
2004
2005 case AND:
2006 if (remainder == 0)
2007 {
2008 if (generate)
2009 emit_constant_insn (cond,
2010 gen_rtx_SET (VOIDmode, target, const0_rtx));
2011 return 1;
2012 }
2013 if (remainder == 0xffffffff)
2014 {
2015 if (reload_completed && rtx_equal_p (target, source))
2016 return 0;
2017 if (generate)
2018 emit_constant_insn (cond,
2019 gen_rtx_SET (VOIDmode, target, source));
2020 return 1;
2021 }
2022 can_invert = 1;
2023 break;
2024
2025 case XOR:
2026 if (remainder == 0)
2027 {
2028 if (reload_completed && rtx_equal_p (target, source))
2029 return 0;
2030 if (generate)
2031 emit_constant_insn (cond,
2032 gen_rtx_SET (VOIDmode, target, source));
2033 return 1;
2034 }
2035
2036 /* We don't know how to handle other cases yet. */
2037 gcc_assert (remainder == 0xffffffff);
2038
2039 if (generate)
2040 emit_constant_insn (cond,
2041 gen_rtx_SET (VOIDmode, target,
2042 gen_rtx_NOT (mode, source)));
2043 return 1;
2044
2045 case MINUS:
2046 /* We treat MINUS as (val - source), since (source - val) is always
2047 passed as (source + (-val)). */
2048 if (remainder == 0)
2049 {
2050 if (generate)
2051 emit_constant_insn (cond,
2052 gen_rtx_SET (VOIDmode, target,
2053 gen_rtx_NEG (mode, source)));
2054 return 1;
2055 }
2056 if (const_ok_for_arm (val))
2057 {
2058 if (generate)
2059 emit_constant_insn (cond,
2060 gen_rtx_SET (VOIDmode, target,
2061 gen_rtx_MINUS (mode, GEN_INT (val),
2062 source)));
2063 return 1;
2064 }
2065 can_negate = 1;
2066
2067 break;
2068
2069 default:
2070 gcc_unreachable ();
2071 }
2072
2073 /* If we can do it in one insn get out quickly. */
2074 if (const_ok_for_arm (val)
2075 || (can_negate_initial && const_ok_for_arm (-val))
2076 || (can_invert && const_ok_for_arm (~val)))
2077 {
2078 if (generate)
2079 emit_constant_insn (cond,
2080 gen_rtx_SET (VOIDmode, target,
2081 (source
2082 ? gen_rtx_fmt_ee (code, mode, source,
2083 GEN_INT (val))
2084 : GEN_INT (val))));
2085 return 1;
2086 }
2087
2088 /* Calculate a few attributes that may be useful for specific
2089 optimizations. */
2090 for (i = 31; i >= 0; i--)
2091 {
2092 if ((remainder & (1 << i)) == 0)
2093 clear_sign_bit_copies++;
2094 else
2095 break;
2096 }
2097
2098 for (i = 31; i >= 0; i--)
2099 {
2100 if ((remainder & (1 << i)) != 0)
2101 set_sign_bit_copies++;
2102 else
2103 break;
2104 }
2105
2106 for (i = 0; i <= 31; i++)
2107 {
2108 if ((remainder & (1 << i)) == 0)
2109 clear_zero_bit_copies++;
2110 else
2111 break;
2112 }
2113
2114 for (i = 0; i <= 31; i++)
2115 {
2116 if ((remainder & (1 << i)) != 0)
2117 set_zero_bit_copies++;
2118 else
2119 break;
2120 }
2121
2122 switch (code)
2123 {
2124 case SET:
2125 /* See if we can use movw. */
2126 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2127 {
2128 if (generate)
2129 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2130 GEN_INT (val)));
2131 return 1;
2132 }
2133
2134 /* See if we can do this by sign_extending a constant that is known
2135 	 to be negative.  This is a good way of doing it, since the shift
2136 may well merge into a subsequent insn. */
2137 if (set_sign_bit_copies > 1)
2138 {
2139 if (const_ok_for_arm
2140 (temp1 = ARM_SIGN_EXTEND (remainder
2141 << (set_sign_bit_copies - 1))))
2142 {
2143 if (generate)
2144 {
2145 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2146 emit_constant_insn (cond,
2147 gen_rtx_SET (VOIDmode, new_src,
2148 GEN_INT (temp1)));
2149 emit_constant_insn (cond,
2150 gen_ashrsi3 (target, new_src,
2151 GEN_INT (set_sign_bit_copies - 1)));
2152 }
2153 return 2;
2154 }
2155 /* For an inverted constant, we will need to set the low bits,
2156 these will be shifted out of harm's way. */
2157 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2158 if (const_ok_for_arm (~temp1))
2159 {
2160 if (generate)
2161 {
2162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2163 emit_constant_insn (cond,
2164 gen_rtx_SET (VOIDmode, new_src,
2165 GEN_INT (temp1)));
2166 emit_constant_insn (cond,
2167 gen_ashrsi3 (target, new_src,
2168 GEN_INT (set_sign_bit_copies - 1)));
2169 }
2170 return 2;
2171 }
2172 }
2173
2174 /* See if we can calculate the value as the difference between two
2175 valid immediates. */
2176 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2177 {
2178 int topshift = clear_sign_bit_copies & ~1;
2179
2180 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2181 & (0xff000000 >> topshift));
2182
2183 /* If temp1 is zero, then that means the 9 most significant
2184 bits of remainder were 1 and we've caused it to overflow.
2185 When topshift is 0 we don't need to do anything since we
2186 can borrow from 'bit 32'. */
2187 if (temp1 == 0 && topshift != 0)
2188 temp1 = 0x80000000 >> (topshift - 1);
2189
2190 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2191
2192 if (const_ok_for_arm (temp2))
2193 {
2194 if (generate)
2195 {
2196 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2197 emit_constant_insn (cond,
2198 gen_rtx_SET (VOIDmode, new_src,
2199 GEN_INT (temp1)));
2200 emit_constant_insn (cond,
2201 gen_addsi3 (target, new_src,
2202 GEN_INT (-temp2)));
2203 }
2204
2205 return 2;
2206 }
2207 }
2208
2209 /* See if we can generate this by setting the bottom (or the top)
2210 16 bits, and then shifting these into the other half of the
2211 	 word.  We only look for the simplest cases; to do more would cost
2212 too much. Be careful, however, not to generate this when the
2213 alternative would take fewer insns. */
2214 if (val & 0xffff0000)
2215 {
2216 temp1 = remainder & 0xffff0000;
2217 temp2 = remainder & 0x0000ffff;
2218
2219 /* Overlaps outside this range are best done using other methods. */
2220 for (i = 9; i < 24; i++)
2221 {
2222 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2223 && !const_ok_for_arm (temp2))
2224 {
2225 rtx new_src = (subtargets
2226 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2227 : target);
2228 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2229 source, subtargets, generate);
2230 source = new_src;
2231 if (generate)
2232 emit_constant_insn
2233 (cond,
2234 gen_rtx_SET
2235 (VOIDmode, target,
2236 gen_rtx_IOR (mode,
2237 gen_rtx_ASHIFT (mode, source,
2238 GEN_INT (i)),
2239 source)));
2240 return insns + 1;
2241 }
2242 }
2243
2244 /* Don't duplicate cases already considered. */
2245 for (i = 17; i < 24; i++)
2246 {
2247 if (((temp1 | (temp1 >> i)) == remainder)
2248 && !const_ok_for_arm (temp1))
2249 {
2250 rtx new_src = (subtargets
2251 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2252 : target);
2253 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2254 source, subtargets, generate);
2255 source = new_src;
2256 if (generate)
2257 emit_constant_insn
2258 (cond,
2259 gen_rtx_SET (VOIDmode, target,
2260 gen_rtx_IOR
2261 (mode,
2262 gen_rtx_LSHIFTRT (mode, source,
2263 GEN_INT (i)),
2264 source)));
2265 return insns + 1;
2266 }
2267 }
2268 }
2269 break;
2270
2271 case IOR:
2272 case XOR:
2273 /* If we have IOR or XOR, and the constant can be loaded in a
2274 single instruction, and we can find a temporary to put it in,
2275 then this can be done in two instructions instead of 3-4. */
2276 if (subtargets
2277 /* TARGET can't be NULL if SUBTARGETS is 0 */
2278 || (reload_completed && !reg_mentioned_p (target, source)))
2279 {
2280 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2281 {
2282 if (generate)
2283 {
2284 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2285
2286 emit_constant_insn (cond,
2287 gen_rtx_SET (VOIDmode, sub,
2288 GEN_INT (val)));
2289 emit_constant_insn (cond,
2290 gen_rtx_SET (VOIDmode, target,
2291 gen_rtx_fmt_ee (code, mode,
2292 source, sub)));
2293 }
2294 return 2;
2295 }
2296 }
2297
2298 if (code == XOR)
2299 break;
2300
2301 if (set_sign_bit_copies > 8
2302 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2303 {
2304 if (generate)
2305 {
2306 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2307 rtx shift = GEN_INT (set_sign_bit_copies);
2308
2309 emit_constant_insn
2310 (cond,
2311 gen_rtx_SET (VOIDmode, sub,
2312 gen_rtx_NOT (mode,
2313 gen_rtx_ASHIFT (mode,
2314 source,
2315 shift))));
2316 emit_constant_insn
2317 (cond,
2318 gen_rtx_SET (VOIDmode, target,
2319 gen_rtx_NOT (mode,
2320 gen_rtx_LSHIFTRT (mode, sub,
2321 shift))));
2322 }
2323 return 2;
2324 }
2325
2326 if (set_zero_bit_copies > 8
2327 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2328 {
2329 if (generate)
2330 {
2331 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2332 rtx shift = GEN_INT (set_zero_bit_copies);
2333
2334 emit_constant_insn
2335 (cond,
2336 gen_rtx_SET (VOIDmode, sub,
2337 gen_rtx_NOT (mode,
2338 gen_rtx_LSHIFTRT (mode,
2339 source,
2340 shift))));
2341 emit_constant_insn
2342 (cond,
2343 gen_rtx_SET (VOIDmode, target,
2344 gen_rtx_NOT (mode,
2345 gen_rtx_ASHIFT (mode, sub,
2346 shift))));
2347 }
2348 return 2;
2349 }
2350
2351 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2352 {
2353 if (generate)
2354 {
2355 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2356 emit_constant_insn (cond,
2357 gen_rtx_SET (VOIDmode, sub,
2358 gen_rtx_NOT (mode, source)));
2359 source = sub;
2360 if (subtargets)
2361 sub = gen_reg_rtx (mode);
2362 emit_constant_insn (cond,
2363 gen_rtx_SET (VOIDmode, sub,
2364 gen_rtx_AND (mode, source,
2365 GEN_INT (temp1))));
2366 emit_constant_insn (cond,
2367 gen_rtx_SET (VOIDmode, target,
2368 gen_rtx_NOT (mode, sub)));
2369 }
2370 return 3;
2371 }
2372 break;
2373
2374 case AND:
2375       /* See if two shifts will do 2 or more insns' worth of work.  */
2376 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2377 {
2378 HOST_WIDE_INT shift_mask = ((0xffffffff
2379 << (32 - clear_sign_bit_copies))
2380 & 0xffffffff);
2381
2382 if ((remainder | shift_mask) != 0xffffffff)
2383 {
2384 if (generate)
2385 {
2386 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2387 insns = arm_gen_constant (AND, mode, cond,
2388 remainder | shift_mask,
2389 new_src, source, subtargets, 1);
2390 source = new_src;
2391 }
2392 else
2393 {
2394 rtx targ = subtargets ? NULL_RTX : target;
2395 insns = arm_gen_constant (AND, mode, cond,
2396 remainder | shift_mask,
2397 targ, source, subtargets, 0);
2398 }
2399 }
2400
2401 if (generate)
2402 {
2403 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2404 rtx shift = GEN_INT (clear_sign_bit_copies);
2405
2406 emit_insn (gen_ashlsi3 (new_src, source, shift));
2407 emit_insn (gen_lshrsi3 (target, new_src, shift));
2408 }
2409
2410 return insns + 2;
2411 }
2412
2413 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2414 {
2415 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2416
2417 if ((remainder | shift_mask) != 0xffffffff)
2418 {
2419 if (generate)
2420 {
2421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2422
2423 insns = arm_gen_constant (AND, mode, cond,
2424 remainder | shift_mask,
2425 new_src, source, subtargets, 1);
2426 source = new_src;
2427 }
2428 else
2429 {
2430 rtx targ = subtargets ? NULL_RTX : target;
2431
2432 insns = arm_gen_constant (AND, mode, cond,
2433 remainder | shift_mask,
2434 targ, source, subtargets, 0);
2435 }
2436 }
2437
2438 if (generate)
2439 {
2440 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2441 rtx shift = GEN_INT (clear_zero_bit_copies);
2442
2443 emit_insn (gen_lshrsi3 (new_src, source, shift));
2444 emit_insn (gen_ashlsi3 (target, new_src, shift));
2445 }
2446
2447 return insns + 2;
2448 }
2449
2450 break;
2451
2452 default:
2453 break;
2454 }
2455
2456 for (i = 0; i < 32; i++)
2457 if (remainder & (1 << i))
2458 num_bits_set++;
2459
2460 if (code == AND || (can_invert && num_bits_set > 16))
2461 remainder = (~remainder) & 0xffffffff;
2462 else if (code == PLUS && num_bits_set > 16)
2463 remainder = (-remainder) & 0xffffffff;
2464 else
2465 {
2466 can_invert = 0;
2467 can_negate = 0;
2468 }
2469
2470 /* Now try and find a way of doing the job in either two or three
2471 instructions.
2472      We start by looking for the largest block of zeros that is aligned on
2473      a 2-bit boundary; we then fill up the temps, wrapping around to the
2474 top of the word when we drop off the bottom.
2475 In the worst case this code should produce no more than four insns.
2476 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2477 best place to start. */
2478
2479 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2480 the same. */
2481 {
2482 int best_start = 0;
2483 if (!TARGET_THUMB2)
2484 {
2485 int best_consecutive_zeros = 0;
2486
2487 for (i = 0; i < 32; i += 2)
2488 {
2489 int consecutive_zeros = 0;
2490
2491 if (!(remainder & (3 << i)))
2492 {
2493 while ((i < 32) && !(remainder & (3 << i)))
2494 {
2495 consecutive_zeros += 2;
2496 i += 2;
2497 }
2498 if (consecutive_zeros > best_consecutive_zeros)
2499 {
2500 best_consecutive_zeros = consecutive_zeros;
2501 best_start = i - consecutive_zeros;
2502 }
2503 i -= 2;
2504 }
2505 }
2506
2507 /* So long as it won't require any more insns to do so, it's
2508 desirable to emit a small constant (in bits 0...9) in the last
2509 insn. This way there is more chance that it can be combined with
2510 a later addressing insn to form a pre-indexed load or store
2511 operation. Consider:
2512
2513 *((volatile int *)0xe0000100) = 1;
2514 *((volatile int *)0xe0000110) = 2;
2515
2516 We want this to wind up as:
2517
2518 mov rA, #0xe0000000
2519 mov rB, #1
2520 str rB, [rA, #0x100]
2521 mov rB, #2
2522 str rB, [rA, #0x110]
2523
2524 rather than having to synthesize both large constants from scratch.
2525
2526 Therefore, we calculate how many insns would be required to emit
2527 the constant starting from `best_start', and also starting from
2528 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2529 yield a shorter sequence, we may as well use zero. */
2530 if (best_start != 0
2531 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2532 && (count_insns_for_constant (remainder, 0) <=
2533 count_insns_for_constant (remainder, best_start)))
2534 best_start = 0;
2535 }
2536
2537 /* Now start emitting the insns. */
2538 i = best_start;
2539 do
2540 {
2541 int end;
2542
2543 if (i <= 0)
2544 i += 32;
2545 if (remainder & (3 << (i - 2)))
2546 {
2547 end = i - 8;
2548 if (end < 0)
2549 end += 32;
2550 temp1 = remainder & ((0x0ff << end)
2551 | ((i < end) ? (0xff >> (32 - end)) : 0));
2552 remainder &= ~temp1;
2553
2554 if (generate)
2555 {
2556 rtx new_src, temp1_rtx;
2557
2558 if (code == SET || code == MINUS)
2559 {
2560 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2561 if (can_invert && code != MINUS)
2562 temp1 = ~temp1;
2563 }
2564 else
2565 {
2566 if (remainder && subtargets)
2567 new_src = gen_reg_rtx (mode);
2568 else
2569 new_src = target;
2570 if (can_invert)
2571 temp1 = ~temp1;
2572 else if (can_negate)
2573 temp1 = -temp1;
2574 }
2575
2576 temp1 = trunc_int_for_mode (temp1, mode);
2577 temp1_rtx = GEN_INT (temp1);
2578
2579 if (code == SET)
2580 ;
2581 else if (code == MINUS)
2582 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2583 else
2584 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2585
2586 emit_constant_insn (cond,
2587 gen_rtx_SET (VOIDmode, new_src,
2588 temp1_rtx));
2589 source = new_src;
2590 }
2591
2592 if (code == SET)
2593 {
2594 can_invert = 0;
2595 code = PLUS;
2596 }
2597 else if (code == MINUS)
2598 code = PLUS;
2599
2600 insns++;
2601 if (TARGET_ARM)
2602 i -= 6;
2603 else
2604 i -= 7;
2605 }
2606 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2607 shifts. */
2608 if (TARGET_ARM)
2609 i -= 2;
2610 else
2611 i--;
2612 }
2613 while (remainder);
2614 }
2615
2616 return insns;
2617 }
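
/* Rough worked examples of the above (not an exhaustive description):
   0xfff is not a valid immediate but is built from two byte-sized pieces
   in two instructions (a MOV of one part followed by an ORR/ADD of the
   other); 0x00ffff00 hits the difference-of-immediates case and becomes
   "mov rD, #0x1000000" followed by "sub rD, rD, #0x100"; and an arbitrary
   value such as 0x12345678 falls through to the final loop and needs the
   worst-case four instructions.  */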
2618
2619 /* Canonicalize a comparison so that we are more likely to recognize it.
2620 This can be done for a few constant compares, where we can make the
2621 immediate value easier to load. */
2622
2623 enum rtx_code
2624 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2625 rtx * op1)
2626 {
2627 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2628 unsigned HOST_WIDE_INT maxval;
2629 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2630
2631 switch (code)
2632 {
2633 case EQ:
2634 case NE:
2635 return code;
2636
2637 case GT:
2638 case LE:
2639 if (i != maxval
2640 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2641 {
2642 *op1 = GEN_INT (i + 1);
2643 return code == GT ? GE : LT;
2644 }
2645 break;
2646
2647 case GE:
2648 case LT:
2649 if (i != ~maxval
2650 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2651 {
2652 *op1 = GEN_INT (i - 1);
2653 return code == GE ? GT : LE;
2654 }
2655 break;
2656
2657 case GTU:
2658 case LEU:
2659 if (i != ~((unsigned HOST_WIDE_INT) 0)
2660 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2661 {
2662 *op1 = GEN_INT (i + 1);
2663 return code == GTU ? GEU : LTU;
2664 }
2665 break;
2666
2667 case GEU:
2668 case LTU:
2669 if (i != 0
2670 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2671 {
2672 *op1 = GEN_INT (i - 1);
2673 return code == GEU ? GTU : LEU;
2674 }
2675 break;
2676
2677 default:
2678 gcc_unreachable ();
2679 }
2680
2681 return code;
2682 }
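
/* For instance, the comparison (x <= 0xfff) uses a constant that cannot
   be encoded as an immediate, but (x < 0x1000) means the same thing and
   0x1000 is encodable, so the LE/0xfff pair above is rewritten as
   LT/0x1000.  The unsigned cases are adjusted in the same way.  */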
2683
2684
2685 /* Define how to find the value returned by a function. */
2686
2687 rtx
2688 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2689 {
2690 enum machine_mode mode;
2691 int unsignedp ATTRIBUTE_UNUSED;
2692 rtx r ATTRIBUTE_UNUSED;
2693
2694 mode = TYPE_MODE (type);
2695 /* Promote integer types. */
2696 if (INTEGRAL_TYPE_P (type))
2697 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2698
2699 /* Promotes small structs returned in a register to full-word size
2700 for big-endian AAPCS. */
2701 if (arm_return_in_msb (type))
2702 {
2703 HOST_WIDE_INT size = int_size_in_bytes (type);
2704 if (size % UNITS_PER_WORD != 0)
2705 {
2706 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2707 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2708 }
2709 }
2710
2711 return LIBCALL_VALUE(mode);
2712 }
2713
2714 /* Determine the amount of memory needed to store the possible return
2715 registers of an untyped call. */
2716 int
2717 arm_apply_result_size (void)
2718 {
2719 int size = 16;
2720
2721 if (TARGET_ARM)
2722 {
2723 if (TARGET_HARD_FLOAT_ABI)
2724 {
2725 if (TARGET_FPA)
2726 size += 12;
2727 if (TARGET_MAVERICK)
2728 size += 8;
2729 }
2730 if (TARGET_IWMMXT_ABI)
2731 size += 8;
2732 }
2733
2734 return size;
2735 }
2736
2737 /* Decide whether a type should be returned in memory (true)
2738 or in a register (false). This is called by the macro
2739 RETURN_IN_MEMORY. */
2740 int
2741 arm_return_in_memory (const_tree type)
2742 {
2743 HOST_WIDE_INT size;
2744
2745 size = int_size_in_bytes (type);
2746
2747 /* Vector values should be returned using ARM registers, not memory (unless
2748 they're over 16 bytes, which will break since we only have four
2749 call-clobbered registers to play with). */
2750 if (TREE_CODE (type) == VECTOR_TYPE)
2751 return (size < 0 || size > (4 * UNITS_PER_WORD));
2752
2753 if (!AGGREGATE_TYPE_P (type) &&
2754 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2755 /* All simple types are returned in registers.
2756 For AAPCS, complex types are treated the same as aggregates. */
2757 return 0;
2758
2759 if (arm_abi != ARM_ABI_APCS)
2760 {
2761 /* ATPCS and later return aggregate types in memory only if they are
2762 larger than a word (or are variable size). */
2763 return (size < 0 || size > UNITS_PER_WORD);
2764 }
2765
2766 /* For the arm-wince targets we choose to be compatible with Microsoft's
2767 ARM and Thumb compilers, which always return aggregates in memory. */
2768 #ifndef ARM_WINCE
2769 /* All structures/unions bigger than one word are returned in memory.
2770 Also catch the case where int_size_in_bytes returns -1. In this case
2771 the aggregate is either huge or of variable size, and in either case
2772 we will want to return it via memory and not in a register. */
2773 if (size < 0 || size > UNITS_PER_WORD)
2774 return 1;
2775
2776 if (TREE_CODE (type) == RECORD_TYPE)
2777 {
2778 tree field;
2779
2780 /* For a struct the APCS says that we only return in a register
2781 if the type is 'integer like' and every addressable element
2782 has an offset of zero. For practical purposes this means
2783 that the structure can have at most one non bit-field element
2784 and that this element must be the first one in the structure. */
2785
2786 /* Find the first field, ignoring non FIELD_DECL things which will
2787 have been created by C++. */
2788 for (field = TYPE_FIELDS (type);
2789 field && TREE_CODE (field) != FIELD_DECL;
2790 field = TREE_CHAIN (field))
2791 continue;
2792
2793 if (field == NULL)
2794 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2795
2796 /* Check that the first field is valid for returning in a register. */
2797
2798 /* ... Floats are not allowed */
2799 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2800 return 1;
2801
2802 /* ... Aggregates that are not themselves valid for returning in
2803 a register are not allowed. */
2804 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2805 return 1;
2806
2807 /* Now check the remaining fields, if any. Only bitfields are allowed,
2808 since they are not addressable. */
2809 for (field = TREE_CHAIN (field);
2810 field;
2811 field = TREE_CHAIN (field))
2812 {
2813 if (TREE_CODE (field) != FIELD_DECL)
2814 continue;
2815
2816 if (!DECL_BIT_FIELD_TYPE (field))
2817 return 1;
2818 }
2819
2820 return 0;
2821 }
2822
2823 if (TREE_CODE (type) == UNION_TYPE)
2824 {
2825 tree field;
2826
2827 /* Unions can be returned in registers if every element is
2828 integral, or can be returned in an integer register. */
2829 for (field = TYPE_FIELDS (type);
2830 field;
2831 field = TREE_CHAIN (field))
2832 {
2833 if (TREE_CODE (field) != FIELD_DECL)
2834 continue;
2835
2836 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2837 return 1;
2838
2839 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2840 return 1;
2841 }
2842
2843 return 0;
2844 }
2845 #endif /* not ARM_WINCE */
2846
2847 /* Return all other types in memory. */
2848 return 1;
2849 }
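
/* For example, under the old APCS rules a one-word "integer like" struct
   such as

     struct wrapped_int { int value; };

   is returned in r0, whereas a two-word struct like

     struct pair { int lo; int hi; };

   is returned in memory.  Under ATPCS/AAPCS only the size test applies,
   so the one-word struct is still returned in a register.  */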
2850
2851 /* Indicate whether or not words of a double are in big-endian order. */
2852
2853 int
2854 arm_float_words_big_endian (void)
2855 {
2856 if (TARGET_MAVERICK)
2857 return 0;
2858
2859   /* For FPA, float words are always big-endian.  For VFP, float words
2860 follow the memory system mode. */
2861
2862 if (TARGET_FPA)
2863 {
2864 return 1;
2865 }
2866
2867 if (TARGET_VFP)
2868 return (TARGET_BIG_END ? 1 : 0);
2869
2870 return 1;
2871 }
2872
2873 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2874 for a call to a function whose data type is FNTYPE.
2875 For a library call, FNTYPE is NULL. */
2876 void
2877 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2878 rtx libname ATTRIBUTE_UNUSED,
2879 tree fndecl ATTRIBUTE_UNUSED)
2880 {
2881 /* On the ARM, the offset starts at 0. */
2882 pcum->nregs = 0;
2883 pcum->iwmmxt_nregs = 0;
2884 pcum->can_split = true;
2885
2886 /* Varargs vectors are treated the same as long long.
2887      named_count avoids having to change the way arm handles 'named'.  */
2888 pcum->named_count = 0;
2889 pcum->nargs = 0;
2890
2891 if (TARGET_REALLY_IWMMXT && fntype)
2892 {
2893 tree fn_arg;
2894
2895 for (fn_arg = TYPE_ARG_TYPES (fntype);
2896 fn_arg;
2897 fn_arg = TREE_CHAIN (fn_arg))
2898 pcum->named_count += 1;
2899
2900 if (! pcum->named_count)
2901 pcum->named_count = INT_MAX;
2902 }
2903 }
2904
2905
2906 /* Return true if mode/type need doubleword alignment. */
2907 bool
2908 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2909 {
2910 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2911 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2912 }
2913
2914
2915 /* Determine where to put an argument to a function.
2916 Value is zero to push the argument on the stack,
2917 or a hard register in which to store the argument.
2918
2919 MODE is the argument's machine mode.
2920 TYPE is the data type of the argument (as a tree).
2921 This is null for libcalls where that information may
2922 not be available.
2923 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2924 the preceding args and about the function being called.
2925 NAMED is nonzero if this argument is a named parameter
2926 (otherwise it is an extra parameter matching an ellipsis). */
2927
2928 rtx
2929 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2930 tree type, int named)
2931 {
2932 int nregs;
2933
2934 /* Varargs vectors are treated the same as long long.
2935      named_count avoids having to change the way arm handles 'named'.  */
2936 if (TARGET_IWMMXT_ABI
2937 && arm_vector_mode_supported_p (mode)
2938 && pcum->named_count > pcum->nargs + 1)
2939 {
2940 if (pcum->iwmmxt_nregs <= 9)
2941 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2942 else
2943 {
2944 pcum->can_split = false;
2945 return NULL_RTX;
2946 }
2947 }
2948
2949 /* Put doubleword aligned quantities in even register pairs. */
2950 if (pcum->nregs & 1
2951 && ARM_DOUBLEWORD_ALIGN
2952 && arm_needs_doubleword_align (mode, type))
2953 pcum->nregs++;
2954
2955 if (mode == VOIDmode)
2956 /* Pick an arbitrary value for operand 2 of the call insn. */
2957 return const0_rtx;
2958
2959 /* Only allow splitting an arg between regs and memory if all preceding
2960 args were allocated to regs. For args passed by reference we only count
2961 the reference pointer. */
2962 if (pcum->can_split)
2963 nregs = 1;
2964 else
2965 nregs = ARM_NUM_REGS2 (mode, type);
2966
2967 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2968 return NULL_RTX;
2969
2970 return gen_rtx_REG (mode, pcum->nregs);
2971 }
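
/* To illustrate the even-pair rule above with a hypothetical prototype:
   for

     void f (int a, long long b);

   the int goes in r0, and because DImode needs doubleword alignment under
   AAPCS the long long is placed in the even pair r2/r3, leaving r1
   unused, rather than being split across r1/r2.  */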
2972
2973 static int
2974 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2975 tree type, bool named ATTRIBUTE_UNUSED)
2976 {
2977 int nregs = pcum->nregs;
2978
2979 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2980 return 0;
2981
2982 if (NUM_ARG_REGS > nregs
2983 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2984 && pcum->can_split)
2985 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2986
2987 return 0;
2988 }
2989
2990 /* Variable sized types are passed by reference. This is a GCC
2991 extension to the ARM ABI. */
2992
2993 static bool
2994 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2995 enum machine_mode mode ATTRIBUTE_UNUSED,
2996 const_tree type, bool named ATTRIBUTE_UNUSED)
2997 {
2998 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2999 }
3000 \f
3001 /* Encode the current state of the #pragma [no_]long_calls. */
3002 typedef enum
3003 {
3004 OFF, /* No #pragma [no_]long_calls is in effect. */
3005 LONG, /* #pragma long_calls is in effect. */
3006 SHORT /* #pragma no_long_calls is in effect. */
3007 } arm_pragma_enum;
3008
3009 static arm_pragma_enum arm_pragma_long_calls = OFF;
3010
3011 void
3012 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3013 {
3014 arm_pragma_long_calls = LONG;
3015 }
3016
3017 void
3018 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3019 {
3020 arm_pragma_long_calls = SHORT;
3021 }
3022
3023 void
3024 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3025 {
3026 arm_pragma_long_calls = OFF;
3027 }
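
/* A typical (hypothetical) use of these pragmas in user source would be:

     #pragma long_calls
     extern void far_away_helper (void);
     #pragma long_calls_off

   so that calls to far_away_helper use the long-call sequence while the
   surrounding code is unaffected.  */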
3028 \f
3029 /* Table of machine attributes. */
3030 const struct attribute_spec arm_attribute_table[] =
3031 {
3032 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3033 /* Function calls made to this symbol must be done indirectly, because
3034 it may lie outside of the 26 bit addressing range of a normal function
3035 call. */
3036 { "long_call", 0, 0, false, true, true, NULL },
3037 /* Whereas these functions are always known to reside within the 26 bit
3038 addressing range. */
3039 { "short_call", 0, 0, false, true, true, NULL },
3040 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3041 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3042 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3043 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3044 #ifdef ARM_PE
3045 /* ARM/PE has three new attributes:
3046 interfacearm - ?
3047 dllexport - for exporting a function/variable that will live in a dll
3048 dllimport - for importing a function/variable from a dll
3049
3050 Microsoft allows multiple declspecs in one __declspec, separating
3051 them with spaces. We do NOT support this. Instead, use __declspec
3052 multiple times.
3053 */
3054 { "dllimport", 0, 0, true, false, false, NULL },
3055 { "dllexport", 0, 0, true, false, false, NULL },
3056 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3057 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3058 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3059 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3060 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3061 #endif
3062 { NULL, 0, 0, false, false, false, NULL }
3063 };
3064
3065 /* Handle an attribute requiring a FUNCTION_DECL;
3066 arguments as in struct attribute_spec.handler. */
3067 static tree
3068 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3069 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3070 {
3071 if (TREE_CODE (*node) != FUNCTION_DECL)
3072 {
3073 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3074 IDENTIFIER_POINTER (name));
3075 *no_add_attrs = true;
3076 }
3077
3078 return NULL_TREE;
3079 }
3080
3081 /* Handle an "interrupt" or "isr" attribute;
3082 arguments as in struct attribute_spec.handler. */
3083 static tree
3084 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3085 bool *no_add_attrs)
3086 {
3087 if (DECL_P (*node))
3088 {
3089 if (TREE_CODE (*node) != FUNCTION_DECL)
3090 {
3091 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3092 IDENTIFIER_POINTER (name));
3093 *no_add_attrs = true;
3094 }
3095 /* FIXME: the argument if any is checked for type attributes;
3096 should it be checked for decl ones? */
3097 }
3098 else
3099 {
3100 if (TREE_CODE (*node) == FUNCTION_TYPE
3101 || TREE_CODE (*node) == METHOD_TYPE)
3102 {
3103 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3104 {
3105 warning (OPT_Wattributes, "%qs attribute ignored",
3106 IDENTIFIER_POINTER (name));
3107 *no_add_attrs = true;
3108 }
3109 }
3110 else if (TREE_CODE (*node) == POINTER_TYPE
3111 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3112 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3113 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3114 {
3115 *node = build_variant_type_copy (*node);
3116 TREE_TYPE (*node) = build_type_attribute_variant
3117 (TREE_TYPE (*node),
3118 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3119 *no_add_attrs = true;
3120 }
3121 else
3122 {
3123 /* Possibly pass this attribute on from the type to a decl. */
3124 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3125 | (int) ATTR_FLAG_FUNCTION_NEXT
3126 | (int) ATTR_FLAG_ARRAY_NEXT))
3127 {
3128 *no_add_attrs = true;
3129 return tree_cons (name, args, NULL_TREE);
3130 }
3131 else
3132 {
3133 warning (OPT_Wattributes, "%qs attribute ignored",
3134 IDENTIFIER_POINTER (name));
3135 }
3136 }
3137 }
3138
3139 return NULL_TREE;
3140 }
3141
3142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3143 /* Handle the "notshared" attribute. This attribute is another way of
3144 requesting hidden visibility. ARM's compiler supports
3145 "__declspec(notshared)"; we support the same thing via an
3146 attribute. */
3147
3148 static tree
3149 arm_handle_notshared_attribute (tree *node,
3150 tree name ATTRIBUTE_UNUSED,
3151 tree args ATTRIBUTE_UNUSED,
3152 int flags ATTRIBUTE_UNUSED,
3153 bool *no_add_attrs)
3154 {
3155 tree decl = TYPE_NAME (*node);
3156
3157 if (decl)
3158 {
3159 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3160 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3161 *no_add_attrs = false;
3162 }
3163 return NULL_TREE;
3164 }
3165 #endif
3166
3167 /* Return 0 if the attributes for two types are incompatible, 1 if they
3168 are compatible, and 2 if they are nearly compatible (which causes a
3169 warning to be generated). */
3170 static int
3171 arm_comp_type_attributes (const_tree type1, const_tree type2)
3172 {
3173 int l1, l2, s1, s2;
3174
3175 /* Check for mismatch of non-default calling convention. */
3176 if (TREE_CODE (type1) != FUNCTION_TYPE)
3177 return 1;
3178
3179 /* Check for mismatched call attributes. */
3180 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3181 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3182 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3183 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3184
3185 /* Only bother to check if an attribute is defined. */
3186 if (l1 | l2 | s1 | s2)
3187 {
3188 /* If one type has an attribute, the other must have the same attribute. */
3189 if ((l1 != l2) || (s1 != s2))
3190 return 0;
3191
3192 /* Disallow mixed attributes. */
3193 if ((l1 & s2) || (l2 & s1))
3194 return 0;
3195 }
3196
3197 /* Check for mismatched ISR attribute. */
3198 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3199 if (! l1)
3200 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3201 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3202 if (! l2)
3203     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3204 if (l1 != l2)
3205 return 0;
3206
3207 return 1;
3208 }
3209
3210 /* Assign default attributes to a newly defined type.  This is used to
3211 set short_call/long_call attributes for function types of
3212 functions defined inside corresponding #pragma scopes. */
3213 static void
3214 arm_set_default_type_attributes (tree type)
3215 {
3216   /* Add __attribute__ ((long_call)) to all functions when inside
3217      #pragma long_calls, or __attribute__ ((short_call)) when inside
3218      #pragma no_long_calls.  */
3219 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3220 {
3221 tree type_attr_list, attr_name;
3222 type_attr_list = TYPE_ATTRIBUTES (type);
3223
3224 if (arm_pragma_long_calls == LONG)
3225 attr_name = get_identifier ("long_call");
3226 else if (arm_pragma_long_calls == SHORT)
3227 attr_name = get_identifier ("short_call");
3228 else
3229 return;
3230
3231 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3232 TYPE_ATTRIBUTES (type) = type_attr_list;
3233 }
3234 }
3235 \f
3236 /* Return true if DECL is known to be linked into section SECTION. */
3237
3238 static bool
3239 arm_function_in_section_p (tree decl, section *section)
3240 {
3241 /* We can only be certain about functions defined in the same
3242 compilation unit. */
3243 if (!TREE_STATIC (decl))
3244 return false;
3245
3246 /* Make sure that SYMBOL always binds to the definition in this
3247 compilation unit. */
3248 if (!targetm.binds_local_p (decl))
3249 return false;
3250
3251 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3252 if (!DECL_SECTION_NAME (decl))
3253 {
3254 /* Only cater for unit-at-a-time mode, where we know that the user
3255 cannot later specify a section for DECL. */
3256 if (!flag_unit_at_a_time)
3257 return false;
3258
3259 /* Make sure that we will not create a unique section for DECL. */
3260 if (flag_function_sections || DECL_ONE_ONLY (decl))
3261 return false;
3262 }
3263
3264 return function_section (decl) == section;
3265 }
3266
3267 /* Return nonzero if a 32-bit "long_call" should be generated for
3268 a call from the current function to DECL. We generate a long_call
3269 if the function:
3270
3271      a. has an __attribute__((long_call))
3272 or b. is within the scope of a #pragma long_calls
3273 or c. the -mlong-calls command line switch has been specified
3274
3275 However we do not generate a long call if the function:
3276
3277 d. has an __attribute__ ((short_call))
3278 or e. is inside the scope of a #pragma no_long_calls
3279 or f. is defined in the same section as the current function. */
3280
3281 bool
3282 arm_is_long_call_p (tree decl)
3283 {
3284 tree attrs;
3285
3286 if (!decl)
3287 return TARGET_LONG_CALLS;
3288
3289 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3290 if (lookup_attribute ("short_call", attrs))
3291 return false;
3292
3293 /* For "f", be conservative, and only cater for cases in which the
3294 whole of the current function is placed in the same section. */
3295 if (!flag_reorder_blocks_and_partition
3296 && arm_function_in_section_p (decl, current_function_section ()))
3297 return false;
3298
3299 if (lookup_attribute ("long_call", attrs))
3300 return true;
3301
3302 return TARGET_LONG_CALLS;
3303 }
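
/* For example, a single function can be forced onto the long-call path
   independently of -mlong-calls with

     extern void flash_routine (void) __attribute__ ((long_call));

   and, conversely, __attribute__ ((short_call)) overrides -mlong-calls
   for a function known to be within branch range.  */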
3304
3305 /* Return nonzero if it is ok to make a tail-call to DECL. */
3306 static bool
3307 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3308 {
3309 unsigned long func_type;
3310
3311 if (cfun->machine->sibcall_blocked)
3312 return false;
3313
3314 /* Never tailcall something for which we have no decl, or if we
3315 are in Thumb mode. */
3316 if (decl == NULL || TARGET_THUMB)
3317 return false;
3318
3319 /* The PIC register is live on entry to VxWorks PLT entries, so we
3320 must make the call before restoring the PIC register. */
3321 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3322 return false;
3323
3324 /* Cannot tail-call to long calls, since these are out of range of
3325 a branch instruction. */
3326 if (arm_is_long_call_p (decl))
3327 return false;
3328
3329 /* If we are interworking and the function is not declared static
3330 then we can't tail-call it unless we know that it exists in this
3331 compilation unit (since it might be a Thumb routine). */
3332 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3333 return false;
3334
3335 func_type = arm_current_func_type ();
3336 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3337 if (IS_INTERRUPT (func_type))
3338 return false;
3339
3340 /* Never tailcall if function may be called with a misaligned SP. */
3341 if (IS_STACKALIGN (func_type))
3342 return false;
3343
3344 /* Everything else is ok. */
3345 return true;
3346 }
3347
3348 \f
3349 /* Addressing mode support functions. */
3350
3351 /* Return nonzero if X is a legitimate immediate operand when compiling
3352 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3353 int
3354 legitimate_pic_operand_p (rtx x)
3355 {
3356 if (GET_CODE (x) == SYMBOL_REF
3357 || (GET_CODE (x) == CONST
3358 && GET_CODE (XEXP (x, 0)) == PLUS
3359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3360 return 0;
3361
3362 return 1;
3363 }
3364
3365 /* Record that the current function needs a PIC register. Initialize
3366 cfun->machine->pic_reg if we have not already done so. */
3367
3368 static void
3369 require_pic_register (void)
3370 {
3371 /* A lot of the logic here is made obscure by the fact that this
3372 routine gets called as part of the rtx cost estimation process.
3373 We don't want those calls to affect any assumptions about the real
3374 function; and further, we can't call entry_of_function() until we
3375 start the real expansion process. */
3376 if (!current_function_uses_pic_offset_table)
3377 {
3378 gcc_assert (can_create_pseudo_p ());
3379 if (arm_pic_register != INVALID_REGNUM)
3380 {
3381 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3382
3383 /* Play games to avoid marking the function as needing pic
3384 if we are being called as part of the cost-estimation
3385 process. */
3386 if (current_ir_type () != IR_GIMPLE)
3387 current_function_uses_pic_offset_table = 1;
3388 }
3389 else
3390 {
3391 rtx seq;
3392
3393 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3394
3395 /* Play games to avoid marking the function as needing pic
3396 if we are being called as part of the cost-estimation
3397 process. */
3398 if (current_ir_type () != IR_GIMPLE)
3399 {
3400 current_function_uses_pic_offset_table = 1;
3401 start_sequence ();
3402
3403 arm_load_pic_register (0UL);
3404
3405 seq = get_insns ();
3406 end_sequence ();
3407 emit_insn_after (seq, entry_of_function ());
3408 }
3409 }
3410 }
3411 }
3412
3413 rtx
3414 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3415 {
3416 if (GET_CODE (orig) == SYMBOL_REF
3417 || GET_CODE (orig) == LABEL_REF)
3418 {
3419 rtx pic_ref, address;
3420 rtx insn;
3421 int subregs = 0;
3422
3423 /* If this function doesn't have a pic register, create one now. */
3424 require_pic_register ();
3425
3426 if (reg == 0)
3427 {
3428 gcc_assert (can_create_pseudo_p ());
3429 reg = gen_reg_rtx (Pmode);
3430
3431 subregs = 1;
3432 }
3433
3434 if (subregs)
3435 address = gen_reg_rtx (Pmode);
3436 else
3437 address = reg;
3438
3439 if (TARGET_ARM)
3440 emit_insn (gen_pic_load_addr_arm (address, orig));
3441 else if (TARGET_THUMB2)
3442 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3443 else /* TARGET_THUMB1 */
3444 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3445
3446 /* VxWorks does not impose a fixed gap between segments; the run-time
3447 gap can be different from the object-file gap. We therefore can't
3448 use GOTOFF unless we are absolutely sure that the symbol is in the
3449 same segment as the GOT. Unfortunately, the flexibility of linker
3450 scripts means that we can't be sure of that in general, so assume
3451 that GOTOFF is never valid on VxWorks. */
3452 if ((GET_CODE (orig) == LABEL_REF
3453 || (GET_CODE (orig) == SYMBOL_REF &&
3454 SYMBOL_REF_LOCAL_P (orig)))
3455 && NEED_GOT_RELOC
3456 && !TARGET_VXWORKS_RTP)
3457 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3458 else
3459 {
3460 pic_ref = gen_const_mem (Pmode,
3461 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3462 address));
3463 }
3464
3465 insn = emit_move_insn (reg, pic_ref);
3466
3467 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3468        by the loop optimizer.  */
3469 set_unique_reg_note (insn, REG_EQUAL, orig);
3470
3471 return reg;
3472 }
3473 else if (GET_CODE (orig) == CONST)
3474 {
3475 rtx base, offset;
3476
3477 if (GET_CODE (XEXP (orig, 0)) == PLUS
3478 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3479 return orig;
3480
3481 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3482 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3483 return orig;
3484
3485 if (reg == 0)
3486 {
3487 gcc_assert (can_create_pseudo_p ());
3488 reg = gen_reg_rtx (Pmode);
3489 }
3490
3491 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3492
3493 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3494 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3495 base == reg ? 0 : reg);
3496
3497 if (GET_CODE (offset) == CONST_INT)
3498 {
3499 	  /* The base register doesn't really matter; we only want to
3500 test the index for the appropriate mode. */
3501 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3502 {
3503 gcc_assert (can_create_pseudo_p ());
3504 offset = force_reg (Pmode, offset);
3505 }
3506
3507 if (GET_CODE (offset) == CONST_INT)
3508 return plus_constant (base, INTVAL (offset));
3509 }
3510
3511 if (GET_MODE_SIZE (mode) > 4
3512 && (GET_MODE_CLASS (mode) == MODE_INT
3513 || TARGET_SOFT_FLOAT))
3514 {
3515 emit_insn (gen_addsi3 (reg, base, offset));
3516 return reg;
3517 }
3518
3519 return gen_rtx_PLUS (Pmode, base, offset);
3520 }
3521
3522 return orig;
3523 }
3524
3525
3526 /* Find a spare register to use during the prolog of a function. */
3527
3528 static int
3529 thumb_find_work_register (unsigned long pushed_regs_mask)
3530 {
3531 int reg;
3532
3533 /* Check the argument registers first as these are call-used. The
3534 register allocation order means that sometimes r3 might be used
3535 but earlier argument registers might not, so check them all. */
3536 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3537 if (!df_regs_ever_live_p (reg))
3538 return reg;
3539
3540 /* Before going on to check the call-saved registers we can try a couple
3541 more ways of deducing that r3 is available. The first is when we are
3542 pushing anonymous arguments onto the stack and we have less than 4
3543 registers worth of fixed arguments(*). In this case r3 will be part of
3544 the variable argument list and so we can be sure that it will be
3545 pushed right at the start of the function. Hence it will be available
3546 for the rest of the prologue.
3547      (*): i.e. current_function_pretend_args_size is greater than 0.  */
3548 if (cfun->machine->uses_anonymous_args
3549 && current_function_pretend_args_size > 0)
3550 return LAST_ARG_REGNUM;
3551
3552 /* The other case is when we have fixed arguments but less than 4 registers
3553 worth. In this case r3 might be used in the body of the function, but
3554 it is not being used to convey an argument into the function. In theory
3555 we could just check current_function_args_size to see how many bytes are
3556 being passed in argument registers, but it seems that it is unreliable.
3557 Sometimes it will have the value 0 when in fact arguments are being
3558 passed. (See testcase execute/20021111-1.c for an example). So we also
3559 check the args_info.nregs field as well. The problem with this field is
3560 that it makes no allowances for arguments that are passed to the
3561 function but which are not used. Hence we could miss an opportunity
3562 when a function has an unused argument in r3. But it is better to be
3563 safe than to be sorry. */
3564 if (! cfun->machine->uses_anonymous_args
3565 && current_function_args_size >= 0
3566 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3567 && cfun->args_info.nregs < 4)
3568 return LAST_ARG_REGNUM;
3569
3570 /* Otherwise look for a call-saved register that is going to be pushed. */
3571 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3572 if (pushed_regs_mask & (1 << reg))
3573 return reg;
3574
3575 if (TARGET_THUMB2)
3576 {
3577 /* Thumb-2 can use high regs. */
3578 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3579 if (pushed_regs_mask & (1 << reg))
3580 return reg;
3581 }
3582 /* Something went wrong - thumb_compute_save_reg_mask()
3583 should have arranged for a suitable register to be pushed. */
3584 gcc_unreachable ();
3585 }
3586
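/* Counter used to generate unique label numbers for the PIC and TLS
   address computations below. */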
3587 static GTY(()) int pic_labelno;
3588
3589 /* Generate code to load the PIC register. In Thumb mode a low work
3590 register is picked from SAVED_REGS when a scratch is needed. */
3591
3592 void
3593 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3594 {
3595 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3596 rtx global_offset_table;
3597
3598 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3599 return;
3600
3601 gcc_assert (flag_pic);
3602
3603 pic_reg = cfun->machine->pic_reg;
3604 if (TARGET_VXWORKS_RTP)
3605 {
3606 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3607 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3608 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3609
3610 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3611
3612 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3613 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3614 }
3615 else
3616 {
3617 /* We use an UNSPEC rather than a LABEL_REF because this label
3618 never appears in the code stream. */
3619
3620 labelno = GEN_INT (pic_labelno++);
3621 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3622 l1 = gen_rtx_CONST (VOIDmode, l1);
3623
3624 global_offset_table
3625 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3626 /* On the ARM the PC register contains 'dot + 8' at the time of the
3627 addition, on the Thumb it is 'dot + 4'. */
3628 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3629 if (GOT_PCREL)
3630 {
3631 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3632 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3633 }
3634 else
3635 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3636
3637 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3638 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3639
3640 if (TARGET_ARM)
3641 {
3642 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3643 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3644 }
3645 else if (TARGET_THUMB2)
3646 {
3647 /* Thumb-2 only allows very limited access to the PC. Calculate the
3648 address in a temporary register. */
3649 if (arm_pic_register != INVALID_REGNUM)
3650 {
3651 pic_tmp = gen_rtx_REG (SImode,
3652 thumb_find_work_register (saved_regs));
3653 }
3654 else
3655 {
3656 gcc_assert (can_create_pseudo_p ());
3657 pic_tmp = gen_reg_rtx (Pmode);
3658 }
3659
3660 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3661 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3662 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3663 }
3664 else /* TARGET_THUMB1 */
3665 {
3666 if (arm_pic_register != INVALID_REGNUM
3667 && REGNO (pic_reg) > LAST_LO_REGNUM)
3668 {
3669 /* We will have pushed the pic register, so we should always be
3670 able to find a work register. */
3671 pic_tmp = gen_rtx_REG (SImode,
3672 thumb_find_work_register (saved_regs));
3673 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3674 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3675 }
3676 else
3677 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3678 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3679 }
3680 }
3681
3682 /* Need to emit this whether or not we obey regdecls,
3683 since setjmp/longjmp can cause life info to screw up. */
3684 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3685 }
3686
3687
3688 /* Return nonzero if X is valid as an ARM state addressing register. */
3689 static int
3690 arm_address_register_rtx_p (rtx x, int strict_p)
3691 {
3692 int regno;
3693
3694 if (GET_CODE (x) != REG)
3695 return 0;
3696
3697 regno = REGNO (x);
3698
3699 if (strict_p)
3700 return ARM_REGNO_OK_FOR_BASE_P (regno);
3701
3702 return (regno <= LAST_ARM_REGNUM
3703 || regno >= FIRST_PSEUDO_REGISTER
3704 || regno == FRAME_POINTER_REGNUM
3705 || regno == ARG_POINTER_REGNUM);
3706 }
3707
3708 /* Return TRUE if this rtx is the difference of a symbol and a label,
3709 and will reduce to a PC-relative relocation in the object file.
3710 Expressions like this can be left alone when generating PIC, rather
3711 than forced through the GOT. */
3712 static int
3713 pcrel_constant_p (rtx x)
3714 {
3715 if (GET_CODE (x) == MINUS)
3716 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3717
3718 return FALSE;
3719 }
3720
3721 /* Return nonzero if X is a valid ARM state address operand. */
3722 int
3723 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3724 int strict_p)
3725 {
3726 bool use_ldrd;
3727 enum rtx_code code = GET_CODE (x);
3728
3729 if (arm_address_register_rtx_p (x, strict_p))
3730 return 1;
3731
3732 use_ldrd = (TARGET_LDRD
3733 && (mode == DImode
3734 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3735
3736 if (code == POST_INC || code == PRE_DEC
3737 || ((code == PRE_INC || code == POST_DEC)
3738 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3739 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3740
3741 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3742 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3743 && GET_CODE (XEXP (x, 1)) == PLUS
3744 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3745 {
3746 rtx addend = XEXP (XEXP (x, 1), 1);
3747
3748 /* Don't allow ldrd post increment by register because it's hard
3749 to fixup invalid register choices. */
3750 if (use_ldrd
3751 && GET_CODE (x) == POST_MODIFY
3752 && GET_CODE (addend) == REG)
3753 return 0;
3754
3755 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3756 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3757 }
3758
3759 /* After reload constants split into minipools will have addresses
3760 from a LABEL_REF. */
3761 else if (reload_completed
3762 && (code == LABEL_REF
3763 || (code == CONST
3764 && GET_CODE (XEXP (x, 0)) == PLUS
3765 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3766 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3767 return 1;
3768
3769 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3770 return 0;
3771
3772 else if (code == PLUS)
3773 {
3774 rtx xop0 = XEXP (x, 0);
3775 rtx xop1 = XEXP (x, 1);
3776
3777 return ((arm_address_register_rtx_p (xop0, strict_p)
3778 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3779 || (arm_address_register_rtx_p (xop1, strict_p)
3780 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3781 }
3782
3783 #if 0
3784 /* Reload currently can't handle MINUS, so disable this for now */
3785 else if (GET_CODE (x) == MINUS)
3786 {
3787 rtx xop0 = XEXP (x, 0);
3788 rtx xop1 = XEXP (x, 1);
3789
3790 return (arm_address_register_rtx_p (xop0, strict_p)
3791 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3792 }
3793 #endif
3794
3795 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3796 && code == SYMBOL_REF
3797 && CONSTANT_POOL_ADDRESS_P (x)
3798 && ! (flag_pic
3799 && symbol_mentioned_p (get_pool_constant (x))
3800 && ! pcrel_constant_p (get_pool_constant (x))))
3801 return 1;
3802
3803 return 0;
3804 }
3805
3806 /* Return nonzero if X is a valid Thumb-2 address operand. */
3807 int
3808 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3809 {
3810 bool use_ldrd;
3811 enum rtx_code code = GET_CODE (x);
3812
3813 if (arm_address_register_rtx_p (x, strict_p))
3814 return 1;
3815
3816 use_ldrd = (TARGET_LDRD
3817 && (mode == DImode
3818 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3819
3820 if (code == POST_INC || code == PRE_DEC
3821 || ((code == PRE_INC || code == POST_DEC)
3822 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3823 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3824
3825 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3826 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3827 && GET_CODE (XEXP (x, 1)) == PLUS
3828 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3829 {
3830 /* Thumb-2 only has autoincrement by constant. */
3831 rtx addend = XEXP (XEXP (x, 1), 1);
3832 HOST_WIDE_INT offset;
3833
3834 if (GET_CODE (addend) != CONST_INT)
3835 return 0;
3836
3837 offset = INTVAL (addend);
3838 if (GET_MODE_SIZE (mode) <= 4)
3839 return (offset > -256 && offset < 256);
3840
3841 return (use_ldrd && offset > -1024 && offset < 1024
3842 && (offset & 3) == 0);
3843 }
3844
3845 /* After reload constants split into minipools will have addresses
3846 from a LABEL_REF. */
3847 else if (reload_completed
3848 && (code == LABEL_REF
3849 || (code == CONST
3850 && GET_CODE (XEXP (x, 0)) == PLUS
3851 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3852 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3853 return 1;
3854
3855 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3856 return 0;
3857
3858 else if (code == PLUS)
3859 {
3860 rtx xop0 = XEXP (x, 0);
3861 rtx xop1 = XEXP (x, 1);
3862
3863 return ((arm_address_register_rtx_p (xop0, strict_p)
3864 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3865 || (arm_address_register_rtx_p (xop1, strict_p)
3866 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3867 }
3868
3869 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3870 && code == SYMBOL_REF
3871 && CONSTANT_POOL_ADDRESS_P (x)
3872 && ! (flag_pic
3873 && symbol_mentioned_p (get_pool_constant (x))
3874 && ! pcrel_constant_p (get_pool_constant (x))))
3875 return 1;
3876
3877 return 0;
3878 }
3879
3880 /* Return nonzero if INDEX is valid for an address index operand in
3881 ARM state. */
3882 static int
3883 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3884 int strict_p)
3885 {
3886 HOST_WIDE_INT range;
3887 enum rtx_code code = GET_CODE (index);
3888
3889 /* Standard coprocessor addressing modes. */
3890 if (TARGET_HARD_FLOAT
3891 && (TARGET_FPA || TARGET_MAVERICK)
3892 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3893 || (TARGET_MAVERICK && mode == DImode)))
3894 return (code == CONST_INT && INTVAL (index) < 1024
3895 && INTVAL (index) > -1024
3896 && (INTVAL (index) & 3) == 0);
3897
3898 if (TARGET_NEON
3899 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3900 return (code == CONST_INT
3901 && INTVAL (index) < 1016
3902 && INTVAL (index) > -1024
3903 && (INTVAL (index) & 3) == 0);
3904
3905 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3906 return (code == CONST_INT
3907 && INTVAL (index) < 1024
3908 && INTVAL (index) > -1024
3909 && (INTVAL (index) & 3) == 0);
3910
3911 if (arm_address_register_rtx_p (index, strict_p)
3912 && (GET_MODE_SIZE (mode) <= 4))
3913 return 1;
3914
3915 if (mode == DImode || mode == DFmode)
3916 {
3917 if (code == CONST_INT)
3918 {
3919 HOST_WIDE_INT val = INTVAL (index);
3920
3921 if (TARGET_LDRD)
3922 return val > -256 && val < 256;
3923 else
3924 return val > -4096 && val < 4092;
3925 }
3926
3927 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3928 }
3929
3930 if (GET_MODE_SIZE (mode) <= 4
3931 && ! (arm_arch4
3932 && (mode == HImode
3933 || (mode == QImode && outer == SIGN_EXTEND))))
3934 {
3935 if (code == MULT)
3936 {
3937 rtx xiop0 = XEXP (index, 0);
3938 rtx xiop1 = XEXP (index, 1);
3939
3940 return ((arm_address_register_rtx_p (xiop0, strict_p)
3941 && power_of_two_operand (xiop1, SImode))
3942 || (arm_address_register_rtx_p (xiop1, strict_p)
3943 && power_of_two_operand (xiop0, SImode)));
3944 }
3945 else if (code == LSHIFTRT || code == ASHIFTRT
3946 || code == ASHIFT || code == ROTATERT)
3947 {
3948 rtx op = XEXP (index, 1);
3949
3950 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3951 && GET_CODE (op) == CONST_INT
3952 && INTVAL (op) > 0
3953 && INTVAL (op) <= 31);
3954 }
3955 }
3956
3957 /* For ARM v4 we may be doing a sign-extend operation during the
3958 load. */
3959 if (arm_arch4)
3960 {
3961 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3962 range = 256;
3963 else
3964 range = 4096;
3965 }
3966 else
3967 range = (mode == HImode) ? 4095 : 4096;
3968
3969 return (code == CONST_INT
3970 && INTVAL (index) < range
3971 && INTVAL (index) > -range);
3972 }
3973
3974 /* Return true if OP is a valid index scaling factor for Thumb-2 address
3975 index operand, i.e. 1, 2, 4 or 8. */
3976 static bool
3977 thumb2_index_mul_operand (rtx op)
3978 {
3979 HOST_WIDE_INT val;
3980
3981 if (GET_CODE (op) != CONST_INT)
3982 return false;
3983
3984 val = INTVAL (op);
3985 return (val == 1 || val == 2 || val == 4 || val == 8);
3986 }
3987
3988 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
3989 static int
3990 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
3991 {
3992 enum rtx_code code = GET_CODE (index);
3993
3994 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
3995 /* Standard coprocessor addressing modes. */
3996 if (TARGET_HARD_FLOAT
3997 && (TARGET_FPA || TARGET_MAVERICK)
3998 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3999 || (TARGET_MAVERICK && mode == DImode)))
4000 return (code == CONST_INT && INTVAL (index) < 1024
4001 && INTVAL (index) > -1024
4002 && (INTVAL (index) & 3) == 0);
4003
4004 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4005 {
4006 /* For DImode assume values will usually live in core regs
4007 and only allow LDRD addressing modes. */
4008 if (!TARGET_LDRD || mode != DImode)
4009 return (code == CONST_INT
4010 && INTVAL (index) < 1024
4011 && INTVAL (index) > -1024
4012 && (INTVAL (index) & 3) == 0);
4013 }
4014
4015 if (TARGET_NEON
4016 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4017 return (code == CONST_INT
4018 && INTVAL (index) < 1016
4019 && INTVAL (index) > -1024
4020 && (INTVAL (index) & 3) == 0);
4021
4022 if (arm_address_register_rtx_p (index, strict_p)
4023 && (GET_MODE_SIZE (mode) <= 4))
4024 return 1;
4025
4026 if (mode == DImode || mode == DFmode)
4027 {
4028 /* ??? Can we assume ldrd for thumb2? */
4029 /* Thumb-2 ldrd only has reg+const addressing modes. */
4030 if (code != CONST_INT)
4031 return 0;
4032 
4033 /* ldrd supports offsets of +-1020.
4034 However the ldr fallback does not. */
4035 return (INTVAL (index) > -256 && INTVAL (index) < 256
4036 && (INTVAL (index) & 3) == 0);
4037 }
4038
4039 if (code == MULT)
4040 {
4041 rtx xiop0 = XEXP (index, 0);
4042 rtx xiop1 = XEXP (index, 1);
4043
4044 return ((arm_address_register_rtx_p (xiop0, strict_p)
4045 && thumb2_index_mul_operand (xiop1))
4046 || (arm_address_register_rtx_p (xiop1, strict_p)
4047 && thumb2_index_mul_operand (xiop0)));
4048 }
4049 else if (code == ASHIFT)
4050 {
4051 rtx op = XEXP (index, 1);
4052
4053 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4054 && GET_CODE (op) == CONST_INT
4055 && INTVAL (op) > 0
4056 && INTVAL (op) <= 3);
4057 }
4058
4059 return (code == CONST_INT
4060 && INTVAL (index) < 4096
4061 && INTVAL (index) > -256);
4062 }
4063
4064 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4065 static int
4066 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4067 {
4068 int regno;
4069
4070 if (GET_CODE (x) != REG)
4071 return 0;
4072
4073 regno = REGNO (x);
4074
4075 if (strict_p)
4076 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4077
4078 return (regno <= LAST_LO_REGNUM
4079 || regno > LAST_VIRTUAL_REGISTER
4080 || regno == FRAME_POINTER_REGNUM
4081 || (GET_MODE_SIZE (mode) >= 4
4082 && (regno == STACK_POINTER_REGNUM
4083 || regno >= FIRST_PSEUDO_REGISTER
4084 || x == hard_frame_pointer_rtx
4085 || x == arg_pointer_rtx)));
4086 }
4087
4088 /* Return nonzero if x is a legitimate index register. This is the case
4089 for any base register that can access a QImode object. */
4090 inline static int
4091 thumb1_index_register_rtx_p (rtx x, int strict_p)
4092 {
4093 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4094 }
4095
4096 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4097
4098 The AP may be eliminated to either the SP or the FP, so we use the
4099 least common denominator, e.g. SImode, and offsets from 0 to 64.
4100
4101 ??? Verify whether the above is the right approach.
4102
4103 ??? Also, the FP may be eliminated to the SP, so perhaps that
4104 needs special handling also.
4105
4106 ??? Look at how the mips16 port solves this problem. It probably uses
4107 better ways to solve some of these problems.
4108
4109 Although it is not incorrect, we don't accept QImode and HImode
4110 addresses based on the frame pointer or arg pointer until the
4111 reload pass starts. This is so that eliminating such addresses
4112 into stack based ones won't produce impossible code. */
4113 int
4114 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4115 {
4116 /* ??? Not clear if this is right. Experiment. */
4117 if (GET_MODE_SIZE (mode) < 4
4118 && !(reload_in_progress || reload_completed)
4119 && (reg_mentioned_p (frame_pointer_rtx, x)
4120 || reg_mentioned_p (arg_pointer_rtx, x)
4121 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4122 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4123 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4124 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4125 return 0;
4126
4127 /* Accept any base register. SP only in SImode or larger. */
4128 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4129 return 1;
4130
4131 /* This is PC relative data before arm_reorg runs. */
4132 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4133 && GET_CODE (x) == SYMBOL_REF
4134 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4135 return 1;
4136
4137 /* This is PC relative data after arm_reorg runs. */
4138 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4139 && (GET_CODE (x) == LABEL_REF
4140 || (GET_CODE (x) == CONST
4141 && GET_CODE (XEXP (x, 0)) == PLUS
4142 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4144 return 1;
4145
4146 /* Post-inc indexing only supported for SImode and larger. */
4147 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4148 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4149 return 1;
4150
4151 else if (GET_CODE (x) == PLUS)
4152 {
4153 /* REG+REG address can be any two index registers. */
4154 /* We disallow FRAME+REG addressing since we know that FRAME
4155 will be replaced with STACK, and SP relative addressing only
4156 permits SP+OFFSET. */
4157 if (GET_MODE_SIZE (mode) <= 4
4158 && XEXP (x, 0) != frame_pointer_rtx
4159 && XEXP (x, 1) != frame_pointer_rtx
4160 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4161 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4162 return 1;
4163
4164 /* REG+const has 5-7 bit offset for non-SP registers. */
4165 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4166 || XEXP (x, 0) == arg_pointer_rtx)
4167 && GET_CODE (XEXP (x, 1)) == CONST_INT
4168 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4169 return 1;
4170
4171 /* REG+const has 10-bit offset for SP, but only SImode and
4172 larger is supported. */
4173 /* ??? Should probably check for DI/DFmode overflow here
4174 just like GO_IF_LEGITIMATE_OFFSET does. */
4175 else if (GET_CODE (XEXP (x, 0)) == REG
4176 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4177 && GET_MODE_SIZE (mode) >= 4
4178 && GET_CODE (XEXP (x, 1)) == CONST_INT
4179 && INTVAL (XEXP (x, 1)) >= 0
4180 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4181 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4182 return 1;
4183
4184 else if (GET_CODE (XEXP (x, 0)) == REG
4185 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4186 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4187 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4188 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4189 && GET_MODE_SIZE (mode) >= 4
4190 && GET_CODE (XEXP (x, 1)) == CONST_INT
4191 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4192 return 1;
4193 }
4194
4195 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4196 && GET_MODE_SIZE (mode) == 4
4197 && GET_CODE (x) == SYMBOL_REF
4198 && CONSTANT_POOL_ADDRESS_P (x)
4199 && ! (flag_pic
4200 && symbol_mentioned_p (get_pool_constant (x))
4201 && ! pcrel_constant_p (get_pool_constant (x))))
4202 return 1;
4203
4204 return 0;
4205 }
4206
4207 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4208 instruction of mode MODE. */
4209 int
4210 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4211 {
4212 switch (GET_MODE_SIZE (mode))
4213 {
4214 case 1:
4215 return val >= 0 && val < 32;
4216
4217 case 2:
4218 return val >= 0 && val < 64 && (val & 1) == 0;
4219
4220 default:
4221 return (val >= 0
4222 && (val + GET_MODE_SIZE (mode)) <= 128
4223 && (val & 3) == 0);
4224 }
4225 }
4226
4227 /* Build the SYMBOL_REF for __tls_get_addr. */
4228
4229 static GTY(()) rtx tls_get_addr_libfunc;
4230
4231 static rtx
4232 get_tls_get_addr (void)
4233 {
4234 if (!tls_get_addr_libfunc)
4235 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4236 return tls_get_addr_libfunc;
4237 }
4238
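/* Load the thread pointer into TARGET (allocating a fresh SImode register
   if TARGET is NULL) and return the register that holds it. */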
4239 static rtx
4240 arm_load_tp (rtx target)
4241 {
4242 if (!target)
4243 target = gen_reg_rtx (SImode);
4244
4245 if (TARGET_HARD_TP)
4246 {
4247 /* Can return in any reg. */
4248 emit_insn (gen_load_tp_hard (target));
4249 }
4250 else
4251 {
4252 /* Always returned in r0. Immediately copy the result into a pseudo,
4253 otherwise other uses of r0 (e.g. setting up function arguments) may
4254 clobber the value. */
4255
4256 rtx tmp;
4257
4258 emit_insn (gen_load_tp_soft ());
4259
4260 tmp = gen_rtx_REG (SImode, 0);
4261 emit_move_insn (target, tmp);
4262 }
4263 return target;
4264 }
4265
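/* Wrap X in a CONST and copy it into REG, allocating a new SImode register
   if REG is NULL_RTX. Returns the register used. */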
4266 static rtx
4267 load_tls_operand (rtx x, rtx reg)
4268 {
4269 rtx tmp;
4270
4271 if (reg == NULL_RTX)
4272 reg = gen_reg_rtx (SImode);
4273
4274 tmp = gen_rtx_CONST (SImode, x);
4275
4276 emit_move_insn (reg, tmp);
4277
4278 return reg;
4279 }
4280
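/* Emit a call to __tls_get_addr for address X using relocation RELOC.
   The call is built as a separate insn sequence, which is returned; the
   value rtx is stored in *VALUEP so the caller can wrap the whole thing
   in a libcall block. REG, if non-null, is used as a scratch register
   when computing the argument. */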
4281 static rtx
4282 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4283 {
4284 rtx insns, label, labelno, sum;
4285
4286 start_sequence ();
4287
4288 labelno = GEN_INT (pic_labelno++);
4289 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4290 label = gen_rtx_CONST (VOIDmode, label);
4291
4292 sum = gen_rtx_UNSPEC (Pmode,
4293 gen_rtvec (4, x, GEN_INT (reloc), label,
4294 GEN_INT (TARGET_ARM ? 8 : 4)),
4295 UNSPEC_TLS);
4296 reg = load_tls_operand (sum, reg);
4297
4298 if (TARGET_ARM)
4299 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4300 else if (TARGET_THUMB2)
4301 {
4302 rtx tmp;
4303 /* Thumb-2 only allows very limited access to the PC. Calculate
4304 the address in a temporary register. */
4305 tmp = gen_reg_rtx (SImode);
4306 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4307 emit_insn (gen_addsi3 (reg, reg, tmp));
4308 }
4309 else /* TARGET_THUMB1 */
4310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4311
4312 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4313 Pmode, 1, reg, Pmode);
4314
4315 insns = get_insns ();
4316 end_sequence ();
4317
4318 return insns;
4319 }
4320
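/* Legitimize X, a TLS symbol reference, returning an rtx for the address.
   REG may be used as a scratch. The code emitted depends on the TLS model
   (global dynamic, local dynamic, initial exec or local exec) recorded on
   the SYMBOL_REF. */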
4321 rtx
4322 legitimize_tls_address (rtx x, rtx reg)
4323 {
4324 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4325 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4326
4327 switch (model)
4328 {
4329 case TLS_MODEL_GLOBAL_DYNAMIC:
4330 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4331 dest = gen_reg_rtx (Pmode);
4332 emit_libcall_block (insns, dest, ret, x);
4333 return dest;
4334
4335 case TLS_MODEL_LOCAL_DYNAMIC:
4336 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4337
4338 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4339 share the LDM result with other LD model accesses. */
4340 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4341 UNSPEC_TLS);
4342 dest = gen_reg_rtx (Pmode);
4343 emit_libcall_block (insns, dest, ret, eqv);
4344
4345 /* Load the addend. */
4346 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4347 UNSPEC_TLS);
4348 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4349 return gen_rtx_PLUS (Pmode, dest, addend);
4350
4351 case TLS_MODEL_INITIAL_EXEC:
4352 labelno = GEN_INT (pic_labelno++);
4353 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4354 label = gen_rtx_CONST (VOIDmode, label);
4355 sum = gen_rtx_UNSPEC (Pmode,
4356 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4357 GEN_INT (TARGET_ARM ? 8 : 4)),
4358 UNSPEC_TLS);
4359 reg = load_tls_operand (sum, reg);
4360
4361 if (TARGET_ARM)
4362 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4363 else if (TARGET_THUMB2)
4364 {
4365 rtx tmp;
4366 /* Thumb-2 only allows very limited access to the PC. Calculate
4367 the address in a temporary register. */
4368 tmp = gen_reg_rtx (SImode);
4369 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4370 emit_insn (gen_addsi3 (reg, reg, tmp));
4371 emit_move_insn (reg, gen_const_mem (SImode, reg));
4372 }
4373 else
4374 {
4375 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4376 emit_move_insn (reg, gen_const_mem (SImode, reg));
4377 }
4378
4379 tp = arm_load_tp (NULL_RTX);
4380
4381 return gen_rtx_PLUS (Pmode, tp, reg);
4382
4383 case TLS_MODEL_LOCAL_EXEC:
4384 tp = arm_load_tp (NULL_RTX);
4385
4386 reg = gen_rtx_UNSPEC (Pmode,
4387 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4388 UNSPEC_TLS);
4389 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4390
4391 return gen_rtx_PLUS (Pmode, tp, reg);
4392
4393 default:
4394 abort ();
4395 }
4396 }
4397
4398 /* Try machine-dependent ways of modifying an illegitimate address
4399 to be legitimate. If we find one, return the new, valid address. */
4400 rtx
4401 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4402 {
4403 if (arm_tls_symbol_p (x))
4404 return legitimize_tls_address (x, NULL_RTX);
4405
4406 if (GET_CODE (x) == PLUS)
4407 {
4408 rtx xop0 = XEXP (x, 0);
4409 rtx xop1 = XEXP (x, 1);
4410
4411 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4412 xop0 = force_reg (SImode, xop0);
4413
4414 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4415 xop1 = force_reg (SImode, xop1);
4416
4417 if (ARM_BASE_REGISTER_RTX_P (xop0)
4418 && GET_CODE (xop1) == CONST_INT)
4419 {
4420 HOST_WIDE_INT n, low_n;
4421 rtx base_reg, val;
4422 n = INTVAL (xop1);
4423
4424 /* VFP addressing modes actually allow greater offsets, but for
4425 now we just stick with the lowest common denominator. */
4426 if (mode == DImode
4427 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4428 {
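/* Keep only a small residual offset (between -11 and 4) in the
   address itself; the bulk of the constant is folded into the
   base register below. */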
4429 low_n = n & 0x0f;
4430 n &= ~0x0f;
4431 if (low_n > 4)
4432 {
4433 n += 16;
4434 low_n -= 16;
4435 }
4436 }
4437 else
4438 {
4439 low_n = ((mode) == TImode ? 0
4440 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4441 n -= low_n;
4442 }
4443
4444 base_reg = gen_reg_rtx (SImode);
4445 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4446 emit_move_insn (base_reg, val);
4447 x = plus_constant (base_reg, low_n);
4448 }
4449 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4450 x = gen_rtx_PLUS (SImode, xop0, xop1);
4451 }
4452
4453 /* XXX We don't allow MINUS any more -- see comment in
4454 arm_legitimate_address_p (). */
4455 else if (GET_CODE (x) == MINUS)
4456 {
4457 rtx xop0 = XEXP (x, 0);
4458 rtx xop1 = XEXP (x, 1);
4459
4460 if (CONSTANT_P (xop0))
4461 xop0 = force_reg (SImode, xop0);
4462
4463 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4464 xop1 = force_reg (SImode, xop1);
4465
4466 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4467 x = gen_rtx_MINUS (SImode, xop0, xop1);
4468 }
4469
4470 /* Make sure to take full advantage of the pre-indexed addressing mode
4471 with absolute addresses, which often allows the base register to be
4472 factorized for multiple adjacent memory references, and it might
4473 even allow the minipool to be avoided entirely. */
4474 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4475 {
4476 unsigned int bits;
4477 HOST_WIDE_INT mask, base, index;
4478 rtx base_reg;
4479
4480 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4481 use an 8-bit index. So let's use a 12-bit index for SImode only and
4482 hope that arm_gen_constant will enable ldrb to use more bits. */
4483 bits = (mode == SImode) ? 12 : 8;
4484 mask = (1 << bits) - 1;
4485 base = INTVAL (x) & ~mask;
4486 index = INTVAL (x) & mask;
4487 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4488 {
4489 /* It'll most probably be more efficient to generate the base
4490 with more bits set and use a negative index instead. */
4491 base |= mask;
4492 index -= mask;
4493 }
4494 base_reg = force_reg (SImode, GEN_INT (base));
4495 x = plus_constant (base_reg, index);
4496 }
4497
4498 if (flag_pic)
4499 {
4500 /* We need to find and carefully transform any SYMBOL and LABEL
4501 references; so go back to the original address expression. */
4502 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4503
4504 if (new_x != orig_x)
4505 x = new_x;
4506 }
4507
4508 return x;
4509 }
4510
4511
4512 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4513 to be legitimate. If we find one, return the new, valid address. */
4514 rtx
4515 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4516 {
4517 if (arm_tls_symbol_p (x))
4518 return legitimize_tls_address (x, NULL_RTX);
4519
4520 if (GET_CODE (x) == PLUS
4521 && GET_CODE (XEXP (x, 1)) == CONST_INT
4522 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4523 || INTVAL (XEXP (x, 1)) < 0))
4524 {
4525 rtx xop0 = XEXP (x, 0);
4526 rtx xop1 = XEXP (x, 1);
4527 HOST_WIDE_INT offset = INTVAL (xop1);
4528
4529 /* Try and fold the offset into a biasing of the base register and
4530 then offsetting that. Don't do this when optimizing for space
4531 since it can cause too many CSEs. */
4532 if (optimize_size && offset >= 0
4533 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4534 {
4535 HOST_WIDE_INT delta;
4536
4537 if (offset >= 256)
4538 delta = offset - (256 - GET_MODE_SIZE (mode));
4539 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4540 delta = 31 * GET_MODE_SIZE (mode);
4541 else
4542 delta = offset & (~31 * GET_MODE_SIZE (mode));
4543
4544 xop0 = force_operand (plus_constant (xop0, offset - delta),
4545 NULL_RTX);
4546 x = plus_constant (xop0, delta);
4547 }
4548 else if (offset < 0 && offset > -256)
4549 /* Small negative offsets are best done with a subtract before the
4550 dereference, since forcing these into a register normally takes
4551 two instructions. */
4552 x = force_operand (x, NULL_RTX);
4553 else
4554 {
4555 /* For the remaining cases, force the constant into a register. */
4556 xop1 = force_reg (SImode, xop1);
4557 x = gen_rtx_PLUS (SImode, xop0, xop1);
4558 }
4559 }
4560 else if (GET_CODE (x) == PLUS
4561 && s_register_operand (XEXP (x, 1), SImode)
4562 && !s_register_operand (XEXP (x, 0), SImode))
4563 {
4564 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4565
4566 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4567 }
4568
4569 if (flag_pic)
4570 {
4571 /* We need to find and carefully transform any SYMBOL and LABEL
4572 references; so go back to the original address expression. */
4573 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4574
4575 if (new_x != orig_x)
4576 x = new_x;
4577 }
4578
4579 return x;
4580 }
4581
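/* Called during reload to fix up a Thumb address that needs reloading.
   If X is an SP-relative access whose offset is out of range for MODE, or
   a sum of two hi registers, reload the whole address into a single base
   register and return the (copied) address; otherwise return NULL and let
   the generic reload machinery handle it. */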
4582 rtx
4583 thumb_legitimize_reload_address (rtx *x_p,
4584 enum machine_mode mode,
4585 int opnum, int type,
4586 int ind_levels ATTRIBUTE_UNUSED)
4587 {
4588 rtx x = *x_p;
4589
4590 if (GET_CODE (x) == PLUS
4591 && GET_MODE_SIZE (mode) < 4
4592 && REG_P (XEXP (x, 0))
4593 && XEXP (x, 0) == stack_pointer_rtx
4594 && GET_CODE (XEXP (x, 1)) == CONST_INT
4595 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4596 {
4597 rtx orig_x = x;
4598
4599 x = copy_rtx (x);
4600 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4601 Pmode, VOIDmode, 0, 0, opnum, type);
4602 return x;
4603 }
4604
4605 /* If both registers are hi-regs, then it's better to reload the
4606 entire expression rather than each register individually. That
4607 only requires one reload register rather than two. */
4608 if (GET_CODE (x) == PLUS
4609 && REG_P (XEXP (x, 0))
4610 && REG_P (XEXP (x, 1))
4611 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4612 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4613 {
4614 rtx orig_x = x;
4615
4616 x = copy_rtx (x);
4617 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4618 Pmode, VOIDmode, 0, 0, opnum, type);
4619 return x;
4620 }
4621
4622 return NULL;
4623 }
4624
4625 /* Test for various thread-local symbols. */
4626
4627 /* Return TRUE if X is a thread-local symbol. */
4628
4629 static bool
4630 arm_tls_symbol_p (rtx x)
4631 {
4632 if (! TARGET_HAVE_TLS)
4633 return false;
4634
4635 if (GET_CODE (x) != SYMBOL_REF)
4636 return false;
4637
4638 return SYMBOL_REF_TLS_MODEL (x) != 0;
4639 }
4640
4641 /* Helper for arm_tls_referenced_p. */
4642
4643 static int
4644 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4645 {
4646 if (GET_CODE (*x) == SYMBOL_REF)
4647 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4648
4649 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4650 TLS offsets, not real symbol references. */
4651 if (GET_CODE (*x) == UNSPEC
4652 && XINT (*x, 1) == UNSPEC_TLS)
4653 return -1;
4654
4655 return 0;
4656 }
4657
4658 /* Return TRUE if X contains any TLS symbol references. */
4659
4660 bool
4661 arm_tls_referenced_p (rtx x)
4662 {
4663 if (! TARGET_HAVE_TLS)
4664 return false;
4665
4666 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4667 }
4668
4669 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4670
4671 bool
4672 arm_cannot_force_const_mem (rtx x)
4673 {
4674 rtx base, offset;
4675
4676 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4677 {
4678 split_const (x, &base, &offset);
4679 if (GET_CODE (base) == SYMBOL_REF
4680 && !offset_within_block_p (base, INTVAL (offset)))
4681 return true;
4682 }
4683 return arm_tls_referenced_p (x);
4684 }
4685 \f
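/* Helpers for the rtx cost functions below: REG_OR_SUBREG_REG tests
   whether X is a register or a SUBREG of a register, and
   REG_OR_SUBREG_RTX strips any SUBREG wrapper. */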
4686 #define REG_OR_SUBREG_REG(X) \
4687 (GET_CODE (X) == REG \
4688 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4689
4690 #define REG_OR_SUBREG_RTX(X) \
4691 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4692
4693 #ifndef COSTS_N_INSNS
4694 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4695 #endif
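/* Approximate rtx costs for Thumb-1 code. CODE is GET_CODE (X) and OUTER
   is the code of the enclosing expression, which matters mainly when
   costing constants in context. */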
4696 static inline int
4697 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4698 {
4699 enum machine_mode mode = GET_MODE (x);
4700
4701 switch (code)
4702 {
4703 case ASHIFT:
4704 case ASHIFTRT:
4705 case LSHIFTRT:
4706 case ROTATERT:
4707 case PLUS:
4708 case MINUS:
4709 case COMPARE:
4710 case NEG:
4711 case NOT:
4712 return COSTS_N_INSNS (1);
4713
4714 case MULT:
4715 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4716 {
4717 int cycles = 0;
4718 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4719
4720 while (i)
4721 {
4722 i >>= 2;
4723 cycles++;
4724 }
4725 return COSTS_N_INSNS (2) + cycles;
4726 }
4727 return COSTS_N_INSNS (1) + 16;
4728
4729 case SET:
4730 return (COSTS_N_INSNS (1)
4731 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4732 + (GET_CODE (SET_DEST (x)) == MEM)));
4733
4734 case CONST_INT:
4735 if (outer == SET)
4736 {
4737 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4738 return 0;
4739 if (thumb_shiftable_const (INTVAL (x)))
4740 return COSTS_N_INSNS (2);
4741 return COSTS_N_INSNS (3);
4742 }
4743 else if ((outer == PLUS || outer == COMPARE)
4744 && INTVAL (x) < 256 && INTVAL (x) > -256)
4745 return 0;
4746 else if (outer == AND
4747 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4748 return COSTS_N_INSNS (1);
4749 else if (outer == ASHIFT || outer == ASHIFTRT
4750 || outer == LSHIFTRT)
4751 return 0;
4752 return COSTS_N_INSNS (2);
4753
4754 case CONST:
4755 case CONST_DOUBLE:
4756 case LABEL_REF:
4757 case SYMBOL_REF:
4758 return COSTS_N_INSNS (3);
4759
4760 case UDIV:
4761 case UMOD:
4762 case DIV:
4763 case MOD:
4764 return 100;
4765
4766 case TRUNCATE:
4767 return 99;
4768
4769 case AND:
4770 case XOR:
4771 case IOR:
4772 /* XXX guess. */
4773 return 8;
4774
4775 case MEM:
4776 /* XXX another guess. */
4777 /* Memory costs quite a lot for the first word, but subsequent words
4778 load at the equivalent of a single insn each. */
4779 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4780 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4781 ? 4 : 0));
4782
4783 case IF_THEN_ELSE:
4784 /* XXX a guess. */
4785 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4786 return 14;
4787 return 2;
4788
4789 case ZERO_EXTEND:
4790 /* XXX still guessing. */
4791 switch (GET_MODE (XEXP (x, 0)))
4792 {
4793 case QImode:
4794 return (1 + (mode == DImode ? 4 : 0)
4795 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4796
4797 case HImode:
4798 return (4 + (mode == DImode ? 4 : 0)
4799 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4800
4801 case SImode:
4802 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4803
4804 default:
4805 return 99;
4806 }
4807
4808 default:
4809 return 99;
4810 }
4811 }
4812
4813
4814 /* Worker routine for arm_rtx_costs. */
4815 /* ??? This needs updating for thumb2. */
4816 static inline int
4817 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4818 {
4819 enum machine_mode mode = GET_MODE (x);
4820 enum rtx_code subcode;
4821 int extra_cost;
4822
4823 switch (code)
4824 {
4825 case MEM:
4826 /* Memory costs quite a lot for the first word, but subsequent words
4827 load at the equivalent of a single insn each. */
4828 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4829 + (GET_CODE (x) == SYMBOL_REF
4830 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4831
4832 case DIV:
4833 case MOD:
4834 case UDIV:
4835 case UMOD:
4836 return optimize_size ? COSTS_N_INSNS (2) : 100;
4837
4838 case ROTATE:
4839 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4840 return 4;
4841 /* Fall through */
4842 case ROTATERT:
4843 if (mode != SImode)
4844 return 8;
4845 /* Fall through */
4846 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4847 if (mode == DImode)
4848 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4849 + ((GET_CODE (XEXP (x, 0)) == REG
4850 || (GET_CODE (XEXP (x, 0)) == SUBREG
4851 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4852 ? 0 : 8));
4853 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4854 || (GET_CODE (XEXP (x, 0)) == SUBREG
4855 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4856 ? 0 : 4)
4857 + ((GET_CODE (XEXP (x, 1)) == REG
4858 || (GET_CODE (XEXP (x, 1)) == SUBREG
4859 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4860 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4861 ? 0 : 4));
4862
4863 case MINUS:
4864 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4865 {
4866 extra_cost = rtx_cost (XEXP (x, 1), code);
4867 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4868 extra_cost += 4 * ARM_NUM_REGS (mode);
4869 return extra_cost;
4870 }
4871
4872 if (mode == DImode)
4873 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4874 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4875 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4876 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4877 ? 0 : 8));
4878
4879 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4880 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4881 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4882 && arm_const_double_rtx (XEXP (x, 1))))
4883 ? 0 : 8)
4884 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4885 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4886 && arm_const_double_rtx (XEXP (x, 0))))
4887 ? 0 : 8));
4888
4889 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4890 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4891 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4892 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4893 || subcode == ASHIFTRT || subcode == LSHIFTRT
4894 || subcode == ROTATE || subcode == ROTATERT
4895 || (subcode == MULT
4896 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4897 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4898 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4899 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4900 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4901 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4902 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4903 return 1;
4904 /* Fall through */
4905
4906 case PLUS:
4907 if (GET_CODE (XEXP (x, 0)) == MULT)
4908 {
4909 extra_cost = rtx_cost (XEXP (x, 0), code);
4910 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4911 extra_cost += 4 * ARM_NUM_REGS (mode);
4912 return extra_cost;
4913 }
4914
4915 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4916 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4917 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4918 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4919 && arm_const_double_rtx (XEXP (x, 1))))
4920 ? 0 : 8));
4921
4922 /* Fall through */
4923 case AND: case XOR: case IOR:
4924 extra_cost = 0;
4925
4926 /* Normally the frame registers will be split into reg+const during
4927 reload, so it is a bad idea to combine them with other instructions,
4928 since then they might not be moved outside of loops. As a compromise
4929 we allow integration with ops that have a constant as their second
4930 operand. */
4931 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4932 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4933 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4934 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4935 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4936 extra_cost = 4;
4937
4938 if (mode == DImode)
4939 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4940 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4941 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4942 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4943 ? 0 : 8));
4944
4945 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4946 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4947 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4948 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4949 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4950 ? 0 : 4));
4951
4952 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4953 return (1 + extra_cost
4954 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4955 || subcode == LSHIFTRT || subcode == ASHIFTRT
4956 || subcode == ROTATE || subcode == ROTATERT
4957 || (subcode == MULT
4958 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4959 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4960 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4961 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4962 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4963 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4964 ? 0 : 4));
4965
4966 return 8;
4967
4968 case MULT:
4969 /* This should have been handled by the CPU specific routines. */
4970 gcc_unreachable ();
4971
4972 case TRUNCATE:
4973 if (arm_arch3m && mode == SImode
4974 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4975 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4976 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4977 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4978 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4979 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4980 return 8;
4981 return 99;
4982
4983 case NEG:
4984 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4985 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4986 /* Fall through */
4987 case NOT:
4988 if (mode == DImode)
4989 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4990
4991 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4992
4993 case IF_THEN_ELSE:
4994 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4995 return 14;
4996 return 2;
4997
4998 case COMPARE:
4999 return 1;
5000
5001 case ABS:
5002 return 4 + (mode == DImode ? 4 : 0);
5003
5004 case SIGN_EXTEND:
5005 /* ??? value extensions are cheaper on armv6. */
5006 if (GET_MODE (XEXP (x, 0)) == QImode)
5007 return (4 + (mode == DImode ? 4 : 0)
5008 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5009 /* Fall through */
5010 case ZERO_EXTEND:
5011 switch (GET_MODE (XEXP (x, 0)))
5012 {
5013 case QImode:
5014 return (1 + (mode == DImode ? 4 : 0)
5015 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5016
5017 case HImode:
5018 return (4 + (mode == DImode ? 4 : 0)
5019 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5020
5021 case SImode:
5022 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5023
5024 case V8QImode:
5025 case V4HImode:
5026 case V2SImode:
5027 case V4QImode:
5028 case V2HImode:
5029 return 1;
5030
5031 default:
5032 gcc_unreachable ();
5033 }
5034 gcc_unreachable ();
5035
5036 case CONST_INT:
5037 if (const_ok_for_arm (INTVAL (x)))
5038 return outer == SET ? 2 : -1;
5039 else if (outer == AND
5040 && const_ok_for_arm (~INTVAL (x)))
5041 return -1;
5042 else if ((outer == COMPARE
5043 || outer == PLUS || outer == MINUS)
5044 && const_ok_for_arm (-INTVAL (x)))
5045 return -1;
5046 else
5047 return 5;
5048
5049 case CONST:
5050 case LABEL_REF:
5051 case SYMBOL_REF:
5052 return 6;
5053
5054 case CONST_DOUBLE:
5055 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5056 return outer == SET ? 2 : -1;
5057 else if ((outer == COMPARE || outer == PLUS)
5058 && neg_const_double_rtx_ok_for_fpa (x))
5059 return -1;
5060 return 7;
5061
5062 default:
5063 return 99;
5064 }
5065 }
5066
5067 /* RTX costs when optimizing for size. */
5068 static bool
5069 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5070 {
5071 enum machine_mode mode = GET_MODE (x);
5072
5073 if (TARGET_THUMB)
5074 {
5075 /* XXX TBD. For now, use the standard costs. */
5076 *total = thumb1_rtx_costs (x, code, outer_code);
5077 return true;
5078 }
5079
5080 switch (code)
5081 {
5082 case MEM:
5083 /* A memory access costs 1 insn if the mode is small, or the address is
5084 a single register, otherwise it costs one insn per word. */
5085 if (REG_P (XEXP (x, 0)))
5086 *total = COSTS_N_INSNS (1);
5087 else
5088 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5089 return true;
5090
5091 case DIV:
5092 case MOD:
5093 case UDIV:
5094 case UMOD:
5095 /* Needs a libcall, so it costs about this. */
5096 *total = COSTS_N_INSNS (2);
5097 return false;
5098
5099 case ROTATE:
5100 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5101 {
5102 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5103 return true;
5104 }
5105 /* Fall through */
5106 case ROTATERT:
5107 case ASHIFT:
5108 case LSHIFTRT:
5109 case ASHIFTRT:
5110 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5111 {
5112 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5113 return true;
5114 }
5115 else if (mode == SImode)
5116 {
5117 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5118 /* Slightly disparage register shifts, but not by much. */
5119 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5120 *total += 1 + rtx_cost (XEXP (x, 1), code);
5121 return true;
5122 }
5123
5124 /* Needs a libcall. */
5125 *total = COSTS_N_INSNS (2);
5126 return false;
5127
5128 case MINUS:
5129 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5130 {
5131 *total = COSTS_N_INSNS (1);
5132 return false;
5133 }
5134
5135 if (mode == SImode)
5136 {
5137 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5138 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5139
5140 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5141 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5142 || subcode1 == ROTATE || subcode1 == ROTATERT
5143 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5144 || subcode1 == ASHIFTRT)
5145 {
5146 /* It's just the cost of the two operands. */
5147 *total = 0;
5148 return false;
5149 }
5150
5151 *total = COSTS_N_INSNS (1);
5152 return false;
5153 }
5154
5155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5156 return false;
5157
5158 case PLUS:
5159 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5160 {
5161 *total = COSTS_N_INSNS (1);
5162 return false;
5163 }
5164
5165 /* Fall through */
5166 case AND: case XOR: case IOR:
5167 if (mode == SImode)
5168 {
5169 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5170
5171 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5172 || subcode == LSHIFTRT || subcode == ASHIFTRT
5173 || (code == AND && subcode == NOT))
5174 {
5175 /* It's just the cost of the two operands. */
5176 *total = 0;
5177 return false;
5178 }
5179 }
5180
5181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5182 return false;
5183
5184 case MULT:
5185 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5186 return false;
5187
5188 case NEG:
5189 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5190 *total = COSTS_N_INSNS (1);
5191 /* Fall through */
5192 case NOT:
5193 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5194
5195 return false;
5196
5197 case IF_THEN_ELSE:
5198 *total = 0;
5199 return false;
5200
5201 case COMPARE:
5202 if (cc_register (XEXP (x, 0), VOIDmode))
5203 *total = 0;
5204 else
5205 *total = COSTS_N_INSNS (1);
5206 return false;
5207
5208 case ABS:
5209 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5210 *total = COSTS_N_INSNS (1);
5211 else
5212 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5213 return false;
5214
5215 case SIGN_EXTEND:
5216 *total = 0;
5217 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5218 {
5219 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5220 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5221 }
5222 if (mode == DImode)
5223 *total += COSTS_N_INSNS (1);
5224 return false;
5225
5226 case ZERO_EXTEND:
5227 *total = 0;
5228 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5229 {
5230 switch (GET_MODE (XEXP (x, 0)))
5231 {
5232 case QImode:
5233 *total += COSTS_N_INSNS (1);
5234 break;
5235
5236 case HImode:
5237 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5238
5239 case SImode:
5240 break;
5241
5242 default:
5243 *total += COSTS_N_INSNS (2);
5244 }
5245 }
5246
5247 if (mode == DImode)
5248 *total += COSTS_N_INSNS (1);
5249
5250 return false;
5251
5252 case CONST_INT:
5253 if (const_ok_for_arm (INTVAL (x)))
5254 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5255 else if (const_ok_for_arm (~INTVAL (x)))
5256 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5257 else if (const_ok_for_arm (-INTVAL (x)))
5258 {
5259 if (outer_code == COMPARE || outer_code == PLUS
5260 || outer_code == MINUS)
5261 *total = 0;
5262 else
5263 *total = COSTS_N_INSNS (1);
5264 }
5265 else
5266 *total = COSTS_N_INSNS (2);
5267 return true;
5268
5269 case CONST:
5270 case LABEL_REF:
5271 case SYMBOL_REF:
5272 *total = COSTS_N_INSNS (2);
5273 return true;
5274
5275 case CONST_DOUBLE:
5276 *total = COSTS_N_INSNS (4);
5277 return true;
5278
5279 default:
5280 if (mode != VOIDmode)
5281 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5282 else
5283 *total = COSTS_N_INSNS (4); /* Who knows? */
5284 return false;
5285 }
5286 }
5287
5288 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5289 supported on any "slowmul" cores, so it can be ignored. */
5290
5291 static bool
5292 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5293 {
5294 enum machine_mode mode = GET_MODE (x);
5295
5296 if (TARGET_THUMB)
5297 {
5298 *total = thumb1_rtx_costs (x, code, outer_code);
5299 return true;
5300 }
5301
5302 switch (code)
5303 {
5304 case MULT:
5305 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5306 || mode == DImode)
5307 {
5308 *total = 30;
5309 return true;
5310 }
5311
5312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5313 {
5314 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5315 & (unsigned HOST_WIDE_INT) 0xffffffff);
5316 int cost, const_ok = const_ok_for_arm (i);
5317 int j, booth_unit_size;
5318
5319 /* Tune as appropriate. */
5320 cost = const_ok ? 4 : 8;
5321 booth_unit_size = 2;
5322 for (j = 0; i && j < 32; j += booth_unit_size)
5323 {
5324 i >>= booth_unit_size;
5325 cost += 2;
5326 }
5327
5328 *total = cost;
5329 return true;
5330 }
5331
5332 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5333 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5334 return true;
5335
5336 default:
5337 *total = arm_rtx_costs_1 (x, code, outer_code);
5338 return true;
5339 }
5340 }
5341
5342
5343 /* RTX cost for cores with a fast multiply unit (M variants). */
5344
5345 static bool
5346 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5347 {
5348 enum machine_mode mode = GET_MODE (x);
5349
5350 if (TARGET_THUMB1)
5351 {
5352 *total = thumb1_rtx_costs (x, code, outer_code);
5353 return true;
5354 }
5355
5356 /* ??? should thumb2 use different costs? */
5357 switch (code)
5358 {
5359 case MULT:
5360 /* There is no point basing this on the tuning, since it is always the
5361 fast variant if it exists at all. */
5362 if (mode == DImode
5363 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5364 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5365 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5366 {
5367 *total = 8;
5368 return true;
5369 }
5370
5371
5372 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5373 || mode == DImode)
5374 {
5375 *total = 30;
5376 return true;
5377 }
5378
5379 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5380 {
5381 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5382 & (unsigned HOST_WIDE_INT) 0xffffffff);
5383 int cost, const_ok = const_ok_for_arm (i);
5384 int j, booth_unit_size;
5385
5386 /* Tune as appropriate. */
5387 cost = const_ok ? 4 : 8;
5388 booth_unit_size = 8;
5389 for (j = 0; i && j < 32; j += booth_unit_size)
5390 {
5391 i >>= booth_unit_size;
5392 cost += 2;
5393 }
5394
5395 *total = cost;
5396 return true;
5397 }
5398
5399 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5400 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5401 return true;
5402
5403 default:
5404 *total = arm_rtx_costs_1 (x, code, outer_code);
5405 return true;
5406 }
5407 }
5408
5409
5410 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5411 so it can be ignored. */
5412
5413 static bool
5414 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5415 {
5416 enum machine_mode mode = GET_MODE (x);
5417
5418 if (TARGET_THUMB)
5419 {
5420 *total = thumb1_rtx_costs (x, code, outer_code);
5421 return true;
5422 }
5423
5424 switch (code)
5425 {
5426 case MULT:
5427 /* There is no point basing this on the tuning, since it is always the
5428 fast variant if it exists at all. */
5429 if (mode == DImode
5430 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5431 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5432 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5433 {
5434 *total = 8;
5435 return true;
5436 }
5437
5438
5439 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5440 || mode == DImode)
5441 {
5442 *total = 30;
5443 return true;
5444 }
5445
5446 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5447 {
5448 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5449 & (unsigned HOST_WIDE_INT) 0xffffffff);
5450 int cost, const_ok = const_ok_for_arm (i);
5451 unsigned HOST_WIDE_INT masked_const;
5452
5453 /* The cost will be related to two insns.
5454 First a load of the constant (MOV or LDR), then a multiply. */
5455 cost = 2;
5456 if (! const_ok)
5457 cost += 1; /* LDR is probably more expensive because
5458 of longer result latency. */
5459 masked_const = i & 0xffff8000;
5460 if (masked_const != 0 && masked_const != 0xffff8000)
5461 {
5462 masked_const = i & 0xf8000000;
5463 if (masked_const == 0 || masked_const == 0xf8000000)
5464 cost += 1;
5465 else
5466 cost += 2;
5467 }
5468 *total = cost;
5469 return true;
5470 }
5471
5472 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5473 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5474 return true;
5475
5476 case COMPARE:
5477 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5478 will stall until the multiplication is complete. */
5479 if (GET_CODE (XEXP (x, 0)) == MULT)
5480 *total = 4 + rtx_cost (XEXP (x, 0), code);
5481 else
5482 *total = arm_rtx_costs_1 (x, code, outer_code);
5483 return true;
5484
5485 default:
5486 *total = arm_rtx_costs_1 (x, code, outer_code);
5487 return true;
5488 }
5489 }
5490
5491
5492 /* RTX costs for 9e (and later) cores. */
5493
5494 static bool
5495 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5496 {
5497 enum machine_mode mode = GET_MODE (x);
5498 int nonreg_cost;
5499 int cost;
5500
5501 if (TARGET_THUMB1)
5502 {
5503 switch (code)
5504 {
5505 case MULT:
5506 *total = COSTS_N_INSNS (3);
5507 return true;
5508
5509 default:
5510 *total = thumb1_rtx_costs (x, code, outer_code);
5511 return true;
5512 }
5513 }
5514
5515 switch (code)
5516 {
5517 case MULT:
5518 /* There is no point basing this on the tuning, since it is always the
5519 fast variant if it exists at all. */
5520 if (mode == DImode
5521 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5522 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5523 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5524 {
5525 *total = 3;
5526 return true;
5527 }
5528
5529
5530 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5531 {
5532 *total = 30;
5533 return true;
5534 }
5535 if (mode == DImode)
5536 {
5537 cost = 7;
5538 nonreg_cost = 8;
5539 }
5540 else
5541 {
5542 cost = 2;
5543 nonreg_cost = 4;
5544 }
5545
5546
5547 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5548 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5549 return true;
5550
5551 default:
5552 *total = arm_rtx_costs_1 (x, code, outer_code);
5553 return true;
5554 }
5555 }
5556 /* All address computations that can be done are free, but rtx cost returns
5557 the same for practically all of them. So we weight the different types
5558 of address here in the order (most pref first):
5559 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5560 static inline int
5561 arm_arm_address_cost (rtx x)
5562 {
5563 enum rtx_code c = GET_CODE (x);
5564
5565 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5566 return 0;
5567 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5568 return 10;
5569
5570 if (c == PLUS || c == MINUS)
5571 {
5572 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5573 return 2;
5574
5575 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5576 return 3;
5577
5578 return 4;
5579 }
5580
5581 return 6;
5582 }
5583
5584 static inline int
5585 arm_thumb_address_cost (rtx x)
5586 {
5587 enum rtx_code c = GET_CODE (x);
5588
5589 if (c == REG)
5590 return 1;
5591 if (c == PLUS
5592 && GET_CODE (XEXP (x, 0)) == REG
5593 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5594 return 1;
5595
5596 return 2;
5597 }
5598
5599 static int
5600 arm_address_cost (rtx x)
5601 {
5602 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5603 }
5604
5605 static int
5606 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5607 {
5608 rtx i_pat, d_pat;
5609
5610 /* Some true dependencies can have a higher cost depending
5611 on precisely how certain input operands are used. */
5612 if (arm_tune_xscale
5613 && REG_NOTE_KIND (link) == 0
5614 && recog_memoized (insn) >= 0
5615 && recog_memoized (dep) >= 0)
5616 {
5617 int shift_opnum = get_attr_shift (insn);
5618 enum attr_type attr_type = get_attr_type (dep);
5619
5620 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5621 operand for INSN. If we have a shifted input operand and the
5622 instruction we depend on is another ALU instruction, then we may
5623 have to account for an additional stall. */
5624 if (shift_opnum != 0
5625 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5626 {
5627 rtx shifted_operand;
5628 int opno;
5629
5630 /* Get the shifted operand. */
5631 extract_insn (insn);
5632 shifted_operand = recog_data.operand[shift_opnum];
5633
5634 /* Iterate over all the operands in DEP. If we write an operand
5635 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
5636 cost of this dependency. */
5637 extract_insn (dep);
5638 preprocess_constraints ();
5639 for (opno = 0; opno < recog_data.n_operands; opno++)
5640 {
5641 /* We can ignore strict inputs. */
5642 if (recog_data.operand_type[opno] == OP_IN)
5643 continue;
5644
5645 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5646 shifted_operand))
5647 return 2;
5648 }
5649 }
5650 }
5651
5652 /* XXX This is not strictly true for the FPA. */
5653 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5654 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5655 return 0;
5656
5657 /* Call insns don't incur a stall, even if they follow a load. */
5658 if (REG_NOTE_KIND (link) == 0
5659 && GET_CODE (insn) == CALL_INSN)
5660 return 1;
5661
5662 if ((i_pat = single_set (insn)) != NULL
5663 && GET_CODE (SET_SRC (i_pat)) == MEM
5664 && (d_pat = single_set (dep)) != NULL
5665 && GET_CODE (SET_DEST (d_pat)) == MEM)
5666 {
5667 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5668 /* This is a load after a store, there is no conflict if the load reads
5669 from a cached area. Assume that loads from the stack, and from the
5670 constant pool are cached, and that others will miss. This is a
5671 hack. */
5672
5673 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5674 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5675 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5676 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5677 return 1;
5678 }
5679
5680 return cost;
5681 }
5682
5683 static int fp_consts_inited = 0;
5684
5685 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5686 static const char * const strings_fp[8] =
5687 {
5688 "0", "1", "2", "3",
5689 "4", "5", "0.5", "10"
5690 };
5691
5692 static REAL_VALUE_TYPE values_fp[8];
5693
5694 static void
5695 init_fp_table (void)
5696 {
5697 int i;
5698 REAL_VALUE_TYPE r;
5699
5700 if (TARGET_VFP)
5701 fp_consts_inited = 1;
5702 else
5703 fp_consts_inited = 8;
5704
5705 for (i = 0; i < fp_consts_inited; i++)
5706 {
5707 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5708 values_fp[i] = r;
5709 }
5710 }
5711
5712 /* Return TRUE if rtx X is a valid immediate FP constant. */
5713 int
5714 arm_const_double_rtx (rtx x)
5715 {
5716 REAL_VALUE_TYPE r;
5717 int i;
5718
5719 if (!fp_consts_inited)
5720 init_fp_table ();
5721
5722 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5723 if (REAL_VALUE_MINUS_ZERO (r))
5724 return 0;
5725
5726 for (i = 0; i < fp_consts_inited; i++)
5727 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5728 return 1;
5729
5730 return 0;
5731 }
5732
5733 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5734 int
5735 neg_const_double_rtx_ok_for_fpa (rtx x)
5736 {
5737 REAL_VALUE_TYPE r;
5738 int i;
5739
5740 if (!fp_consts_inited)
5741 init_fp_table ();
5742
5743 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5744 r = REAL_VALUE_NEGATE (r);
5745 if (REAL_VALUE_MINUS_ZERO (r))
5746 return 0;
5747
5748 for (i = 0; i < 8; i++)
5749 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5750 return 1;
5751
5752 return 0;
5753 }
5754
5755
5756 /* VFPv3 has a fairly wide range of representable immediates, formed from
5757 "quarter-precision" floating-point values. These can be evaluated using this
5758 formula (with ^ for exponentiation):
5759
5760 -1^s * n * 2^-r
5761
5762 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5763 16 <= n <= 31 and 0 <= r <= 7.
5764
5765 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5766
5767 - A (most-significant) is the sign bit.
5768 - BCD are the exponent (encoded as r XOR 3).
5769 - EFGH are the mantissa (encoded as n - 16).
5770 */
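/* A worked example, derived purely from the formula above: 1.0 = 16 * 2^-4,
   so s = 0, r = 4 and n = 16, giving ABCDEFGH = 0:111:0000 = 0x70.  Likewise
   2.0 = 16 * 2^-3 encodes as 0x00.  */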
5771
5772 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5773 fconst[sd] instruction, or -1 if X isn't suitable. */
5774 static int
5775 vfp3_const_double_index (rtx x)
5776 {
5777 REAL_VALUE_TYPE r, m;
5778 int sign, exponent;
5779 unsigned HOST_WIDE_INT mantissa, mant_hi;
5780 unsigned HOST_WIDE_INT mask;
5781 HOST_WIDE_INT m1, m2;
5782 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5783
5784 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5785 return -1;
5786
5787 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5788
5789 /* We can't represent these things, so detect them first. */
5790 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5791 return -1;
5792
5793 /* Extract sign, exponent and mantissa. */
5794 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5795 r = REAL_VALUE_ABS (r);
5796 exponent = REAL_EXP (&r);
5797 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5798 highest (sign) bit, with a fixed binary point at bit point_pos.
5799 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5800 bits for the mantissa, this may fail (low bits would be lost). */
5801 real_ldexp (&m, &r, point_pos - exponent);
5802 REAL_VALUE_TO_INT (&m1, &m2, m);
5803 mantissa = m1;
5804 mant_hi = m2;
5805
5806 /* If there are bits set in the low part of the mantissa, we can't
5807 represent this value. */
5808 if (mantissa != 0)
5809 return -1;
5810
5811 /* Now make it so that mantissa contains the most-significant bits, and move
5812 the point_pos to indicate that the least-significant bits have been
5813 discarded. */
5814 point_pos -= HOST_BITS_PER_WIDE_INT;
5815 mantissa = mant_hi;
5816
5817 /* We can permit four significant bits of mantissa only, plus a high bit
5818 which is always 1. */
5819 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5820 if ((mantissa & mask) != 0)
5821 return -1;
5822
5823 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5824 mantissa >>= point_pos - 5;
5825
5826 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5827 floating-point immediate zero with Neon using an integer-zero load, but
5828 that case is handled elsewhere.) */
5829 if (mantissa == 0)
5830 return -1;
5831
5832 gcc_assert (mantissa >= 16 && mantissa <= 31);
5833
5834 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5835 normalized significands are in the range [1, 2). (Our mantissa is shifted
5836 left 4 places at this point relative to normalized IEEE754 values). GCC
5837 internally uses [0.5, 1) (see real.c), so the exponent returned from
5838 REAL_EXP must be altered. */
5839 exponent = 5 - exponent;
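  /* As a cross-check of the arithmetic (not an additional constraint): GCC
     represents 1.0 as 0.5 * 2^1, so REAL_EXP yields 1 and we get
     exponent = 5 - 1 = 4, matching 1.0 = 16 * 2^-4 in the encoding comment
     above.  */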
5840
5841 if (exponent < 0 || exponent > 7)
5842 return -1;
5843
5844 /* Sign, mantissa and exponent are now in the correct form to plug into the
5845 formula described in the comment above. */
5846 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5847 }
5848
5849 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5850 int
5851 vfp3_const_double_rtx (rtx x)
5852 {
5853 if (!TARGET_VFP3)
5854 return 0;
5855
5856 return vfp3_const_double_index (x) != -1;
5857 }
5858
5859 /* Recognize immediates which can be used in various Neon instructions. Legal
5860 immediates are described by the following table (for VMVN variants, the
5861 bitwise inverse of the constant shown is recognized. In either case, VMOV
5862 is output and the correct instruction to use for a given constant is chosen
5863 by the assembler). The constant shown is replicated across all elements of
5864 the destination vector.
5865
5866 insn elems variant constant (binary)
5867 ---- ----- ------- -----------------
5868 vmov i32 0 00000000 00000000 00000000 abcdefgh
5869 vmov i32 1 00000000 00000000 abcdefgh 00000000
5870 vmov i32 2 00000000 abcdefgh 00000000 00000000
5871 vmov i32 3 abcdefgh 00000000 00000000 00000000
5872 vmov i16 4 00000000 abcdefgh
5873 vmov i16 5 abcdefgh 00000000
5874 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5875 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5876 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5877 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5878 vmvn i16 10 00000000 abcdefgh
5879 vmvn i16 11 abcdefgh 00000000
5880 vmov i32 12 00000000 00000000 abcdefgh 11111111
5881 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5882 vmov i32 14 00000000 abcdefgh 11111111 11111111
5883 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5884 vmov i8 16 abcdefgh
5885 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5886 eeeeeeee ffffffff gggggggg hhhhhhhh
5887 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5888
5889 For case 18, B = !b. Representable values are exactly those accepted by
5890 vfp3_const_double_index, but are output as floating-point numbers rather
5891 than indices.
5892
5893 Variants 0-5 (inclusive) may also be used as immediates for the second
5894 operand of VORR/VBIC instructions.
5895
5896 The INVERSE argument causes the bitwise inverse of the given operand to be
5897 recognized instead (used for recognizing legal immediates for the VAND/VORN
5898 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5899 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5900 output, rather than the real insns vbic/vorr).
5901
5902 INVERSE makes no difference to the recognition of float vectors.
5903
5904 The return value is the variant of immediate as shown in the above table, or
5905 -1 if the given value doesn't match any of the listed patterns.
5906 */
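/* For example (an illustrative case only): a V4SImode CONST_VECTOR whose
   elements are all 0x0000005a matches variant 0 with an element width of 32,
   whereas one whose elements are all 0xffffffa5 matches the VMVN form,
   variant 6.  */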
5907 static int
5908 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5909 rtx *modconst, int *elementwidth)
5910 {
5911 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5912 matches = 1; \
5913 for (i = 0; i < idx; i += (STRIDE)) \
5914 if (!(TEST)) \
5915 matches = 0; \
5916 if (matches) \
5917 { \
5918 immtype = (CLASS); \
5919 elsize = (ELSIZE); \
5920 break; \
5921 }
5922
5923 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5924 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5925 unsigned char bytes[16];
5926 int immtype = -1, matches;
5927 unsigned int invmask = inverse ? 0xff : 0;
5928
5929 /* Vectors of float constants. */
5930 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5931 {
5932 rtx el0 = CONST_VECTOR_ELT (op, 0);
5933 REAL_VALUE_TYPE r0;
5934
5935 if (!vfp3_const_double_rtx (el0))
5936 return -1;
5937
5938 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5939
5940 for (i = 1; i < n_elts; i++)
5941 {
5942 rtx elt = CONST_VECTOR_ELT (op, i);
5943 REAL_VALUE_TYPE re;
5944
5945 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5946
5947 if (!REAL_VALUES_EQUAL (r0, re))
5948 return -1;
5949 }
5950
5951 if (modconst)
5952 *modconst = CONST_VECTOR_ELT (op, 0);
5953
5954 if (elementwidth)
5955 *elementwidth = 0;
5956
5957 return 18;
5958 }
5959
5960 /* Splat vector constant out into a byte vector. */
5961 for (i = 0; i < n_elts; i++)
5962 {
5963 rtx el = CONST_VECTOR_ELT (op, i);
5964 unsigned HOST_WIDE_INT elpart;
5965 unsigned int part, parts;
5966
5967 if (GET_CODE (el) == CONST_INT)
5968 {
5969 elpart = INTVAL (el);
5970 parts = 1;
5971 }
5972 else if (GET_CODE (el) == CONST_DOUBLE)
5973 {
5974 elpart = CONST_DOUBLE_LOW (el);
5975 parts = 2;
5976 }
5977 else
5978 gcc_unreachable ();
5979
5980 for (part = 0; part < parts; part++)
5981 {
5982 unsigned int byte;
5983 for (byte = 0; byte < innersize; byte++)
5984 {
5985 bytes[idx++] = (elpart & 0xff) ^ invmask;
5986 elpart >>= BITS_PER_UNIT;
5987 }
5988 if (GET_CODE (el) == CONST_DOUBLE)
5989 elpart = CONST_DOUBLE_HIGH (el);
5990 }
5991 }
5992
5993 /* Sanity check. */
5994 gcc_assert (idx == GET_MODE_SIZE (mode));
5995
5996 do
5997 {
5998 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
5999 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6000
6001 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6002 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6003
6004 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6005 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6006
6007 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6008 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6009
6010 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6011
6012 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6013
6014 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6015 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6016
6017 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6018 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6019
6020 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6021 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6022
6023 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6024 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6025
6026 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6027
6028 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6029
6030 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6031 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6032
6033 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6034 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6035
6036 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6037 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6038
6039 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6040 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6041
6042 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6043
6044 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6045 && bytes[i] == bytes[(i + 8) % idx]);
6046 }
6047 while (0);
6048
6049 if (immtype == -1)
6050 return -1;
6051
6052 if (elementwidth)
6053 *elementwidth = elsize;
6054
6055 if (modconst)
6056 {
6057 unsigned HOST_WIDE_INT imm = 0;
6058
6059 /* Un-invert bytes of recognized vector, if necessary. */
6060 if (invmask != 0)
6061 for (i = 0; i < idx; i++)
6062 bytes[i] ^= invmask;
6063
6064 if (immtype == 17)
6065 {
6066 /* FIXME: Broken on 32-bit H_W_I hosts. */
6067 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6068
6069 for (i = 0; i < 8; i++)
6070 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6071 << (i * BITS_PER_UNIT);
6072
6073 *modconst = GEN_INT (imm);
6074 }
6075 else
6076 {
6077 unsigned HOST_WIDE_INT imm = 0;
6078
6079 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6080 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6081
6082 *modconst = GEN_INT (imm);
6083 }
6084 }
6085
6086 return immtype;
6087 #undef CHECK
6088 }
6089
6090 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6091 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6092 float elements), and a modified constant (whatever should be output for a
6093 VMOV) in *MODCONST. */
6094
6095 int
6096 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6097 rtx *modconst, int *elementwidth)
6098 {
6099 rtx tmpconst;
6100 int tmpwidth;
6101 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6102
6103 if (retval == -1)
6104 return 0;
6105
6106 if (modconst)
6107 *modconst = tmpconst;
6108
6109 if (elementwidth)
6110 *elementwidth = tmpwidth;
6111
6112 return 1;
6113 }
6114
6115 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6116 the immediate is valid, write a constant suitable for using as an operand
6117 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6118 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6119
6120 int
6121 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6122 rtx *modconst, int *elementwidth)
6123 {
6124 rtx tmpconst;
6125 int tmpwidth;
6126 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6127
6128 if (retval < 0 || retval > 5)
6129 return 0;
6130
6131 if (modconst)
6132 *modconst = tmpconst;
6133
6134 if (elementwidth)
6135 *elementwidth = tmpwidth;
6136
6137 return 1;
6138 }
6139
6140 /* Return a string suitable for output of Neon immediate logic operation
6141 MNEM. */
6142
6143 char *
6144 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6145 int inverse, int quad)
6146 {
6147 int width, is_valid;
6148 static char templ[40];
6149
6150 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6151
6152 gcc_assert (is_valid != 0);
6153
6154 if (quad)
6155 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6156 else
6157 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6158
6159 return templ;
6160 }
6161
6162 /* Output a sequence of pairwise operations to implement a reduction.
6163 NOTE: We do "too much work" here, because pairwise operations work on two
6164    registers' worth of operands in one go.  Unfortunately, I don't think we can
6165    exploit those extra calculations to do the full operation in fewer steps.
6166 Although all vector elements of the result but the first are ignored, we
6167 actually calculate the same result in each of the elements. An alternative
6168 such as initially loading a vector with zero to use as each of the second
6169 operands would use up an additional register and take an extra instruction,
6170 for no particular gain. */
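/* As an illustration, with a generic pairwise add and a four-element vector
   {a, b, c, d}: the first step yields {a+b, c+d, a+b, c+d} and the second
   yields a+b+c+d in every element, of which only element 0 of the final
   destination is subsequently used.  */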
6171
6172 void
6173 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6174 rtx (*reduc) (rtx, rtx, rtx))
6175 {
6176 enum machine_mode inner = GET_MODE_INNER (mode);
6177 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6178 rtx tmpsum = op1;
6179
6180 for (i = parts / 2; i >= 1; i /= 2)
6181 {
6182 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6183 emit_insn (reduc (dest, tmpsum, tmpsum));
6184 tmpsum = dest;
6185 }
6186 }
6187
6188 /* Initialize a vector with non-constant elements. FIXME: We can do better
6189 than the current implementation (building a vector on the stack and then
6190 loading it) in many cases. See rs6000.c. */
6191
6192 void
6193 neon_expand_vector_init (rtx target, rtx vals)
6194 {
6195 enum machine_mode mode = GET_MODE (target);
6196 enum machine_mode inner = GET_MODE_INNER (mode);
6197 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6198 rtx mem;
6199
6200 gcc_assert (VECTOR_MODE_P (mode));
6201
6202 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6203 for (i = 0; i < n_elts; i++)
6204 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6205 XVECEXP (vals, 0, i));
6206
6207 emit_move_insn (target, mem);
6208 }
6209
6210 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6211 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6212 reported source locations are bogus. */
6213
6214 static void
6215 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6216 const char *err)
6217 {
6218 HOST_WIDE_INT lane;
6219
6220 gcc_assert (GET_CODE (operand) == CONST_INT);
6221
6222 lane = INTVAL (operand);
6223
6224 if (lane < low || lane >= high)
6225 error (err);
6226 }
6227
6228 /* Bounds-check lanes. */
6229
6230 void
6231 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6232 {
6233 bounds_check (operand, low, high, "lane out of range");
6234 }
6235
6236 /* Bounds-check constants. */
6237
6238 void
6239 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6240 {
6241 bounds_check (operand, low, high, "constant out of range");
6242 }
6243
6244 HOST_WIDE_INT
6245 neon_element_bits (enum machine_mode mode)
6246 {
6247 if (mode == DImode)
6248 return GET_MODE_BITSIZE (mode);
6249 else
6250 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6251 }
6252
6253 \f
6254 /* Predicates for `match_operand' and `match_operator'. */
6255
6256 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6257 int
6258 cirrus_memory_offset (rtx op)
6259 {
6260 /* Reject eliminable registers. */
6261 if (! (reload_in_progress || reload_completed)
6262 && ( reg_mentioned_p (frame_pointer_rtx, op)
6263 || reg_mentioned_p (arg_pointer_rtx, op)
6264 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6265 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6266 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6267 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6268 return 0;
6269
6270 if (GET_CODE (op) == MEM)
6271 {
6272 rtx ind;
6273
6274 ind = XEXP (op, 0);
6275
6276 /* Match: (mem (reg)). */
6277 if (GET_CODE (ind) == REG)
6278 return 1;
6279
6280 /* Match:
6281 (mem (plus (reg)
6282 (const))). */
6283 if (GET_CODE (ind) == PLUS
6284 && GET_CODE (XEXP (ind, 0)) == REG
6285 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6286 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6287 return 1;
6288 }
6289
6290 return 0;
6291 }
6292
6293 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6294 WB is true if full writeback address modes are allowed and is false
6295 if limited writeback address modes (POST_INC and PRE_DEC) are
6296 allowed. */
6297
6298 int
6299 arm_coproc_mem_operand (rtx op, bool wb)
6300 {
6301 rtx ind;
6302
6303 /* Reject eliminable registers. */
6304 if (! (reload_in_progress || reload_completed)
6305 && ( reg_mentioned_p (frame_pointer_rtx, op)
6306 || reg_mentioned_p (arg_pointer_rtx, op)
6307 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6308 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6309 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6310 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6311 return FALSE;
6312
6313 /* Constants are converted into offsets from labels. */
6314 if (GET_CODE (op) != MEM)
6315 return FALSE;
6316
6317 ind = XEXP (op, 0);
6318
6319 if (reload_completed
6320 && (GET_CODE (ind) == LABEL_REF
6321 || (GET_CODE (ind) == CONST
6322 && GET_CODE (XEXP (ind, 0)) == PLUS
6323 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6324 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6325 return TRUE;
6326
6327 /* Match: (mem (reg)). */
6328 if (GET_CODE (ind) == REG)
6329 return arm_address_register_rtx_p (ind, 0);
6330
6331   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
6332 acceptable in any case (subject to verification by
6333 arm_address_register_rtx_p). We need WB to be true to accept
6334 PRE_INC and POST_DEC. */
6335 if (GET_CODE (ind) == POST_INC
6336 || GET_CODE (ind) == PRE_DEC
6337 || (wb
6338 && (GET_CODE (ind) == PRE_INC
6339 || GET_CODE (ind) == POST_DEC)))
6340 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6341
6342 if (wb
6343 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6344 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6345 && GET_CODE (XEXP (ind, 1)) == PLUS
6346 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6347 ind = XEXP (ind, 1);
6348
6349 /* Match:
6350 (plus (reg)
6351 (const)). */
6352 if (GET_CODE (ind) == PLUS
6353 && GET_CODE (XEXP (ind, 0)) == REG
6354 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6355 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6356 && INTVAL (XEXP (ind, 1)) > -1024
6357 && INTVAL (XEXP (ind, 1)) < 1024
6358 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6359 return TRUE;
6360
6361 return FALSE;
6362 }
6363
6364 /* Return TRUE if OP is a memory operand which we can load or store a vector
6365 to/from. If CORE is true, we're moving from ARM registers not Neon
6366 registers. */
6367 int
6368 neon_vector_mem_operand (rtx op, bool core)
6369 {
6370 rtx ind;
6371
6372 /* Reject eliminable registers. */
6373 if (! (reload_in_progress || reload_completed)
6374 && ( reg_mentioned_p (frame_pointer_rtx, op)
6375 || reg_mentioned_p (arg_pointer_rtx, op)
6376 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6377 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6378 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6379 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6380 return FALSE;
6381
6382 /* Constants are converted into offsets from labels. */
6383 if (GET_CODE (op) != MEM)
6384 return FALSE;
6385
6386 ind = XEXP (op, 0);
6387
6388 if (reload_completed
6389 && (GET_CODE (ind) == LABEL_REF
6390 || (GET_CODE (ind) == CONST
6391 && GET_CODE (XEXP (ind, 0)) == PLUS
6392 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6393 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6394 return TRUE;
6395
6396 /* Match: (mem (reg)). */
6397 if (GET_CODE (ind) == REG)
6398 return arm_address_register_rtx_p (ind, 0);
6399
6400 /* Allow post-increment with Neon registers. */
6401 if (!core && GET_CODE (ind) == POST_INC)
6402 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6403
6404 #if 0
6405 /* FIXME: We can support this too if we use VLD1/VST1. */
6406 if (!core
6407 && GET_CODE (ind) == POST_MODIFY
6408 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6409 && GET_CODE (XEXP (ind, 1)) == PLUS
6410 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6411 ind = XEXP (ind, 1);
6412 #endif
6413
6414 /* Match:
6415 (plus (reg)
6416 (const)). */
6417 if (!core
6418 && GET_CODE (ind) == PLUS
6419 && GET_CODE (XEXP (ind, 0)) == REG
6420 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6421 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6422 && INTVAL (XEXP (ind, 1)) > -1024
6423 && INTVAL (XEXP (ind, 1)) < 1016
6424 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6425 return TRUE;
6426
6427 return FALSE;
6428 }
6429
6430 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6431 type. */
6432 int
6433 neon_struct_mem_operand (rtx op)
6434 {
6435 rtx ind;
6436
6437 /* Reject eliminable registers. */
6438 if (! (reload_in_progress || reload_completed)
6439 && ( reg_mentioned_p (frame_pointer_rtx, op)
6440 || reg_mentioned_p (arg_pointer_rtx, op)
6441 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6442 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6443 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6444 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6445 return FALSE;
6446
6447 /* Constants are converted into offsets from labels. */
6448 if (GET_CODE (op) != MEM)
6449 return FALSE;
6450
6451 ind = XEXP (op, 0);
6452
6453 if (reload_completed
6454 && (GET_CODE (ind) == LABEL_REF
6455 || (GET_CODE (ind) == CONST
6456 && GET_CODE (XEXP (ind, 0)) == PLUS
6457 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6458 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6459 return TRUE;
6460
6461 /* Match: (mem (reg)). */
6462 if (GET_CODE (ind) == REG)
6463 return arm_address_register_rtx_p (ind, 0);
6464
6465 return FALSE;
6466 }
6467
6468 /* Return true if X is a register that will be eliminated later on. */
6469 int
6470 arm_eliminable_register (rtx x)
6471 {
6472 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6473 || REGNO (x) == ARG_POINTER_REGNUM
6474 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6475 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6476 }
6477
6478 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
6479    coprocessor registers.  Otherwise return NO_REGS.  */
6480
6481 enum reg_class
6482 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6483 {
6484 if (TARGET_NEON
6485 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6486 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6487 && neon_vector_mem_operand (x, FALSE))
6488 return NO_REGS;
6489
6490 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6491 return NO_REGS;
6492
6493 return GENERAL_REGS;
6494 }
6495
6496 /* Values which must be returned in the most-significant end of the return
6497 register. */
6498
6499 static bool
6500 arm_return_in_msb (const_tree valtype)
6501 {
6502 return (TARGET_AAPCS_BASED
6503 && BYTES_BIG_ENDIAN
6504 && (AGGREGATE_TYPE_P (valtype)
6505 || TREE_CODE (valtype) == COMPLEX_TYPE));
6506 }
6507
6508 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6509    Used by the Cirrus Maverick code, which has to work around
6510    a hardware bug triggered by such instructions.  */
6511 static bool
6512 arm_memory_load_p (rtx insn)
6513 {
6514   rtx body, lhs, rhs;
6515
6516 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6517 return false;
6518
6519 body = PATTERN (insn);
6520
6521 if (GET_CODE (body) != SET)
6522 return false;
6523
6524 lhs = XEXP (body, 0);
6525 rhs = XEXP (body, 1);
6526
6527 lhs = REG_OR_SUBREG_RTX (lhs);
6528
6529 /* If the destination is not a general purpose
6530 register we do not have to worry. */
6531 if (GET_CODE (lhs) != REG
6532 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6533 return false;
6534
6535 /* As well as loads from memory we also have to react
6536 to loads of invalid constants which will be turned
6537 into loads from the minipool. */
6538 return (GET_CODE (rhs) == MEM
6539 || GET_CODE (rhs) == SYMBOL_REF
6540 || note_invalid_constants (insn, -1, false));
6541 }
6542
6543 /* Return TRUE if INSN is a Cirrus instruction. */
6544 static bool
6545 arm_cirrus_insn_p (rtx insn)
6546 {
6547 enum attr_cirrus attr;
6548
6549 /* get_attr cannot accept USE or CLOBBER. */
6550 if (!insn
6551 || GET_CODE (insn) != INSN
6552 || GET_CODE (PATTERN (insn)) == USE
6553 || GET_CODE (PATTERN (insn)) == CLOBBER)
6554 return 0;
6555
6556 attr = get_attr_cirrus (insn);
6557
6558 return attr != CIRRUS_NOT;
6559 }
6560
6561 /* Cirrus reorg for invalid instruction combinations. */
6562 static void
6563 cirrus_reorg (rtx first)
6564 {
6565 enum attr_cirrus attr;
6566 rtx body = PATTERN (first);
6567 rtx t;
6568 int nops;
6569
6570 /* Any branch must be followed by 2 non Cirrus instructions. */
6571 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6572 {
6573 nops = 0;
6574 t = next_nonnote_insn (first);
6575
6576 if (arm_cirrus_insn_p (t))
6577 ++ nops;
6578
6579 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6580 ++ nops;
6581
6582 while (nops --)
6583 emit_insn_after (gen_nop (), first);
6584
6585 return;
6586 }
6587
6588 /* (float (blah)) is in parallel with a clobber. */
6589 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6590 body = XVECEXP (body, 0, 0);
6591
6592 if (GET_CODE (body) == SET)
6593 {
6594 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6595
6596 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6597 be followed by a non Cirrus insn. */
6598 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6599 {
6600 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6601 emit_insn_after (gen_nop (), first);
6602
6603 return;
6604 }
6605 else if (arm_memory_load_p (first))
6606 {
6607 unsigned int arm_regno;
6608
6609 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6610 ldr/cfmv64hr combination where the Rd field is the same
6611 in both instructions must be split with a non Cirrus
6612 insn. Example:
6613
6614 ldr r0, blah
6615 nop
6616 cfmvsr mvf0, r0. */
6617
6618 /* Get Arm register number for ldr insn. */
6619 if (GET_CODE (lhs) == REG)
6620 arm_regno = REGNO (lhs);
6621 else
6622 {
6623 gcc_assert (GET_CODE (rhs) == REG);
6624 arm_regno = REGNO (rhs);
6625 }
6626
6627 /* Next insn. */
6628 first = next_nonnote_insn (first);
6629
6630 if (! arm_cirrus_insn_p (first))
6631 return;
6632
6633 body = PATTERN (first);
6634
6635 /* (float (blah)) is in parallel with a clobber. */
6636 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6637 body = XVECEXP (body, 0, 0);
6638
6639 if (GET_CODE (body) == FLOAT)
6640 body = XEXP (body, 0);
6641
6642 if (get_attr_cirrus (first) == CIRRUS_MOVE
6643 && GET_CODE (XEXP (body, 1)) == REG
6644 && arm_regno == REGNO (XEXP (body, 1)))
6645 emit_insn_after (gen_nop (), first);
6646
6647 return;
6648 }
6649 }
6650
6651 /* get_attr cannot accept USE or CLOBBER. */
6652 if (!first
6653 || GET_CODE (first) != INSN
6654 || GET_CODE (PATTERN (first)) == USE
6655 || GET_CODE (PATTERN (first)) == CLOBBER)
6656 return;
6657
6658 attr = get_attr_cirrus (first);
6659
6660 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6661 must be followed by a non-coprocessor instruction. */
6662 if (attr == CIRRUS_COMPARE)
6663 {
6664 nops = 0;
6665
6666 t = next_nonnote_insn (first);
6667
6668 if (arm_cirrus_insn_p (t))
6669 ++ nops;
6670
6671 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6672 ++ nops;
6673
6674 while (nops --)
6675 emit_insn_after (gen_nop (), first);
6676
6677 return;
6678 }
6679 }
6680
6681 /* Return TRUE if X references a SYMBOL_REF. */
6682 int
6683 symbol_mentioned_p (rtx x)
6684 {
6685 const char * fmt;
6686 int i;
6687
6688 if (GET_CODE (x) == SYMBOL_REF)
6689 return 1;
6690
6691 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6692 are constant offsets, not symbols. */
6693 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6694 return 0;
6695
6696 fmt = GET_RTX_FORMAT (GET_CODE (x));
6697
6698 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6699 {
6700 if (fmt[i] == 'E')
6701 {
6702 int j;
6703
6704 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6705 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6706 return 1;
6707 }
6708 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6709 return 1;
6710 }
6711
6712 return 0;
6713 }
6714
6715 /* Return TRUE if X references a LABEL_REF. */
6716 int
6717 label_mentioned_p (rtx x)
6718 {
6719 const char * fmt;
6720 int i;
6721
6722 if (GET_CODE (x) == LABEL_REF)
6723 return 1;
6724
6725 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6726 instruction, but they are constant offsets, not symbols. */
6727 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6728 return 0;
6729
6730 fmt = GET_RTX_FORMAT (GET_CODE (x));
6731 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6732 {
6733 if (fmt[i] == 'E')
6734 {
6735 int j;
6736
6737 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6738 if (label_mentioned_p (XVECEXP (x, i, j)))
6739 return 1;
6740 }
6741 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6742 return 1;
6743 }
6744
6745 return 0;
6746 }
6747
6748 int
6749 tls_mentioned_p (rtx x)
6750 {
6751 switch (GET_CODE (x))
6752 {
6753 case CONST:
6754 return tls_mentioned_p (XEXP (x, 0));
6755
6756 case UNSPEC:
6757 if (XINT (x, 1) == UNSPEC_TLS)
6758 return 1;
6759
6760 default:
6761 return 0;
6762 }
6763 }
6764
6765 /* Must not copy a SET whose source operand is PC-relative. */
6766
6767 static bool
6768 arm_cannot_copy_insn_p (rtx insn)
6769 {
6770 rtx pat = PATTERN (insn);
6771
6772 if (GET_CODE (pat) == SET)
6773 {
6774 rtx rhs = SET_SRC (pat);
6775
6776 if (GET_CODE (rhs) == UNSPEC
6777 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6778 return TRUE;
6779
6780 if (GET_CODE (rhs) == MEM
6781 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6782 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6783 return TRUE;
6784 }
6785
6786 return FALSE;
6787 }
6788
6789 enum rtx_code
6790 minmax_code (rtx x)
6791 {
6792 enum rtx_code code = GET_CODE (x);
6793
6794 switch (code)
6795 {
6796 case SMAX:
6797 return GE;
6798 case SMIN:
6799 return LE;
6800 case UMIN:
6801 return LEU;
6802 case UMAX:
6803 return GEU;
6804 default:
6805 gcc_unreachable ();
6806 }
6807 }
6808
6809 /* Return 1 if memory locations are adjacent. */
6810 int
6811 adjacent_mem_locations (rtx a, rtx b)
6812 {
6813 /* We don't guarantee to preserve the order of these memory refs. */
6814 if (volatile_refs_p (a) || volatile_refs_p (b))
6815 return 0;
6816
6817 if ((GET_CODE (XEXP (a, 0)) == REG
6818 || (GET_CODE (XEXP (a, 0)) == PLUS
6819 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6820 && (GET_CODE (XEXP (b, 0)) == REG
6821 || (GET_CODE (XEXP (b, 0)) == PLUS
6822 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6823 {
6824 HOST_WIDE_INT val0 = 0, val1 = 0;
6825 rtx reg0, reg1;
6826 int val_diff;
6827
6828 if (GET_CODE (XEXP (a, 0)) == PLUS)
6829 {
6830 reg0 = XEXP (XEXP (a, 0), 0);
6831 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6832 }
6833 else
6834 reg0 = XEXP (a, 0);
6835
6836 if (GET_CODE (XEXP (b, 0)) == PLUS)
6837 {
6838 reg1 = XEXP (XEXP (b, 0), 0);
6839 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6840 }
6841 else
6842 reg1 = XEXP (b, 0);
6843
6844 /* Don't accept any offset that will require multiple
6845 instructions to handle, since this would cause the
6846 arith_adjacentmem pattern to output an overlong sequence. */
6847 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6848 return 0;
6849
6850 /* Don't allow an eliminable register: register elimination can make
6851 the offset too large. */
6852 if (arm_eliminable_register (reg0))
6853 return 0;
6854
6855 val_diff = val1 - val0;
6856
6857 if (arm_ld_sched)
6858 {
6859 /* If the target has load delay slots, then there's no benefit
6860 to using an ldm instruction unless the offset is zero and
6861 we are optimizing for size. */
6862 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6863 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6864 && (val_diff == 4 || val_diff == -4));
6865 }
6866
6867 return ((REGNO (reg0) == REGNO (reg1))
6868 && (val_diff == 4 || val_diff == -4));
6869 }
6870
6871 return 0;
6872 }
6873
6874 int
6875 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6876 HOST_WIDE_INT *load_offset)
6877 {
6878 int unsorted_regs[4];
6879 HOST_WIDE_INT unsorted_offsets[4];
6880 int order[4];
6881 int base_reg = -1;
6882 int i;
6883
6884 /* Can only handle 2, 3, or 4 insns at present,
6885 though could be easily extended if required. */
6886 gcc_assert (nops >= 2 && nops <= 4);
6887
6888 /* Loop over the operands and check that the memory references are
6889 suitable (i.e. immediate offsets from the same base register). At
6890 the same time, extract the target register, and the memory
6891 offsets. */
6892 for (i = 0; i < nops; i++)
6893 {
6894 rtx reg;
6895 rtx offset;
6896
6897 /* Convert a subreg of a mem into the mem itself. */
6898 if (GET_CODE (operands[nops + i]) == SUBREG)
6899 operands[nops + i] = alter_subreg (operands + (nops + i));
6900
6901 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6902
6903 /* Don't reorder volatile memory references; it doesn't seem worth
6904 looking for the case where the order is ok anyway. */
6905 if (MEM_VOLATILE_P (operands[nops + i]))
6906 return 0;
6907
6908 offset = const0_rtx;
6909
6910 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6911 || (GET_CODE (reg) == SUBREG
6912 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6913 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6914 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6915 == REG)
6916 || (GET_CODE (reg) == SUBREG
6917 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6918 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6919 == CONST_INT)))
6920 {
6921 if (i == 0)
6922 {
6923 base_reg = REGNO (reg);
6924 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6925 ? REGNO (operands[i])
6926 : REGNO (SUBREG_REG (operands[i])));
6927 order[0] = 0;
6928 }
6929 else
6930 {
6931 if (base_reg != (int) REGNO (reg))
6932 /* Not addressed from the same base register. */
6933 return 0;
6934
6935 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6936 ? REGNO (operands[i])
6937 : REGNO (SUBREG_REG (operands[i])));
6938 if (unsorted_regs[i] < unsorted_regs[order[0]])
6939 order[0] = i;
6940 }
6941
6942 /* If it isn't an integer register, or if it overwrites the
6943 base register but isn't the last insn in the list, then
6944 we can't do this. */
6945 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6946 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6947 return 0;
6948
6949 unsorted_offsets[i] = INTVAL (offset);
6950 }
6951 else
6952 /* Not a suitable memory address. */
6953 return 0;
6954 }
6955
6956 /* All the useful information has now been extracted from the
6957 operands into unsorted_regs and unsorted_offsets; additionally,
6958 order[0] has been set to the lowest numbered register in the
6959 list. Sort the registers into order, and check that the memory
6960 offsets are ascending and adjacent. */
6961
6962 for (i = 1; i < nops; i++)
6963 {
6964 int j;
6965
6966 order[i] = order[i - 1];
6967 for (j = 0; j < nops; j++)
6968 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6969 && (order[i] == order[i - 1]
6970 || unsorted_regs[j] < unsorted_regs[order[i]]))
6971 order[i] = j;
6972
6973       /* Have we found a suitable register?  If not, one must be used more
6974 	 than once.  */
6975 if (order[i] == order[i - 1])
6976 return 0;
6977
6978 /* Is the memory address adjacent and ascending? */
6979 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6980 return 0;
6981 }
6982
6983 if (base)
6984 {
6985 *base = base_reg;
6986
6987 for (i = 0; i < nops; i++)
6988 regs[i] = unsorted_regs[order[i]];
6989
6990 *load_offset = unsorted_offsets[order[0]];
6991 }
6992
6993 if (unsorted_offsets[order[0]] == 0)
6994 return 1; /* ldmia */
6995
6996 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
6997 return 2; /* ldmib */
6998
6999 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7000 return 3; /* ldmda */
7001
7002 if (unsorted_offsets[order[nops - 1]] == -4)
7003 return 4; /* ldmdb */
7004
7005 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7006 if the offset isn't small enough. The reason 2 ldrs are faster
7007 is because these ARMs are able to do more than one cache access
7008 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7009 whilst the ARM8 has a double bandwidth cache. This means that
7010 these cores can do both an instruction fetch and a data fetch in
7011 a single cycle, so the trick of calculating the address into a
7012 scratch register (one of the result regs) and then doing a load
7013 multiple actually becomes slower (and no smaller in code size).
7014 That is the transformation
7015
7016 ldr rd1, [rbase + offset]
7017 ldr rd2, [rbase + offset + 4]
7018
7019 to
7020
7021 add rd1, rbase, offset
7022 ldmia rd1, {rd1, rd2}
7023
7024 produces worse code -- '3 cycles + any stalls on rd2' instead of
7025 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7026 access per cycle, the first sequence could never complete in less
7027 than 6 cycles, whereas the ldm sequence would only take 5 and
7028 would make better use of sequential accesses if not hitting the
7029 cache.
7030
7031 We cheat here and test 'arm_ld_sched' which we currently know to
7032 only be true for the ARM8, ARM9 and StrongARM. If this ever
7033 changes, then the test below needs to be reworked. */
7034 if (nops == 2 && arm_ld_sched)
7035 return 0;
7036
7037   /* Can't do it without setting up the offset; only do this if it takes
7038      no more than one insn.  */
7039 return (const_ok_for_arm (unsorted_offsets[order[0]])
7040 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7041 }
7042
7043 const char *
7044 emit_ldm_seq (rtx *operands, int nops)
7045 {
7046 int regs[4];
7047 int base_reg;
7048 HOST_WIDE_INT offset;
7049 char buf[100];
7050 int i;
7051
7052 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7053 {
7054 case 1:
7055 strcpy (buf, "ldm%(ia%)\t");
7056 break;
7057
7058 case 2:
7059 strcpy (buf, "ldm%(ib%)\t");
7060 break;
7061
7062 case 3:
7063 strcpy (buf, "ldm%(da%)\t");
7064 break;
7065
7066 case 4:
7067 strcpy (buf, "ldm%(db%)\t");
7068 break;
7069
7070 case 5:
7071 if (offset >= 0)
7072 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7073 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7074 (long) offset);
7075 else
7076 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7077 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7078 (long) -offset);
7079 output_asm_insn (buf, operands);
7080 base_reg = regs[0];
7081 strcpy (buf, "ldm%(ia%)\t");
7082 break;
7083
7084 default:
7085 gcc_unreachable ();
7086 }
7087
7088 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7089 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7090
7091 for (i = 1; i < nops; i++)
7092 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7093 reg_names[regs[i]]);
7094
7095 strcat (buf, "}\t%@ phole ldm");
7096
7097 output_asm_insn (buf, operands);
7098 return "";
7099 }
7100
7101 int
7102 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7103 HOST_WIDE_INT * load_offset)
7104 {
7105 int unsorted_regs[4];
7106 HOST_WIDE_INT unsorted_offsets[4];
7107 int order[4];
7108 int base_reg = -1;
7109 int i;
7110
7111 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7112 extended if required. */
7113 gcc_assert (nops >= 2 && nops <= 4);
7114
7115 /* Loop over the operands and check that the memory references are
7116 suitable (i.e. immediate offsets from the same base register). At
7117 the same time, extract the target register, and the memory
7118 offsets. */
7119 for (i = 0; i < nops; i++)
7120 {
7121 rtx reg;
7122 rtx offset;
7123
7124 /* Convert a subreg of a mem into the mem itself. */
7125 if (GET_CODE (operands[nops + i]) == SUBREG)
7126 operands[nops + i] = alter_subreg (operands + (nops + i));
7127
7128 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7129
7130 /* Don't reorder volatile memory references; it doesn't seem worth
7131 looking for the case where the order is ok anyway. */
7132 if (MEM_VOLATILE_P (operands[nops + i]))
7133 return 0;
7134
7135 offset = const0_rtx;
7136
7137 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7138 || (GET_CODE (reg) == SUBREG
7139 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7140 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7141 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7142 == REG)
7143 || (GET_CODE (reg) == SUBREG
7144 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7145 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7146 == CONST_INT)))
7147 {
7148 if (i == 0)
7149 {
7150 base_reg = REGNO (reg);
7151 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7152 ? REGNO (operands[i])
7153 : REGNO (SUBREG_REG (operands[i])));
7154 order[0] = 0;
7155 }
7156 else
7157 {
7158 if (base_reg != (int) REGNO (reg))
7159 /* Not addressed from the same base register. */
7160 return 0;
7161
7162 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7163 ? REGNO (operands[i])
7164 : REGNO (SUBREG_REG (operands[i])));
7165 if (unsorted_regs[i] < unsorted_regs[order[0]])
7166 order[0] = i;
7167 }
7168
7169 /* If it isn't an integer register, then we can't do this. */
7170 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7171 return 0;
7172
7173 unsorted_offsets[i] = INTVAL (offset);
7174 }
7175 else
7176 /* Not a suitable memory address. */
7177 return 0;
7178 }
7179
7180 /* All the useful information has now been extracted from the
7181 operands into unsorted_regs and unsorted_offsets; additionally,
7182 order[0] has been set to the lowest numbered register in the
7183 list. Sort the registers into order, and check that the memory
7184 offsets are ascending and adjacent. */
7185
7186 for (i = 1; i < nops; i++)
7187 {
7188 int j;
7189
7190 order[i] = order[i - 1];
7191 for (j = 0; j < nops; j++)
7192 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7193 && (order[i] == order[i - 1]
7194 || unsorted_regs[j] < unsorted_regs[order[i]]))
7195 order[i] = j;
7196
7197       /* Have we found a suitable register?  If not, one must be used more
7198 	 than once.  */
7199 if (order[i] == order[i - 1])
7200 return 0;
7201
7202 /* Is the memory address adjacent and ascending? */
7203 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7204 return 0;
7205 }
7206
7207 if (base)
7208 {
7209 *base = base_reg;
7210
7211 for (i = 0; i < nops; i++)
7212 regs[i] = unsorted_regs[order[i]];
7213
7214 *load_offset = unsorted_offsets[order[0]];
7215 }
7216
7217 if (unsorted_offsets[order[0]] == 0)
7218 return 1; /* stmia */
7219
7220 if (unsorted_offsets[order[0]] == 4)
7221 return 2; /* stmib */
7222
7223 if (unsorted_offsets[order[nops - 1]] == 0)
7224 return 3; /* stmda */
7225
7226 if (unsorted_offsets[order[nops - 1]] == -4)
7227 return 4; /* stmdb */
7228
7229 return 0;
7230 }
7231
7232 const char *
7233 emit_stm_seq (rtx *operands, int nops)
7234 {
7235 int regs[4];
7236 int base_reg;
7237 HOST_WIDE_INT offset;
7238 char buf[100];
7239 int i;
7240
7241 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7242 {
7243 case 1:
7244 strcpy (buf, "stm%(ia%)\t");
7245 break;
7246
7247 case 2:
7248 strcpy (buf, "stm%(ib%)\t");
7249 break;
7250
7251 case 3:
7252 strcpy (buf, "stm%(da%)\t");
7253 break;
7254
7255 case 4:
7256 strcpy (buf, "stm%(db%)\t");
7257 break;
7258
7259 default:
7260 gcc_unreachable ();
7261 }
7262
7263 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7264 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7265
7266 for (i = 1; i < nops; i++)
7267 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7268 reg_names[regs[i]]);
7269
7270 strcat (buf, "}\t%@ phole stm");
7271
7272 output_asm_insn (buf, operands);
7273 return "";
7274 }
7275 \f
7276 /* Routines for use in generating RTL. */
7277
7278 rtx
7279 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7280 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7281 {
7282 HOST_WIDE_INT offset = *offsetp;
7283 int i = 0, j;
7284 rtx result;
7285 int sign = up ? 1 : -1;
7286 rtx mem, addr;
7287
7288 /* XScale has load-store double instructions, but they have stricter
7289 alignment requirements than load-store multiple, so we cannot
7290 use them.
7291
7292 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7293 the pipeline until completion.
7294
7295 NREGS CYCLES
7296 1 3
7297 2 4
7298 3 5
7299 4 6
7300
7301 An ldr instruction takes 1-3 cycles, but does not block the
7302 pipeline.
7303
7304 NREGS CYCLES
7305 1 1-3
7306 2 2-6
7307 3 3-9
7308 4 4-12
7309
7310 Best case ldr will always win. However, the more ldr instructions
7311 we issue, the less likely we are to be able to schedule them well.
7312 Using ldr instructions also increases code size.
7313
7314 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7315 for counts of 3 or 4 regs. */
7316 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7317 {
7318 rtx seq;
7319
7320 start_sequence ();
7321
7322 for (i = 0; i < count; i++)
7323 {
7324 addr = plus_constant (from, i * 4 * sign);
7325 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7326 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7327 offset += 4 * sign;
7328 }
7329
7330 if (write_back)
7331 {
7332 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7333 *offsetp = offset;
7334 }
7335
7336 seq = get_insns ();
7337 end_sequence ();
7338
7339 return seq;
7340 }
7341
7342 result = gen_rtx_PARALLEL (VOIDmode,
7343 rtvec_alloc (count + (write_back ? 1 : 0)));
7344 if (write_back)
7345 {
7346 XVECEXP (result, 0, 0)
7347 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7348 i = 1;
7349 count++;
7350 }
7351
7352 for (j = 0; i < count; i++, j++)
7353 {
7354 addr = plus_constant (from, j * 4 * sign);
7355 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7356 XVECEXP (result, 0, i)
7357 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7358 offset += 4 * sign;
7359 }
7360
7361 if (write_back)
7362 *offsetp = offset;
7363
7364 return result;
7365 }
7366
7367 rtx
7368 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7369 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7370 {
7371 HOST_WIDE_INT offset = *offsetp;
7372 int i = 0, j;
7373 rtx result;
7374 int sign = up ? 1 : -1;
7375 rtx mem, addr;
7376
7377 /* See arm_gen_load_multiple for discussion of
7378 the pros/cons of ldm/stm usage for XScale. */
7379 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7380 {
7381 rtx seq;
7382
7383 start_sequence ();
7384
7385 for (i = 0; i < count; i++)
7386 {
7387 addr = plus_constant (to, i * 4 * sign);
7388 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7389 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7390 offset += 4 * sign;
7391 }
7392
7393 if (write_back)
7394 {
7395 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7396 *offsetp = offset;
7397 }
7398
7399 seq = get_insns ();
7400 end_sequence ();
7401
7402 return seq;
7403 }
7404
7405 result = gen_rtx_PARALLEL (VOIDmode,
7406 rtvec_alloc (count + (write_back ? 1 : 0)));
7407 if (write_back)
7408 {
7409 XVECEXP (result, 0, 0)
7410 = gen_rtx_SET (VOIDmode, to,
7411 plus_constant (to, count * 4 * sign));
7412 i = 1;
7413 count++;
7414 }
7415
7416 for (j = 0; i < count; i++, j++)
7417 {
7418 addr = plus_constant (to, j * 4 * sign);
7419 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7420 XVECEXP (result, 0, i)
7421 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7422 offset += 4 * sign;
7423 }
7424
7425 if (write_back)
7426 *offsetp = offset;
7427
7428 return result;
7429 }
7430
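/* Expand a block copy (the movmemqi pattern).  Following the standard movmem
   operand layout, OPERANDS[0] and OPERANDS[1] are the destination and source
   memory references, OPERANDS[2] the byte count and OPERANDS[3] the alignment.
   Returns 1 if the expansion succeeded, 0 to fall back on the generic code.  */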
7431 int
7432 arm_gen_movmemqi (rtx *operands)
7433 {
7434 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7435 HOST_WIDE_INT srcoffset, dstoffset;
7436 int i;
7437 rtx src, dst, srcbase, dstbase;
7438 rtx part_bytes_reg = NULL;
7439 rtx mem;
7440
7441 if (GET_CODE (operands[2]) != CONST_INT
7442 || GET_CODE (operands[3]) != CONST_INT
7443 || INTVAL (operands[2]) > 64
7444 || INTVAL (operands[3]) & 3)
7445 return 0;
7446
7447 dstbase = operands[0];
7448 srcbase = operands[1];
7449
7450 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7451 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7452
7453 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7454 out_words_to_go = INTVAL (operands[2]) / 4;
7455 last_bytes = INTVAL (operands[2]) & 3;
7456 dstoffset = srcoffset = 0;
7457
7458 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7459 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7460
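  /* Copy the bulk of the block, up to four words at a time, using the
     load/store multiple helpers above; the final word and any trailing
     bytes are handled after this loop.  */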
7461 for (i = 0; in_words_to_go >= 2; i+=4)
7462 {
7463 if (in_words_to_go > 4)
7464 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7465 srcbase, &srcoffset));
7466 else
7467 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7468 FALSE, srcbase, &srcoffset));
7469
7470 if (out_words_to_go)
7471 {
7472 if (out_words_to_go > 4)
7473 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7474 dstbase, &dstoffset));
7475 else if (out_words_to_go != 1)
7476 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7477 dst, TRUE,
7478 (last_bytes == 0
7479 ? FALSE : TRUE),
7480 dstbase, &dstoffset));
7481 else
7482 {
7483 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7484 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7485 if (last_bytes != 0)
7486 {
7487 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7488 dstoffset += 4;
7489 }
7490 }
7491 }
7492
7493 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7494 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7495 }
7496
7497 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7498 if (out_words_to_go)
7499 {
7500 rtx sreg;
7501
7502 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7503 sreg = copy_to_reg (mem);
7504
7505 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7506 emit_move_insn (mem, sreg);
7507 in_words_to_go--;
7508
7509 gcc_assert (!in_words_to_go); /* Sanity check */
7510 }
7511
7512 if (in_words_to_go)
7513 {
7514 gcc_assert (in_words_to_go > 0);
7515
7516 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7517 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7518 }
7519
7520 gcc_assert (!last_bytes || part_bytes_reg);
7521
7522 if (BYTES_BIG_ENDIAN && last_bytes)
7523 {
7524 rtx tmp = gen_reg_rtx (SImode);
7525
7526 /* The bytes we want are in the top end of the word. */
7527 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7528 GEN_INT (8 * (4 - last_bytes))));
7529 part_bytes_reg = tmp;
7530
7531 while (last_bytes)
7532 {
7533 mem = adjust_automodify_address (dstbase, QImode,
7534 plus_constant (dst, last_bytes - 1),
7535 dstoffset + last_bytes - 1);
7536 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7537
7538 if (--last_bytes)
7539 {
7540 tmp = gen_reg_rtx (SImode);
7541 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7542 part_bytes_reg = tmp;
7543 }
7544 }
7545
7546 }
7547 else
7548 {
7549 if (last_bytes > 1)
7550 {
7551 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7552 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7553 last_bytes -= 2;
7554 if (last_bytes)
7555 {
7556 rtx tmp = gen_reg_rtx (SImode);
7557 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7558 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7559 part_bytes_reg = tmp;
7560 dstoffset += 2;
7561 }
7562 }
7563
7564 if (last_bytes)
7565 {
7566 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7567 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7568 }
7569 }
7570
7571 return 1;
7572 }
7573
7574 /* Select a dominance comparison mode if possible for a test of the general
7575 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7576 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7577 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7578 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7579 In all cases OP will be either EQ or NE, but we don't need to know which
7580 here. If we are unable to support a dominance comparison we return
7581 CC mode. This will then fail to match for the RTL expressions that
7582 generate this call. */
7583 enum machine_mode
7584 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7585 {
7586 enum rtx_code cond1, cond2;
7587 int swapped = 0;
7588
7589 /* Currently we will probably get the wrong result if the individual
7590 comparisons are not simple. This also ensures that it is safe to
7591 reverse a comparison if necessary. */
7592 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7593 != CCmode)
7594 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7595 != CCmode))
7596 return CCmode;
7597
7598 /* The if_then_else variant of this tests the second condition if the
7599 first passes, but is true if the first fails. Reverse the first
7600 condition to get a true "inclusive-or" expression. */
7601 if (cond_or == DOM_CC_NX_OR_Y)
7602 cond1 = reverse_condition (cond1);
7603
7604 /* If the comparisons are not equal, and one doesn't dominate the other,
7605 then we can't do this. */
7606 if (cond1 != cond2
7607 && !comparison_dominates_p (cond1, cond2)
7608 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7609 return CCmode;
7610
7611 if (swapped)
7612 {
7613 enum rtx_code temp = cond1;
7614 cond1 = cond2;
7615 cond2 = temp;
7616 }
7617
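  /* From here on, COND1 either equals COND2 or dominates it.  */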
7618 switch (cond1)
7619 {
7620 case EQ:
7621 if (cond_or == DOM_CC_X_AND_Y)
7622 return CC_DEQmode;
7623
7624 switch (cond2)
7625 {
7626 case EQ: return CC_DEQmode;
7627 case LE: return CC_DLEmode;
7628 case LEU: return CC_DLEUmode;
7629 case GE: return CC_DGEmode;
7630 case GEU: return CC_DGEUmode;
7631 default: gcc_unreachable ();
7632 }
7633
7634 case LT:
7635 if (cond_or == DOM_CC_X_AND_Y)
7636 return CC_DLTmode;
7637
7638 switch (cond2)
7639 {
7640 case LT:
7641 return CC_DLTmode;
7642 case LE:
7643 return CC_DLEmode;
7644 case NE:
7645 return CC_DNEmode;
7646 default:
7647 gcc_unreachable ();
7648 }
7649
7650 case GT:
7651 if (cond_or == DOM_CC_X_AND_Y)
7652 return CC_DGTmode;
7653
7654 switch (cond2)
7655 {
7656 case GT:
7657 return CC_DGTmode;
7658 case GE:
7659 return CC_DGEmode;
7660 case NE:
7661 return CC_DNEmode;
7662 default:
7663 gcc_unreachable ();
7664 }
7665
7666 case LTU:
7667 if (cond_or == DOM_CC_X_AND_Y)
7668 return CC_DLTUmode;
7669
7670 switch (cond2)
7671 {
7672 case LTU:
7673 return CC_DLTUmode;
7674 case LEU:
7675 return CC_DLEUmode;
7676 case NE:
7677 return CC_DNEmode;
7678 default:
7679 gcc_unreachable ();
7680 }
7681
7682 case GTU:
7683 if (cond_or == DOM_CC_X_AND_Y)
7684 return CC_DGTUmode;
7685
7686 switch (cond2)
7687 {
7688 case GTU:
7689 return CC_DGTUmode;
7690 case GEU:
7691 return CC_DGEUmode;
7692 case NE:
7693 return CC_DNEmode;
7694 default:
7695 gcc_unreachable ();
7696 }
7697
7698 /* The remaining cases only occur when both comparisons are the
7699 same. */
7700 case NE:
7701 gcc_assert (cond1 == cond2);
7702 return CC_DNEmode;
7703
7704 case LE:
7705 gcc_assert (cond1 == cond2);
7706 return CC_DLEmode;
7707
7708 case GE:
7709 gcc_assert (cond1 == cond2);
7710 return CC_DGEmode;
7711
7712 case LEU:
7713 gcc_assert (cond1 == cond2);
7714 return CC_DLEUmode;
7715
7716 case GEU:
7717 gcc_assert (cond1 == cond2);
7718 return CC_DGEUmode;
7719
7720 default:
7721 gcc_unreachable ();
7722 }
7723 }
7724
7725 enum machine_mode
7726 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7727 {
7728 /* All floating point compares return CCFP if it is an equality
7729 comparison, and CCFPE otherwise. */
7730 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7731 {
7732 switch (op)
7733 {
7734 case EQ:
7735 case NE:
7736 case UNORDERED:
7737 case ORDERED:
7738 case UNLT:
7739 case UNLE:
7740 case UNGT:
7741 case UNGE:
7742 case UNEQ:
7743 case LTGT:
7744 return CCFPmode;
7745
7746 case LT:
7747 case LE:
7748 case GT:
7749 case GE:
7750 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7751 return CCFPmode;
7752 return CCFPEmode;
7753
7754 default:
7755 gcc_unreachable ();
7756 }
7757 }
7758
7759 /* A compare with a shifted operand. Because of canonicalization, the
7760 comparison will have to be swapped when we emit the assembler. */
7761 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7762 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7763 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7764 || GET_CODE (x) == ROTATERT))
7765 return CC_SWPmode;
7766
7767 /* This operation is performed swapped, but since we only rely on the Z
7768 flag we don't need an additional mode. */
7769 if (GET_MODE (y) == SImode && REG_P (y)
7770 && GET_CODE (x) == NEG
7771 && (op == EQ || op == NE))
7772 return CC_Zmode;
7773
7774 /* This is a special case that is used by combine to allow a
7775 comparison of a shifted byte load to be split into a zero-extend
7776 followed by a comparison of the shifted integer (only valid for
7777 equalities and unsigned inequalities). */
7778 if (GET_MODE (x) == SImode
7779 && GET_CODE (x) == ASHIFT
7780 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7781 && GET_CODE (XEXP (x, 0)) == SUBREG
7782 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7783 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7784 && (op == EQ || op == NE
7785 || op == GEU || op == GTU || op == LTU || op == LEU)
7786 && GET_CODE (y) == CONST_INT)
7787 return CC_Zmode;
7788
7789 /* A construct for a conditional compare, if the false arm contains
7790 0, then both conditions must be true, otherwise either condition
7791 must be true. Not all conditions are possible, so CCmode is
7792 returned if it can't be done. */
7793 if (GET_CODE (x) == IF_THEN_ELSE
7794 && (XEXP (x, 2) == const0_rtx
7795 || XEXP (x, 2) == const1_rtx)
7796 && COMPARISON_P (XEXP (x, 0))
7797 && COMPARISON_P (XEXP (x, 1)))
7798 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7799 INTVAL (XEXP (x, 2)));
7800
7801 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7802 if (GET_CODE (x) == AND
7803 && COMPARISON_P (XEXP (x, 0))
7804 && COMPARISON_P (XEXP (x, 1)))
7805 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7806 DOM_CC_X_AND_Y);
7807
7808 if (GET_CODE (x) == IOR
7809 && COMPARISON_P (XEXP (x, 0))
7810 && COMPARISON_P (XEXP (x, 1)))
7811 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7812 DOM_CC_X_OR_Y);
7813
7814 /* An operation (on Thumb) where we want to test for a single bit.
7815 This is done by shifting that bit up into the top bit of a
7816 scratch register; we can then branch on the sign bit. */
7817 if (TARGET_THUMB1
7818 && GET_MODE (x) == SImode
7819 && (op == EQ || op == NE)
7820 && GET_CODE (x) == ZERO_EXTRACT
7821 && XEXP (x, 1) == const1_rtx)
7822 return CC_Nmode;
7823
7824 /* An operation that sets the condition codes as a side-effect, the
7825 V flag is not set correctly, so we can only use comparisons where
7826 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7827 instead.) */
7828 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7829 if (GET_MODE (x) == SImode
7830 && y == const0_rtx
7831 && (op == EQ || op == NE || op == LT || op == GE)
7832 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7833 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7834 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7835 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7836 || GET_CODE (x) == LSHIFTRT
7837 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7838 || GET_CODE (x) == ROTATERT
7839 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7840 return CC_NOOVmode;
7841
7842 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7843 return CC_Zmode;
7844
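  /* Comparing the result of an addition against one of its operands with
     LTU/GEU tests the carry out of that addition, so only the C flag is
     needed.  */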
7845 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7846 && GET_CODE (x) == PLUS
7847 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7848 return CC_Cmode;
7849
7850 return CCmode;
7851 }
7852
7853 /* X and Y are two things to compare using CODE. Emit the compare insn and
7854 return the rtx for the condition code register (CC_REGNUM) in the
7855 appropriate mode. */
7856 rtx
7857 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7858 {
7859 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7860 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7861
7862 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7863
7864 return cc_reg;
7865 }
7866
7867 /* Generate a sequence of insns that will generate the correct return
7868 address mask depending on the physical architecture that the program
7869 is running on. */
7870 rtx
7871 arm_gen_return_addr_mask (void)
7872 {
7873 rtx reg = gen_reg_rtx (Pmode);
7874
7875 emit_insn (gen_return_addr_mask (reg));
7876 return reg;
7877 }
7878
7879 void
7880 arm_reload_in_hi (rtx *operands)
7881 {
7882 rtx ref = operands[1];
7883 rtx base, scratch;
7884 HOST_WIDE_INT offset = 0;
7885
7886 if (GET_CODE (ref) == SUBREG)
7887 {
7888 offset = SUBREG_BYTE (ref);
7889 ref = SUBREG_REG (ref);
7890 }
7891
7892 if (GET_CODE (ref) == REG)
7893 {
7894 /* We have a pseudo which has been spilt onto the stack; there
7895 are two cases here: the first where there is a simple
7896 stack-slot replacement and a second where the stack-slot is
7897 out of range, or is used as a subreg. */
7898 if (reg_equiv_mem[REGNO (ref)])
7899 {
7900 ref = reg_equiv_mem[REGNO (ref)];
7901 base = find_replacement (&XEXP (ref, 0));
7902 }
7903 else
7904 /* The slot is out of range, or was dressed up in a SUBREG. */
7905 base = reg_equiv_address[REGNO (ref)];
7906 }
7907 else
7908 base = find_replacement (&XEXP (ref, 0));
7909
7910 /* Handle the case where the address is too complex to be offset by 1. */
7911 if (GET_CODE (base) == MINUS
7912 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7913 {
7914 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7915
7916 emit_set_insn (base_plus, base);
7917 base = base_plus;
7918 }
7919 else if (GET_CODE (base) == PLUS)
7920 {
7921 /* The addend must be CONST_INT, or we would have dealt with it above. */
7922 HOST_WIDE_INT hi, lo;
7923
7924 offset += INTVAL (XEXP (base, 1));
7925 base = XEXP (base, 0);
7926
7927 /* Rework the address into a legal sequence of insns. */
7928 /* Valid range for lo is -4095 -> 4095 */
7929 lo = (offset >= 0
7930 ? (offset & 0xfff)
7931 : -((-offset) & 0xfff));
7932
7933 /* Corner case, if lo is the max offset then we would be out of range
7934 once we have added the additional 1 below, so bump the msb into the
7935 pre-loading insn(s). */
7936 if (lo == 4095)
7937 lo &= 0x7ff;
7938
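      /* HI is (offset - lo) sign-extended from 32 bits, so that HI + LO
	 reconstructs OFFSET even when HOST_WIDE_INT is wider than 32 bits.  */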
7939 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7940 ^ (HOST_WIDE_INT) 0x80000000)
7941 - (HOST_WIDE_INT) 0x80000000);
7942
7943 gcc_assert (hi + lo == offset);
7944
7945 if (hi != 0)
7946 {
7947 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7948
7949 /* Get the base address; addsi3 knows how to handle constants
7950 that require more than one insn. */
7951 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7952 base = base_plus;
7953 offset = lo;
7954 }
7955 }
7956
7957 /* Operands[2] may overlap operands[0] (though it won't overlap
7958 operands[1]); that's why we asked for a DImode reg -- so we can
7959 use the half that does not overlap. */
7960 if (REGNO (operands[2]) == REGNO (operands[0]))
7961 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7962 else
7963 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7964
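  /* Load the two bytes of the halfword separately and merge them, shifting
     whichever byte is most significant (depending on endianness) left by
     8 bits.  */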
7965 emit_insn (gen_zero_extendqisi2 (scratch,
7966 gen_rtx_MEM (QImode,
7967 plus_constant (base,
7968 offset))));
7969 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7970 gen_rtx_MEM (QImode,
7971 plus_constant (base,
7972 offset + 1))));
7973 if (!BYTES_BIG_ENDIAN)
7974 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7975 gen_rtx_IOR (SImode,
7976 gen_rtx_ASHIFT
7977 (SImode,
7978 gen_rtx_SUBREG (SImode, operands[0], 0),
7979 GEN_INT (8)),
7980 scratch));
7981 else
7982 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7983 gen_rtx_IOR (SImode,
7984 gen_rtx_ASHIFT (SImode, scratch,
7985 GEN_INT (8)),
7986 gen_rtx_SUBREG (SImode, operands[0], 0)));
7987 }
7988
7989 /* Handle storing a half-word to memory during reload by synthesizing as two
7990 byte stores. Take care not to clobber the input values until after we
7991 have moved them somewhere safe. This code assumes that if the DImode
7992 scratch in operands[2] overlaps either the input value or output address
7993 in some way, then that value must die in this insn (we absolutely need
7994 two scratch registers for some corner cases). */
7995 void
7996 arm_reload_out_hi (rtx *operands)
7997 {
7998 rtx ref = operands[0];
7999 rtx outval = operands[1];
8000 rtx base, scratch;
8001 HOST_WIDE_INT offset = 0;
8002
8003 if (GET_CODE (ref) == SUBREG)
8004 {
8005 offset = SUBREG_BYTE (ref);
8006 ref = SUBREG_REG (ref);
8007 }
8008
8009 if (GET_CODE (ref) == REG)
8010 {
8011 /* We have a pseudo which has been spilt onto the stack; there
8012 are two cases here: the first where there is a simple
8013 stack-slot replacement and a second where the stack-slot is
8014 out of range, or is used as a subreg. */
8015 if (reg_equiv_mem[REGNO (ref)])
8016 {
8017 ref = reg_equiv_mem[REGNO (ref)];
8018 base = find_replacement (&XEXP (ref, 0));
8019 }
8020 else
8021 /* The slot is out of range, or was dressed up in a SUBREG. */
8022 base = reg_equiv_address[REGNO (ref)];
8023 }
8024 else
8025 base = find_replacement (&XEXP (ref, 0));
8026
8027 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8028
8029 /* Handle the case where the address is too complex to be offset by 1. */
8030 if (GET_CODE (base) == MINUS
8031 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8032 {
8033 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8034
8035 /* Be careful not to destroy OUTVAL. */
8036 if (reg_overlap_mentioned_p (base_plus, outval))
8037 {
8038 /* Updating base_plus might destroy outval, see if we can
8039 swap the scratch and base_plus. */
8040 if (!reg_overlap_mentioned_p (scratch, outval))
8041 {
8042 rtx tmp = scratch;
8043 scratch = base_plus;
8044 base_plus = tmp;
8045 }
8046 else
8047 {
8048 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8049
8050 /* Be conservative and copy OUTVAL into the scratch now,
8051 this should only be necessary if outval is a subreg
8052 of something larger than a word. */
8053 /* XXX Might this clobber base? I can't see how it can,
8054 since scratch is known to overlap with OUTVAL, and
8055 must be wider than a word. */
8056 emit_insn (gen_movhi (scratch_hi, outval));
8057 outval = scratch_hi;
8058 }
8059 }
8060
8061 emit_set_insn (base_plus, base);
8062 base = base_plus;
8063 }
8064 else if (GET_CODE (base) == PLUS)
8065 {
8066 /* The addend must be CONST_INT, or we would have dealt with it above. */
8067 HOST_WIDE_INT hi, lo;
8068
8069 offset += INTVAL (XEXP (base, 1));
8070 base = XEXP (base, 0);
8071
8072 /* Rework the address into a legal sequence of insns. */
8073 /* Valid range for lo is -4095 -> 4095 */
8074 lo = (offset >= 0
8075 ? (offset & 0xfff)
8076 : -((-offset) & 0xfff));
8077
8078 /* Corner case, if lo is the max offset then we would be out of range
8079 once we have added the additional 1 below, so bump the msb into the
8080 pre-loading insn(s). */
8081 if (lo == 4095)
8082 lo &= 0x7ff;
8083
8084 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8085 ^ (HOST_WIDE_INT) 0x80000000)
8086 - (HOST_WIDE_INT) 0x80000000);
8087
8088 gcc_assert (hi + lo == offset);
8089
8090 if (hi != 0)
8091 {
8092 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8093
8094 /* Be careful not to destroy OUTVAL. */
8095 if (reg_overlap_mentioned_p (base_plus, outval))
8096 {
8097 /* Updating base_plus might destroy outval, see if we
8098 can swap the scratch and base_plus. */
8099 if (!reg_overlap_mentioned_p (scratch, outval))
8100 {
8101 rtx tmp = scratch;
8102 scratch = base_plus;
8103 base_plus = tmp;
8104 }
8105 else
8106 {
8107 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8108
8109 /* Be conservative and copy outval into scratch now,
8110 this should only be necessary if outval is a
8111 subreg of something larger than a word. */
8112 /* XXX Might this clobber base? I can't see how it
8113 can, since scratch is known to overlap with
8114 outval. */
8115 emit_insn (gen_movhi (scratch_hi, outval));
8116 outval = scratch_hi;
8117 }
8118 }
8119
8120 /* Get the base address; addsi3 knows how to handle constants
8121 that require more than one insn. */
8122 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8123 base = base_plus;
8124 offset = lo;
8125 }
8126 }
8127
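  /* Emit the halfword as two separate byte stores; on big-endian targets
     the most significant byte goes at the lower address.  */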
8128 if (BYTES_BIG_ENDIAN)
8129 {
8130 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8131 plus_constant (base, offset + 1)),
8132 gen_lowpart (QImode, outval)));
8133 emit_insn (gen_lshrsi3 (scratch,
8134 gen_rtx_SUBREG (SImode, outval, 0),
8135 GEN_INT (8)));
8136 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8137 gen_lowpart (QImode, scratch)));
8138 }
8139 else
8140 {
8141 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8142 gen_lowpart (QImode, outval)));
8143 emit_insn (gen_lshrsi3 (scratch,
8144 gen_rtx_SUBREG (SImode, outval, 0),
8145 GEN_INT (8)));
8146 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8147 plus_constant (base, offset + 1)),
8148 gen_lowpart (QImode, scratch)));
8149 }
8150 }
8151
8152 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8153 (padded to the size of a word) should be passed in a register. */
8154
8155 static bool
8156 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8157 {
8158 if (TARGET_AAPCS_BASED)
8159 return must_pass_in_stack_var_size (mode, type);
8160 else
8161 return must_pass_in_stack_var_size_or_pad (mode, type);
8162 }
8163
8164
8165 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8166 Return true if an argument passed on the stack should be padded upwards,
8167 i.e. if the least-significant byte has useful data.
8168 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8169 aggregate types are placed in the lowest memory address. */
8170
8171 bool
8172 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8173 {
8174 if (!TARGET_AAPCS_BASED)
8175 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8176
8177 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8178 return false;
8179
8180 return true;
8181 }
8182
8183
8184 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8185 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8186 byte of the register has useful data, and return the opposite if the
8187 most significant byte does.
8188 For AAPCS, small aggregates and small complex types are always padded
8189 upwards. */
8190
8191 bool
8192 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8193 tree type, int first ATTRIBUTE_UNUSED)
8194 {
8195 if (TARGET_AAPCS_BASED
8196 && BYTES_BIG_ENDIAN
8197 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8198 && int_size_in_bytes (type) <= 4)
8199 return true;
8200
8201 /* Otherwise, use default padding. */
8202 return !BYTES_BIG_ENDIAN;
8203 }
8204
8205 \f
8206 /* Print a symbolic form of X to the debug file, F. */
8207 static void
8208 arm_print_value (FILE *f, rtx x)
8209 {
8210 switch (GET_CODE (x))
8211 {
8212 case CONST_INT:
8213 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8214 return;
8215
8216 case CONST_DOUBLE:
8217 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8218 return;
8219
8220 case CONST_VECTOR:
8221 {
8222 int i;
8223
8224 fprintf (f, "<");
8225 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8226 {
8227 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8228 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8229 fputc (',', f);
8230 }
8231 fprintf (f, ">");
8232 }
8233 return;
8234
8235 case CONST_STRING:
8236 fprintf (f, "\"%s\"", XSTR (x, 0));
8237 return;
8238
8239 case SYMBOL_REF:
8240 fprintf (f, "`%s'", XSTR (x, 0));
8241 return;
8242
8243 case LABEL_REF:
8244 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8245 return;
8246
8247 case CONST:
8248 arm_print_value (f, XEXP (x, 0));
8249 return;
8250
8251 case PLUS:
8252 arm_print_value (f, XEXP (x, 0));
8253 fprintf (f, "+");
8254 arm_print_value (f, XEXP (x, 1));
8255 return;
8256
8257 case PC:
8258 fprintf (f, "pc");
8259 return;
8260
8261 default:
8262 fprintf (f, "????");
8263 return;
8264 }
8265 }
8266 \f
8267 /* Routines for manipulation of the constant pool. */
8268
8269 /* Arm instructions cannot load a large constant directly into a
8270 register; they have to come from a pc relative load. The constant
8271 must therefore be placed in the addressable range of the pc
8272 relative load. Depending on the precise pc relative load
8273 instruction the range is somewhere between 256 bytes and 4k. This
8274 means that we often have to dump a constant inside a function, and
8275 generate code to branch around it.
8276
8277 It is important to minimize this, since the branches will slow
8278 things down and make the code larger.
8279
8280 Normally we can hide the table after an existing unconditional
8281 branch so that there is no interruption of the flow, but in the
8282 worst case the code looks like this:
8283
8284 ldr rn, L1
8285 ...
8286 b L2
8287 align
8288 L1: .long value
8289 L2:
8290 ...
8291
8292 ldr rn, L3
8293 ...
8294 b L4
8295 align
8296 L3: .long value
8297 L4:
8298 ...
8299
8300 We fix this by performing a scan after scheduling, which notices
8301 which instructions need to have their operands fetched from the
8302 constant table and builds the table.
8303
8304 The algorithm starts by building a table of all the constants that
8305 need fixing up and all the natural barriers in the function (places
8306 where a constant table can be dropped without breaking the flow).
8307 For each fixup we note how far the pc-relative replacement will be
8308 able to reach and the offset of the instruction into the function.
8309
8310 Having built the table we then group the fixes together to form
8311 tables that are as large as possible (subject to addressing
8312 constraints) and emit each table of constants after the last
8313 barrier that is within range of all the instructions in the group.
8314 If a group does not contain a barrier, then we forcibly create one
8315 by inserting a jump instruction into the flow. Once the table has
8316 been inserted, the insns are then modified to reference the
8317 relevant entry in the pool.
8318
8319 Possible enhancements to the algorithm (not implemented) are:
8320
8321 1) For some processors and object formats, there may be benefit in
8322 aligning the pools to the start of cache lines; this alignment
8323 would need to be taken into account when calculating addressability
8324 of a pool. */
8325
8326 /* These typedefs are located at the start of this file, so that
8327 they can be used in the prototypes there. This comment is to
8328 remind readers of that fact so that the following structures
8329 can be understood more easily.
8330
8331 typedef struct minipool_node Mnode;
8332 typedef struct minipool_fixup Mfix; */
8333
8334 struct minipool_node
8335 {
8336 /* Doubly linked chain of entries. */
8337 Mnode * next;
8338 Mnode * prev;
8339 /* The maximum offset into the code at which this entry can be placed. While
8340 pushing fixes for forward references, all entries are sorted in order
8341 of increasing max_address. */
8342 HOST_WIDE_INT max_address;
8343 /* Similarly for an entry inserted for a backwards ref. */
8344 HOST_WIDE_INT min_address;
8345 /* The number of fixes referencing this entry. This can become zero
8346 if we "unpush" an entry. In this case we ignore the entry when we
8347 come to emit the code. */
8348 int refcount;
8349 /* The offset from the start of the minipool. */
8350 HOST_WIDE_INT offset;
8351 /* The value in the table. */
8352 rtx value;
8353 /* The mode of value. */
8354 enum machine_mode mode;
8355 /* The size of the value. With iWMMXt enabled
8356 sizes > 4 also imply an alignment of 8-bytes. */
8357 int fix_size;
8358 };
8359
8360 struct minipool_fixup
8361 {
8362 Mfix * next;
8363 rtx insn;
8364 HOST_WIDE_INT address;
8365 rtx * loc;
8366 enum machine_mode mode;
8367 int fix_size;
8368 rtx value;
8369 Mnode * minipool;
8370 HOST_WIDE_INT forwards;
8371 HOST_WIDE_INT backwards;
8372 };
8373
8374 /* Fixes less than a word need padding out to a word boundary. */
8375 #define MINIPOOL_FIX_SIZE(mode) \
8376 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8377
8378 static Mnode * minipool_vector_head;
8379 static Mnode * minipool_vector_tail;
8380 static rtx minipool_vector_label;
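/* Bytes of padding assumed at the start of every constant pool once an
   entry requiring 8-byte alignment has been seen; see push_minipool_fix.  */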
8381 static int minipool_pad;
8382
8383 /* The linked list of all minipool fixes required for this function. */
8384 Mfix * minipool_fix_head;
8385 Mfix * minipool_fix_tail;
8386 /* The fix entry for the current minipool, once it has been placed. */
8387 Mfix * minipool_barrier;
8388
8389 /* Determines if INSN is the start of a jump table. Returns the end
8390 of the TABLE or NULL_RTX. */
8391 static rtx
8392 is_jump_table (rtx insn)
8393 {
8394 rtx table;
8395
8396 if (GET_CODE (insn) == JUMP_INSN
8397 && JUMP_LABEL (insn) != NULL
8398 && ((table = next_real_insn (JUMP_LABEL (insn)))
8399 == next_real_insn (insn))
8400 && table != NULL
8401 && GET_CODE (table) == JUMP_INSN
8402 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8403 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8404 return table;
8405
8406 return NULL_RTX;
8407 }
8408
8409 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8410 #define JUMP_TABLES_IN_TEXT_SECTION 0
8411 #endif
8412
8413 static HOST_WIDE_INT
8414 get_jump_table_size (rtx insn)
8415 {
8416 /* ADDR_VECs only take room if read-only data goes into the text
8417 section. */
8418 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8419 {
8420 rtx body = PATTERN (insn);
8421 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8422 HOST_WIDE_INT size;
8423 HOST_WIDE_INT modesize;
8424
8425 modesize = GET_MODE_SIZE (GET_MODE (body));
8426 size = modesize * XVECLEN (body, elt);
8427 switch (modesize)
8428 {
8429 case 1:
8430 /* Round up size of TBB table to a halfword boundary. */
8431 size = (size + 1) & ~(HOST_WIDE_INT)1;
8432 break;
8433 case 2:
8434 /* No padding necessary for TBH. */
8435 break;
8436 case 4:
8437 /* Add two bytes for alignment on Thumb. */
8438 if (TARGET_THUMB)
8439 size += 2;
8440 break;
8441 default:
8442 gcc_unreachable ();
8443 }
8444 return size;
8445 }
8446
8447 return 0;
8448 }
8449
8450 /* Move a minipool fix MP from its current location to before MAX_MP.
8451 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8452 constraints may need updating. */
8453 static Mnode *
8454 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8455 HOST_WIDE_INT max_address)
8456 {
8457 /* The code below assumes these are different. */
8458 gcc_assert (mp != max_mp);
8459
8460 if (max_mp == NULL)
8461 {
8462 if (max_address < mp->max_address)
8463 mp->max_address = max_address;
8464 }
8465 else
8466 {
8467 if (max_address > max_mp->max_address - mp->fix_size)
8468 mp->max_address = max_mp->max_address - mp->fix_size;
8469 else
8470 mp->max_address = max_address;
8471
8472 /* Unlink MP from its current position. Since max_mp is non-null,
8473 mp->prev must be non-null. */
8474 mp->prev->next = mp->next;
8475 if (mp->next != NULL)
8476 mp->next->prev = mp->prev;
8477 else
8478 minipool_vector_tail = mp->prev;
8479
8480 /* Re-insert it before MAX_MP. */
8481 mp->next = max_mp;
8482 mp->prev = max_mp->prev;
8483 max_mp->prev = mp;
8484
8485 if (mp->prev != NULL)
8486 mp->prev->next = mp;
8487 else
8488 minipool_vector_head = mp;
8489 }
8490
8491 /* Save the new entry. */
8492 max_mp = mp;
8493
8494 /* Scan over the preceding entries and adjust their addresses as
8495 required. */
8496 while (mp->prev != NULL
8497 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8498 {
8499 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8500 mp = mp->prev;
8501 }
8502
8503 return max_mp;
8504 }
8505
8506 /* Add a constant to the minipool for a forward reference. Returns the
8507 node added or NULL if the constant will not fit in this pool. */
8508 static Mnode *
8509 add_minipool_forward_ref (Mfix *fix)
8510 {
8511 /* If set, max_mp is the first pool_entry that has a lower
8512 constraint than the one we are trying to add. */
8513 Mnode * max_mp = NULL;
8514 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8515 Mnode * mp;
8516
8517 /* If the minipool starts before the end of FIX->INSN then this FIX
8518 cannot be placed into the current pool. Furthermore, adding the
8519 new constant pool entry may cause the pool to start FIX_SIZE bytes
8520 earlier. */
8521 if (minipool_vector_head &&
8522 (fix->address + get_attr_length (fix->insn)
8523 >= minipool_vector_head->max_address - fix->fix_size))
8524 return NULL;
8525
8526 /* Scan the pool to see if a constant with the same value has
8527 already been added. While we are doing this, also note the
8528 location where we must insert the constant if it doesn't already
8529 exist. */
8530 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8531 {
8532 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8533 && fix->mode == mp->mode
8534 && (GET_CODE (fix->value) != CODE_LABEL
8535 || (CODE_LABEL_NUMBER (fix->value)
8536 == CODE_LABEL_NUMBER (mp->value)))
8537 && rtx_equal_p (fix->value, mp->value))
8538 {
8539 /* More than one fix references this entry. */
8540 mp->refcount++;
8541 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8542 }
8543
8544 /* Note the insertion point if necessary. */
8545 if (max_mp == NULL
8546 && mp->max_address > max_address)
8547 max_mp = mp;
8548
8549 /* If we are inserting an 8-byte aligned quantity and
8550 we have not already found an insertion point, then
8551 make sure that all such 8-byte aligned quantities are
8552 placed at the start of the pool. */
8553 if (ARM_DOUBLEWORD_ALIGN
8554 && max_mp == NULL
8555 && fix->fix_size >= 8
8556 && mp->fix_size < 8)
8557 {
8558 max_mp = mp;
8559 max_address = mp->max_address;
8560 }
8561 }
8562
8563 /* The value is not currently in the minipool, so we need to create
8564 a new entry for it. If MAX_MP is NULL, the entry will be put on
8565 the end of the list since the placement is less constrained than
8566 any existing entry. Otherwise, we insert the new fix before
8567 MAX_MP and, if necessary, adjust the constraints on the other
8568 entries. */
8569 mp = XNEW (Mnode);
8570 mp->fix_size = fix->fix_size;
8571 mp->mode = fix->mode;
8572 mp->value = fix->value;
8573 mp->refcount = 1;
8574 /* Not yet required for a backwards ref. */
8575 mp->min_address = -65536;
8576
8577 if (max_mp == NULL)
8578 {
8579 mp->max_address = max_address;
8580 mp->next = NULL;
8581 mp->prev = minipool_vector_tail;
8582
8583 if (mp->prev == NULL)
8584 {
8585 minipool_vector_head = mp;
8586 minipool_vector_label = gen_label_rtx ();
8587 }
8588 else
8589 mp->prev->next = mp;
8590
8591 minipool_vector_tail = mp;
8592 }
8593 else
8594 {
8595 if (max_address > max_mp->max_address - mp->fix_size)
8596 mp->max_address = max_mp->max_address - mp->fix_size;
8597 else
8598 mp->max_address = max_address;
8599
8600 mp->next = max_mp;
8601 mp->prev = max_mp->prev;
8602 max_mp->prev = mp;
8603 if (mp->prev != NULL)
8604 mp->prev->next = mp;
8605 else
8606 minipool_vector_head = mp;
8607 }
8608
8609 /* Save the new entry. */
8610 max_mp = mp;
8611
8612 /* Scan over the preceding entries and adjust their addresses as
8613 required. */
8614 while (mp->prev != NULL
8615 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8616 {
8617 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8618 mp = mp->prev;
8619 }
8620
8621 return max_mp;
8622 }
8623
8624 static Mnode *
8625 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8626 HOST_WIDE_INT min_address)
8627 {
8628 HOST_WIDE_INT offset;
8629
8630 /* The code below assumes these are different. */
8631 gcc_assert (mp != min_mp);
8632
8633 if (min_mp == NULL)
8634 {
8635 if (min_address > mp->min_address)
8636 mp->min_address = min_address;
8637 }
8638 else
8639 {
8640 /* We will adjust this below if it is too loose. */
8641 mp->min_address = min_address;
8642
8643 /* Unlink MP from its current position. Since min_mp is non-null,
8644 mp->next must be non-null. */
8645 mp->next->prev = mp->prev;
8646 if (mp->prev != NULL)
8647 mp->prev->next = mp->next;
8648 else
8649 minipool_vector_head = mp->next;
8650
8651 /* Reinsert it after MIN_MP. */
8652 mp->prev = min_mp;
8653 mp->next = min_mp->next;
8654 min_mp->next = mp;
8655 if (mp->next != NULL)
8656 mp->next->prev = mp;
8657 else
8658 minipool_vector_tail = mp;
8659 }
8660
8661 min_mp = mp;
8662
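  /* Recompute each entry's offset within the pool and propagate the
     min_address constraints forwards through the list.  */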
8663 offset = 0;
8664 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8665 {
8666 mp->offset = offset;
8667 if (mp->refcount > 0)
8668 offset += mp->fix_size;
8669
8670 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8671 mp->next->min_address = mp->min_address + mp->fix_size;
8672 }
8673
8674 return min_mp;
8675 }
8676
8677 /* Add a constant to the minipool for a backward reference. Returns the
8678 node added or NULL if the constant will not fit in this pool.
8679
8680 Note that the code for insertion for a backwards reference can be
8681 somewhat confusing because the calculated offsets for each fix do
8682 not take into account the size of the pool (which is still under
8683 construction). */
8684 static Mnode *
8685 add_minipool_backward_ref (Mfix *fix)
8686 {
8687 /* If set, min_mp is the last pool_entry that has a lower constraint
8688 than the one we are trying to add. */
8689 Mnode *min_mp = NULL;
8690 /* This can be negative, since it is only a constraint. */
8691 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8692 Mnode *mp;
8693
8694 /* If we can't reach the current pool from this insn, or if we can't
8695 insert this entry at the end of the pool without pushing other
8696 fixes out of range, then we don't try. This ensures that we
8697 can't fail later on. */
8698 if (min_address >= minipool_barrier->address
8699 || (minipool_vector_tail->min_address + fix->fix_size
8700 >= minipool_barrier->address))
8701 return NULL;
8702
8703 /* Scan the pool to see if a constant with the same value has
8704 already been added. While we are doing this, also note the
8705 location where we must insert the constant if it doesn't already
8706 exist. */
8707 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8708 {
8709 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8710 && fix->mode == mp->mode
8711 && (GET_CODE (fix->value) != CODE_LABEL
8712 || (CODE_LABEL_NUMBER (fix->value)
8713 == CODE_LABEL_NUMBER (mp->value)))
8714 && rtx_equal_p (fix->value, mp->value)
8715 /* Check that there is enough slack to move this entry to the
8716 end of the table (this is conservative). */
8717 && (mp->max_address
8718 > (minipool_barrier->address
8719 + minipool_vector_tail->offset
8720 + minipool_vector_tail->fix_size)))
8721 {
8722 mp->refcount++;
8723 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8724 }
8725
8726 if (min_mp != NULL)
8727 mp->min_address += fix->fix_size;
8728 else
8729 {
8730 /* Note the insertion point if necessary. */
8731 if (mp->min_address < min_address)
8732 {
8733 /* For now, we do not allow the insertion of 8-byte alignment
8734 requiring nodes anywhere but at the start of the pool. */
8735 if (ARM_DOUBLEWORD_ALIGN
8736 && fix->fix_size >= 8 && mp->fix_size < 8)
8737 return NULL;
8738 else
8739 min_mp = mp;
8740 }
8741 else if (mp->max_address
8742 < minipool_barrier->address + mp->offset + fix->fix_size)
8743 {
8744 /* Inserting before this entry would push the fix beyond
8745 its maximum address (which can happen if we have
8746 re-located a forwards fix); force the new fix to come
8747 after it. */
8748 min_mp = mp;
8749 min_address = mp->min_address + fix->fix_size;
8750 }
8751 /* If we are inserting an 8-byte aligned quantity and
8752 we have not already found an insertion point, then
8753 make sure that all such 8-byte aligned quantities are
8754 placed at the start of the pool. */
8755 else if (ARM_DOUBLEWORD_ALIGN
8756 && min_mp == NULL
8757 && fix->fix_size >= 8
8758 && mp->fix_size < 8)
8759 {
8760 min_mp = mp;
8761 min_address = mp->min_address + fix->fix_size;
8762 }
8763 }
8764 }
8765
8766 /* We need to create a new entry. */
8767 mp = XNEW (Mnode);
8768 mp->fix_size = fix->fix_size;
8769 mp->mode = fix->mode;
8770 mp->value = fix->value;
8771 mp->refcount = 1;
8772 mp->max_address = minipool_barrier->address + 65536;
8773
8774 mp->min_address = min_address;
8775
8776 if (min_mp == NULL)
8777 {
8778 mp->prev = NULL;
8779 mp->next = minipool_vector_head;
8780
8781 if (mp->next == NULL)
8782 {
8783 minipool_vector_tail = mp;
8784 minipool_vector_label = gen_label_rtx ();
8785 }
8786 else
8787 mp->next->prev = mp;
8788
8789 minipool_vector_head = mp;
8790 }
8791 else
8792 {
8793 mp->next = min_mp->next;
8794 mp->prev = min_mp;
8795 min_mp->next = mp;
8796
8797 if (mp->next != NULL)
8798 mp->next->prev = mp;
8799 else
8800 minipool_vector_tail = mp;
8801 }
8802
8803 /* Save the new entry. */
8804 min_mp = mp;
8805
8806 if (mp->prev)
8807 mp = mp->prev;
8808 else
8809 mp->offset = 0;
8810
8811 /* Scan over the following entries and adjust their offsets. */
8812 while (mp->next != NULL)
8813 {
8814 if (mp->next->min_address < mp->min_address + mp->fix_size)
8815 mp->next->min_address = mp->min_address + mp->fix_size;
8816
8817 if (mp->refcount)
8818 mp->next->offset = mp->offset + mp->fix_size;
8819 else
8820 mp->next->offset = mp->offset;
8821
8822 mp = mp->next;
8823 }
8824
8825 return min_mp;
8826 }
8827
8828 static void
8829 assign_minipool_offsets (Mfix *barrier)
8830 {
8831 HOST_WIDE_INT offset = 0;
8832 Mnode *mp;
8833
8834 minipool_barrier = barrier;
8835
8836 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8837 {
8838 mp->offset = offset;
8839
8840 if (mp->refcount > 0)
8841 offset += mp->fix_size;
8842 }
8843 }
8844
8845 /* Output the literal table. */
8846 static void
8847 dump_minipool (rtx scan)
8848 {
8849 Mnode * mp;
8850 Mnode * nmp;
8851 int align64 = 0;
8852
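  /* Check whether any live entry needs 8-byte alignment; if so, align the
     whole pool to 8 bytes rather than 4.  */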
8853 if (ARM_DOUBLEWORD_ALIGN)
8854 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8855 if (mp->refcount > 0 && mp->fix_size >= 8)
8856 {
8857 align64 = 1;
8858 break;
8859 }
8860
8861 if (dump_file)
8862 fprintf (dump_file,
8863 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8864 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8865
8866 scan = emit_label_after (gen_label_rtx (), scan);
8867 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8868 scan = emit_label_after (minipool_vector_label, scan);
8869
8870 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8871 {
8872 if (mp->refcount > 0)
8873 {
8874 if (dump_file)
8875 {
8876 fprintf (dump_file,
8877 ";; Offset %u, min %ld, max %ld ",
8878 (unsigned) mp->offset, (unsigned long) mp->min_address,
8879 (unsigned long) mp->max_address);
8880 arm_print_value (dump_file, mp->value);
8881 fputc ('\n', dump_file);
8882 }
8883
8884 switch (mp->fix_size)
8885 {
8886 #ifdef HAVE_consttable_1
8887 case 1:
8888 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8889 break;
8890
8891 #endif
8892 #ifdef HAVE_consttable_2
8893 case 2:
8894 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8895 break;
8896
8897 #endif
8898 #ifdef HAVE_consttable_4
8899 case 4:
8900 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8901 break;
8902
8903 #endif
8904 #ifdef HAVE_consttable_8
8905 case 8:
8906 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8907 break;
8908
8909 #endif
8910 #ifdef HAVE_consttable_16
8911 case 16:
8912 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8913 break;
8914
8915 #endif
8916 default:
8917 gcc_unreachable ();
8918 }
8919 }
8920
8921 nmp = mp->next;
8922 free (mp);
8923 }
8924
8925 minipool_vector_head = minipool_vector_tail = NULL;
8926 scan = emit_insn_after (gen_consttable_end (), scan);
8927 scan = emit_barrier_after (scan);
8928 }
8929
8930 /* Return the cost of forcibly inserting a barrier after INSN. */
8931 static int
8932 arm_barrier_cost (rtx insn)
8933 {
8934 /* Basing the location of the pool on the loop depth is preferable,
8935 but at the moment, the basic block information seems to be
8936 corrupted by this stage of the compilation. */
8937 int base_cost = 50;
8938 rtx next = next_nonnote_insn (insn);
8939
8940 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8941 base_cost -= 20;
8942
8943 switch (GET_CODE (insn))
8944 {
8945 case CODE_LABEL:
8946 /* It will always be better to place the table before the label, rather
8947 than after it. */
8948 return 50;
8949
8950 case INSN:
8951 case CALL_INSN:
8952 return base_cost;
8953
8954 case JUMP_INSN:
8955 return base_cost - 10;
8956
8957 default:
8958 return base_cost + 10;
8959 }
8960 }
8961
8962 /* Find the best place in the insn stream in the range
8963 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8964 Create the barrier by inserting a jump and add a new fix entry for
8965 it. */
8966 static Mfix *
8967 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8968 {
8969 HOST_WIDE_INT count = 0;
8970 rtx barrier;
8971 rtx from = fix->insn;
8972 /* The instruction after which we will insert the jump. */
8973 rtx selected = NULL;
8974 int selected_cost;
8975 /* The address at which the jump instruction will be placed. */
8976 HOST_WIDE_INT selected_address;
8977 Mfix * new_fix;
8978 HOST_WIDE_INT max_count = max_address - fix->address;
8979 rtx label = gen_label_rtx ();
8980
8981 selected_cost = arm_barrier_cost (from);
8982 selected_address = fix->address;
8983
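  /* Scan forwards from the fix, accumulating instruction lengths, and
     remember the cheapest place (according to arm_barrier_cost) that is
     still within range for the new barrier.  */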
8984 while (from && count < max_count)
8985 {
8986 rtx tmp;
8987 int new_cost;
8988
8989 /* This code shouldn't have been called if there was a natural barrier
8990 within range. */
8991 gcc_assert (GET_CODE (from) != BARRIER);
8992
8993 /* Count the length of this insn. */
8994 count += get_attr_length (from);
8995
8996 /* If there is a jump table, add its length. */
8997 tmp = is_jump_table (from);
8998 if (tmp != NULL)
8999 {
9000 count += get_jump_table_size (tmp);
9001
9002 /* Jump tables aren't in a basic block, so base the cost on
9003 the dispatch insn. If we select this location, we will
9004 still put the pool after the table. */
9005 new_cost = arm_barrier_cost (from);
9006
9007 if (count < max_count
9008 && (!selected || new_cost <= selected_cost))
9009 {
9010 selected = tmp;
9011 selected_cost = new_cost;
9012 selected_address = fix->address + count;
9013 }
9014
9015 /* Continue after the dispatch table. */
9016 from = NEXT_INSN (tmp);
9017 continue;
9018 }
9019
9020 new_cost = arm_barrier_cost (from);
9021
9022 if (count < max_count
9023 && (!selected || new_cost <= selected_cost))
9024 {
9025 selected = from;
9026 selected_cost = new_cost;
9027 selected_address = fix->address + count;
9028 }
9029
9030 from = NEXT_INSN (from);
9031 }
9032
9033 /* Make sure that we found a place to insert the jump. */
9034 gcc_assert (selected);
9035
9036 /* Create a new JUMP_INSN that branches around a barrier. */
9037 from = emit_jump_insn_after (gen_jump (label), selected);
9038 JUMP_LABEL (from) = label;
9039 barrier = emit_barrier_after (from);
9040 emit_label_after (label, barrier);
9041
9042 /* Create a minipool barrier entry for the new barrier. */
9043 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9044 new_fix->insn = barrier;
9045 new_fix->address = selected_address;
9046 new_fix->next = fix->next;
9047 fix->next = new_fix;
9048
9049 return new_fix;
9050 }
9051
9052 /* Record that there is a natural barrier in the insn stream at
9053 ADDRESS. */
9054 static void
9055 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9056 {
9057 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9058
9059 fix->insn = insn;
9060 fix->address = address;
9061
9062 fix->next = NULL;
9063 if (minipool_fix_head != NULL)
9064 minipool_fix_tail->next = fix;
9065 else
9066 minipool_fix_head = fix;
9067
9068 minipool_fix_tail = fix;
9069 }
9070
9071 /* Record INSN, which will need fixing up to load a value from the
9072 minipool. ADDRESS is the offset of the insn since the start of the
9073 function; LOC is a pointer to the part of the insn which requires
9074 fixing; VALUE is the constant that must be loaded, which is of type
9075 MODE. */
9076 static void
9077 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9078 enum machine_mode mode, rtx value)
9079 {
9080 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9081
9082 fix->insn = insn;
9083 fix->address = address;
9084 fix->loc = loc;
9085 fix->mode = mode;
9086 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9087 fix->value = value;
9088 fix->forwards = get_attr_pool_range (insn);
9089 fix->backwards = get_attr_neg_pool_range (insn);
9090 fix->minipool = NULL;
9091
9092 /* If an insn doesn't have a range defined for it, then it isn't
9093 expecting to be reworked by this code. Better to stop now than
9094 to generate duff assembly code. */
9095 gcc_assert (fix->forwards || fix->backwards);
9096
9097 /* If an entry requires 8-byte alignment then assume all constant pools
9098 require 4 bytes of padding. Trying to do this later on a per-pool
9099 basis is awkward because existing pool entries have to be modified. */
9100 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9101 minipool_pad = 4;
9102
9103 if (dump_file)
9104 {
9105 fprintf (dump_file,
9106 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9107 GET_MODE_NAME (mode),
9108 INSN_UID (insn), (unsigned long) address,
9109 -1 * (long)fix->backwards, (long)fix->forwards);
9110 arm_print_value (dump_file, fix->value);
9111 fprintf (dump_file, "\n");
9112 }
9113
9114 /* Add it to the chain of fixes. */
9115 fix->next = NULL;
9116
9117 if (minipool_fix_head != NULL)
9118 minipool_fix_tail->next = fix;
9119 else
9120 minipool_fix_head = fix;
9121
9122 minipool_fix_tail = fix;
9123 }
9124
9125 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9126 Returns the number of insns needed, or 99 if we don't know how to
9127 do it. */
9128 int
9129 arm_const_double_inline_cost (rtx val)
9130 {
9131 rtx lowpart, highpart;
9132 enum machine_mode mode;
9133
9134 mode = GET_MODE (val);
9135
9136 if (mode == VOIDmode)
9137 mode = DImode;
9138
9139 gcc_assert (GET_MODE_SIZE (mode) == 8);
9140
9141 lowpart = gen_lowpart (SImode, val);
9142 highpart = gen_highpart_mode (SImode, mode, val);
9143
9144 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9145 gcc_assert (GET_CODE (highpart) == CONST_INT);
9146
9147 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9148 NULL_RTX, NULL_RTX, 0, 0)
9149 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9150 NULL_RTX, NULL_RTX, 0, 0));
9151 }
9152
9153 /* Return true if it is worthwhile to split a 64-bit constant into two
9154 32-bit operations. This is the case if optimizing for size, or
9155 if we have load delay slots, or if one 32-bit part can be done with
9156 a single data operation. */
9157 bool
9158 arm_const_double_by_parts (rtx val)
9159 {
9160 enum machine_mode mode = GET_MODE (val);
9161 rtx part;
9162
9163 if (optimize_size || arm_ld_sched)
9164 return true;
9165
9166 if (mode == VOIDmode)
9167 mode = DImode;
9168
9169 part = gen_highpart_mode (SImode, mode, val);
9170
9171 gcc_assert (GET_CODE (part) == CONST_INT);
9172
9173 if (const_ok_for_arm (INTVAL (part))
9174 || const_ok_for_arm (~INTVAL (part)))
9175 return true;
9176
9177 part = gen_lowpart (SImode, val);
9178
9179 gcc_assert (GET_CODE (part) == CONST_INT);
9180
9181 if (const_ok_for_arm (INTVAL (part))
9182 || const_ok_for_arm (~INTVAL (part)))
9183 return true;
9184
9185 return false;
9186 }
9187
9188 /* Scan INSN and note any of its operands that need fixing.
9189 If DO_PUSHES is false we do not actually push any of the fixups
9190 needed. The function returns TRUE if any fixups were needed/pushed.
9191 This is used by arm_memory_load_p() which needs to know about loads
9192 of constants that will be converted into minipool loads. */
9193 static bool
9194 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9195 {
9196 bool result = false;
9197 int opno;
9198
9199 extract_insn (insn);
9200
9201 if (!constrain_operands (1))
9202 fatal_insn_not_found (insn);
9203
9204 if (recog_data.n_alternatives == 0)
9205 return false;
9206
9207 /* Fill in recog_op_alt with information about the constraints of
9208 this insn. */
9209 preprocess_constraints ();
9210
9211 for (opno = 0; opno < recog_data.n_operands; opno++)
9212 {
9213 /* Things we need to fix can only occur in inputs. */
9214 if (recog_data.operand_type[opno] != OP_IN)
9215 continue;
9216
9217 /* If this alternative is a memory reference, then any mention
9218 of constants in this alternative is really to fool reload
9219 into allowing us to accept one there. We need to fix them up
9220 now so that we output the right code. */
9221 if (recog_op_alt[opno][which_alternative].memory_ok)
9222 {
9223 rtx op = recog_data.operand[opno];
9224
9225 if (CONSTANT_P (op))
9226 {
9227 if (do_pushes)
9228 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9229 recog_data.operand_mode[opno], op);
9230 result = true;
9231 }
9232 else if (GET_CODE (op) == MEM
9233 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9234 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9235 {
9236 if (do_pushes)
9237 {
9238 rtx cop = avoid_constant_pool_reference (op);
9239
9240 /* Casting the address of something to a mode narrower
9241 than a word can cause avoid_constant_pool_reference()
9242 to return the pool reference itself. That's no good to
9243 us here. Let's just hope that we can use the
9244 constant pool value directly. */
9245 if (op == cop)
9246 cop = get_pool_constant (XEXP (op, 0));
9247
9248 push_minipool_fix (insn, address,
9249 recog_data.operand_loc[opno],
9250 recog_data.operand_mode[opno], cop);
9251 }
9252
9253 result = true;
9254 }
9255 }
9256 }
9257
9258 return result;
9259 }
9260
9261 /* Gcc puts the pool in the wrong place for ARM, since we can only
9262 load addresses a limited distance around the pc. We do some
9263 special munging to move the constant pool values to the correct
9264 point in the code. */
9265 static void
9266 arm_reorg (void)
9267 {
9268 rtx insn;
9269 HOST_WIDE_INT address = 0;
9270 Mfix * fix;
9271
9272 minipool_fix_head = minipool_fix_tail = NULL;
9273
9274 /* The first insn must always be a note, or the code below won't
9275 scan it properly. */
9276 insn = get_insns ();
9277 gcc_assert (GET_CODE (insn) == NOTE);
9278 minipool_pad = 0;
9279
9280 /* Scan all the insns and record the operands that will need fixing. */
9281 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9282 {
9283 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9284 && (arm_cirrus_insn_p (insn)
9285 || GET_CODE (insn) == JUMP_INSN
9286 || arm_memory_load_p (insn)))
9287 cirrus_reorg (insn);
9288
9289 if (GET_CODE (insn) == BARRIER)
9290 push_minipool_barrier (insn, address);
9291 else if (INSN_P (insn))
9292 {
9293 rtx table;
9294
9295 note_invalid_constants (insn, address, true);
9296 address += get_attr_length (insn);
9297
9298 /* If the insn is a vector jump, add the size of the table
9299 and skip the table. */
9300 if ((table = is_jump_table (insn)) != NULL)
9301 {
9302 address += get_jump_table_size (table);
9303 insn = table;
9304 }
9305 }
9306 }
9307
9308 fix = minipool_fix_head;
9309
9310 /* Now scan the fixups and perform the required changes. */
9311 while (fix)
9312 {
9313 Mfix * ftmp;
9314 Mfix * fdel;
9315 Mfix * last_added_fix;
9316 Mfix * last_barrier = NULL;
9317 Mfix * this_fix;
9318
9319 /* Skip any further barriers before the next fix. */
9320 while (fix && GET_CODE (fix->insn) == BARRIER)
9321 fix = fix->next;
9322
9323 /* No more fixes. */
9324 if (fix == NULL)
9325 break;
9326
9327 last_added_fix = NULL;
9328
9329 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9330 {
9331 if (GET_CODE (ftmp->insn) == BARRIER)
9332 {
9333 if (ftmp->address >= minipool_vector_head->max_address)
9334 break;
9335
9336 last_barrier = ftmp;
9337 }
9338 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9339 break;
9340
9341 last_added_fix = ftmp; /* Keep track of the last fix added. */
9342 }
9343
9344 /* If we found a barrier, drop back to that; any fixes that we
9345 could have reached but come after the barrier will now go in
9346 the next mini-pool. */
9347 if (last_barrier != NULL)
9348 {
9349 /* Reduce the refcount for those fixes that won't go into this
9350 pool after all. */
9351 for (fdel = last_barrier->next;
9352 fdel && fdel != ftmp;
9353 fdel = fdel->next)
9354 {
9355 fdel->minipool->refcount--;
9356 fdel->minipool = NULL;
9357 }
9358
9359 ftmp = last_barrier;
9360 }
9361 else
9362 {
9363 /* ftmp is the first fix that we can't fit into this pool and
9364 there are no natural barriers that we could use. Insert a
9365 new barrier in the code somewhere between the previous
9366 fix and this one, and arrange to jump around it. */
9367 HOST_WIDE_INT max_address;
9368
9369 /* The last item on the list of fixes must be a barrier, so
9370 we can never run off the end of the list of fixes without
9371 last_barrier being set. */
9372 gcc_assert (ftmp);
9373
9374 max_address = minipool_vector_head->max_address;
9375 /* Check that there isn't another fix that is in range that
9376 we couldn't fit into this pool because the pool was
9377 already too large: we need to put the pool before such an
9378 instruction. The pool itself may come just after the
9379 fix because create_fix_barrier also allows space for a
9380 jump instruction. */
9381 if (ftmp->address < max_address)
9382 max_address = ftmp->address + 1;
9383
9384 last_barrier = create_fix_barrier (last_added_fix, max_address);
9385 }
9386
9387 assign_minipool_offsets (last_barrier);
9388
9389 while (ftmp)
9390 {
9391 if (GET_CODE (ftmp->insn) != BARRIER
9392 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9393 == NULL))
9394 break;
9395
9396 ftmp = ftmp->next;
9397 }
9398
9399 /* Scan over the fixes we have identified for this pool, fixing them
9400 up and adding the constants to the pool itself. */
9401 for (this_fix = fix; this_fix && ftmp != this_fix;
9402 this_fix = this_fix->next)
9403 if (GET_CODE (this_fix->insn) != BARRIER)
9404 {
9405 rtx addr
9406 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9407 minipool_vector_label),
9408 this_fix->minipool->offset);
9409 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9410 }
9411
9412 dump_minipool (last_barrier->insn);
9413 fix = ftmp;
9414 }
9415
9416 /* From now on we must synthesize any constants that we can't handle
9417 directly. This can happen if the RTL gets split during final
9418 instruction generation. */
9419 after_arm_reorg = 1;
9420
9421 /* Free the minipool memory. */
9422 obstack_free (&minipool_obstack, minipool_startobj);
9423 }
9424 \f
9425 /* Routines to output assembly language. */
9426
9427 /* If the rtx is the correct value then return the string of the number.
9428 In this way we can ensure that valid double constants are generated even
9429 when cross compiling. */
9430 const char *
9431 fp_immediate_constant (rtx x)
9432 {
9433 REAL_VALUE_TYPE r;
9434 int i;
9435
9436 if (!fp_consts_inited)
9437 init_fp_table ();
9438
9439 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9440 for (i = 0; i < 8; i++)
9441 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9442 return strings_fp[i];
9443
9444 gcc_unreachable ();
9445 }
9446
9447 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9448 static const char *
9449 fp_const_from_val (REAL_VALUE_TYPE *r)
9450 {
9451 int i;
9452
9453 if (!fp_consts_inited)
9454 init_fp_table ();
9455
9456 for (i = 0; i < 8; i++)
9457 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9458 return strings_fp[i];
9459
9460 gcc_unreachable ();
9461 }
9462
9463 /* Output the operands of an LDM/STM instruction to STREAM.
9464 MASK is the ARM register set mask of which only bits 0-15 are important.
9465 REG is the base register, either the frame pointer or the stack pointer.
9466 INSTR is the possibly suffixed load or store instruction.
9467 RFE is nonzero if the instruction should also copy spsr to cpsr. */
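/* Illustrative example (assumed operands, not taken from a real call site):
   with INSTR "ldmfd\t%r!, ", REG the stack pointer, MASK covering r4, r5 and
   pc, and RFE nonzero, this would emit something like

           ldmfd   sp!, {r4, r5, pc}^

   where the trailing '^' requests the SPSR->CPSR copy when the pc is loaded,
   as required on exception return.  */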
9468
9469 static void
9470 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9471 unsigned long mask, int rfe)
9472 {
9473 unsigned i;
9474 bool not_first = FALSE;
9475
9476 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9477 fputc ('\t', stream);
9478 asm_fprintf (stream, instr, reg);
9479 fputc ('{', stream);
9480
9481 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9482 if (mask & (1 << i))
9483 {
9484 if (not_first)
9485 fprintf (stream, ", ");
9486
9487 asm_fprintf (stream, "%r", i);
9488 not_first = TRUE;
9489 }
9490
9491 if (rfe)
9492 fprintf (stream, "}^\n");
9493 else
9494 fprintf (stream, "}\n");
9495 }
9496
9497
9498 /* Output a FLDMD instruction to STREAM.
9499 BASE is the register containing the address.
9500 REG and COUNT specify the register range.
9501 Extra registers may be added to avoid hardware bugs.
9502
9503 We output FLDMD even for ARMv5 VFP implementations. Although
9504 FLDMD is technically not supported until ARMv6, it is believed
9505 that all VFP implementations support its use in this context. */
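/* Illustrative example (hypothetical values): with BASE = IP_REGNUM, REG = 8
   and COUNT = 2 on a pre-ARMv6 core, the ARM10 VFPr1 workaround below bumps
   COUNT to 3 and the emitted instruction would look like

           fldmfdd ip!, {d8, d9, d10}

   i.e. one extra D register is popped purely to avoid the two-register
   transfer that triggers the erratum.  */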
9506
9507 static void
9508 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9509 {
9510 int i;
9511
9512 /* Workaround ARM10 VFPr1 bug. */
9513 if (count == 2 && !arm_arch6)
9514 {
9515 if (reg == 15)
9516 reg--;
9517 count++;
9518 }
9519
9520 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9521 load into multiple parts if we have to handle more than 16 registers. */
9522 if (count > 16)
9523 {
9524 vfp_output_fldmd (stream, base, reg, 16);
9525 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9526 return;
9527 }
9528
9529 fputc ('\t', stream);
9530 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9531
9532 for (i = reg; i < reg + count; i++)
9533 {
9534 if (i > reg)
9535 fputs (", ", stream);
9536 asm_fprintf (stream, "d%d", i);
9537 }
9538 fputs ("}\n", stream);
9539
9540 }
9541
9542
9543 /* Output the assembly for a store multiple. */
9544
9545 const char *
9546 vfp_output_fstmd (rtx * operands)
9547 {
9548 char pattern[100];
9549 int p;
9550 int base;
9551 int i;
9552
9553 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9554 p = strlen (pattern);
9555
9556 gcc_assert (GET_CODE (operands[1]) == REG);
9557
9558 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9559 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9560 {
9561 p += sprintf (&pattern[p], ", d%d", base + i);
9562 }
9563 strcpy (&pattern[p], "}");
9564
9565 output_asm_insn (pattern, operands);
9566 return "";
9567 }
9568
9569
9570 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9571 number of bytes pushed. */
9572
9573 static int
9574 vfp_emit_fstmd (int base_reg, int count)
9575 {
9576 rtx par;
9577 rtx dwarf;
9578 rtx tmp, reg;
9579 int i;
9580
9581 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9582 register pairs are stored by a store multiple insn. We avoid this
9583 by pushing an extra pair. */
9584 if (count == 2 && !arm_arch6)
9585 {
9586 if (base_reg == LAST_VFP_REGNUM - 3)
9587 base_reg -= 2;
9588 count++;
9589 }
9590
9591 /* FSTMD may not store more than 16 doubleword registers at once. Split
9592 larger stores into multiple parts (up to a maximum of two, in
9593 practice). */
9594 if (count > 16)
9595 {
9596 int saved;
9597 /* NOTE: base_reg is an internal register number, so each D register
9598 counts as 2. */
9599 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9600 saved += vfp_emit_fstmd (base_reg, 16);
9601 return saved;
9602 }
9603
9604 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9605 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9606
9607 reg = gen_rtx_REG (DFmode, base_reg);
9608 base_reg += 2;
9609
9610 XVECEXP (par, 0, 0)
9611 = gen_rtx_SET (VOIDmode,
9612 gen_frame_mem (BLKmode,
9613 gen_rtx_PRE_DEC (BLKmode,
9614 stack_pointer_rtx)),
9615 gen_rtx_UNSPEC (BLKmode,
9616 gen_rtvec (1, reg),
9617 UNSPEC_PUSH_MULT));
9618
9619 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9620 plus_constant (stack_pointer_rtx, -(count * 8)));
9621 RTX_FRAME_RELATED_P (tmp) = 1;
9622 XVECEXP (dwarf, 0, 0) = tmp;
9623
9624 tmp = gen_rtx_SET (VOIDmode,
9625 gen_frame_mem (DFmode, stack_pointer_rtx),
9626 reg);
9627 RTX_FRAME_RELATED_P (tmp) = 1;
9628 XVECEXP (dwarf, 0, 1) = tmp;
9629
9630 for (i = 1; i < count; i++)
9631 {
9632 reg = gen_rtx_REG (DFmode, base_reg);
9633 base_reg += 2;
9634 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9635
9636 tmp = gen_rtx_SET (VOIDmode,
9637 gen_frame_mem (DFmode,
9638 plus_constant (stack_pointer_rtx,
9639 i * 8)),
9640 reg);
9641 RTX_FRAME_RELATED_P (tmp) = 1;
9642 XVECEXP (dwarf, 0, i + 1) = tmp;
9643 }
9644
9645 par = emit_insn (par);
9646 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9647 REG_NOTES (par));
9648 RTX_FRAME_RELATED_P (par) = 1;
9649
9650 return count * 8;
9651 }
9652
9653 /* Emit a call instruction with pattern PAT. ADDR is the address of
9654 the call target. */
9655
9656 void
9657 arm_emit_call_insn (rtx pat, rtx addr)
9658 {
9659 rtx insn;
9660
9661 insn = emit_call_insn (pat);
9662
9663 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9664 If the call might use such an entry, add a use of the PIC register
9665 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9666 if (TARGET_VXWORKS_RTP
9667 && flag_pic
9668 && GET_CODE (addr) == SYMBOL_REF
9669 && (SYMBOL_REF_DECL (addr)
9670 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9671 : !SYMBOL_REF_LOCAL_P (addr)))
9672 {
9673 require_pic_register ();
9674 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9675 }
9676 }
9677
9678 /* Output a 'call' insn. */
9679 const char *
9680 output_call (rtx *operands)
9681 {
9682 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9683
9684 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9685 if (REGNO (operands[0]) == LR_REGNUM)
9686 {
9687 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9688 output_asm_insn ("mov%?\t%0, %|lr", operands);
9689 }
9690
9691 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9692
9693 if (TARGET_INTERWORK || arm_arch4t)
9694 output_asm_insn ("bx%?\t%0", operands);
9695 else
9696 output_asm_insn ("mov%?\t%|pc, %0", operands);
9697
9698 return "";
9699 }
9700
9701 /* Output a 'call' insn that is a reference in memory. */
9702 const char *
9703 output_call_mem (rtx *operands)
9704 {
9705 if (TARGET_INTERWORK && !arm_arch5)
9706 {
9707 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9708 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9709 output_asm_insn ("bx%?\t%|ip", operands);
9710 }
9711 else if (regno_use_in (LR_REGNUM, operands[0]))
9712 {
9713 /* LR is used in the memory address. We load the address in the
9714 first instruction. It's safe to use IP as the target of the
9715 load since the call will kill it anyway. */
9716 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9717 if (arm_arch5)
9718 output_asm_insn ("blx%?\t%|ip", operands);
9719 else
9720 {
9721 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9722 if (arm_arch4t)
9723 output_asm_insn ("bx%?\t%|ip", operands);
9724 else
9725 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9726 }
9727 }
9728 else
9729 {
9730 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9731 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9732 }
9733
9734 return "";
9735 }
9736
9737
9738 /* Output a move from arm registers to an fpa register.
9739 OPERANDS[0] is an fpa register.
9740 OPERANDS[1] is the first register of an arm register pair. */
9741 const char *
9742 output_mov_long_double_fpa_from_arm (rtx *operands)
9743 {
9744 int arm_reg0 = REGNO (operands[1]);
9745 rtx ops[3];
9746
9747 gcc_assert (arm_reg0 != IP_REGNUM);
9748
9749 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9750 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9751 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9752
9753 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9754 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9755
9756 return "";
9757 }
9758
9759 /* Output a move from an fpa register to arm registers.
9760 OPERANDS[0] is the first register of an arm register pair.
9761 OPERANDS[1] is an fpa register. */
9762 const char *
9763 output_mov_long_double_arm_from_fpa (rtx *operands)
9764 {
9765 int arm_reg0 = REGNO (operands[0]);
9766 rtx ops[3];
9767
9768 gcc_assert (arm_reg0 != IP_REGNUM);
9769
9770 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9771 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9772 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9773
9774 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9775 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9776 return "";
9777 }
9778
9779 /* Output a move from arm registers to arm registers of a long double.
9780 OPERANDS[0] is the destination.
9781 OPERANDS[1] is the source. */
9782 const char *
9783 output_mov_long_double_arm_from_arm (rtx *operands)
9784 {
9785 /* We have to be careful here because the two might overlap. */
9786 int dest_start = REGNO (operands[0]);
9787 int src_start = REGNO (operands[1]);
9788 rtx ops[2];
9789 int i;
9790
9791 if (dest_start < src_start)
9792 {
9793 for (i = 0; i < 3; i++)
9794 {
9795 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9796 ops[1] = gen_rtx_REG (SImode, src_start + i);
9797 output_asm_insn ("mov%?\t%0, %1", ops);
9798 }
9799 }
9800 else
9801 {
9802 for (i = 2; i >= 0; i--)
9803 {
9804 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9805 ops[1] = gen_rtx_REG (SImode, src_start + i);
9806 output_asm_insn ("mov%?\t%0, %1", ops);
9807 }
9808 }
9809
9810 return "";
9811 }
9812
9813
9814 /* Output a move from arm registers to an fpa register.
9815 OPERANDS[0] is an fpa register.
9816 OPERANDS[1] is the first register of an arm register pair. */
9817 const char *
9818 output_mov_double_fpa_from_arm (rtx *operands)
9819 {
9820 int arm_reg0 = REGNO (operands[1]);
9821 rtx ops[2];
9822
9823 gcc_assert (arm_reg0 != IP_REGNUM);
9824
9825 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9826 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9827 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9828 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9829 return "";
9830 }
9831
9832 /* Output a move from an fpa register to arm registers.
9833 OPERANDS[0] is the first register of an arm register pair.
9834 OPERANDS[1] is an fpa register. */
9835 const char *
9836 output_mov_double_arm_from_fpa (rtx *operands)
9837 {
9838 int arm_reg0 = REGNO (operands[0]);
9839 rtx ops[2];
9840
9841 gcc_assert (arm_reg0 != IP_REGNUM);
9842
9843 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9844 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9845 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9846 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9847 return "";
9848 }
9849
9850 /* Output a move between double words.
9851 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9852 or MEM<-REG and all MEMs must be offsettable addresses. */
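/* Illustrative example (assumed RTL, not from this file): for a DImode load
   such as (set (reg:DI r0) (mem:DI (reg:SI r4))), the plain REG address case
   below emits

           ldmia   r4, {r0, r1}

   while the pre/post-modify cases use ldrd (or an ldm variant) depending on
   TARGET_LDRD, as handled in the switch that follows.  */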
9853 const char *
9854 output_move_double (rtx *operands)
9855 {
9856 enum rtx_code code0 = GET_CODE (operands[0]);
9857 enum rtx_code code1 = GET_CODE (operands[1]);
9858 rtx otherops[3];
9859
9860 if (code0 == REG)
9861 {
9862 int reg0 = REGNO (operands[0]);
9863
9864 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9865
9866 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9867
9868 switch (GET_CODE (XEXP (operands[1], 0)))
9869 {
9870 case REG:
9871 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9872 break;
9873
9874 case PRE_INC:
9875 gcc_assert (TARGET_LDRD);
9876 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9877 break;
9878
9879 case PRE_DEC:
9880 if (TARGET_LDRD)
9881 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9882 else
9883 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9884 break;
9885
9886 case POST_INC:
9887 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9888 break;
9889
9890 case POST_DEC:
9891 gcc_assert (TARGET_LDRD);
9892 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9893 break;
9894
9895 case PRE_MODIFY:
9896 case POST_MODIFY:
9897 otherops[0] = operands[0];
9898 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9899 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9900
9901 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9902 {
9903 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9904 {
9905 /* Registers overlap so split out the increment. */
9906 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9907 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9908 }
9909 else
9910 {
9911 /* IWMMXT allows offsets larger than ldrd can handle;
9912 fix these up with a pair of ldr instructions. */
9913 if (GET_CODE (otherops[2]) == CONST_INT
9914 && (INTVAL(otherops[2]) <= -256
9915 || INTVAL(otherops[2]) >= 256))
9916 {
9917 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9918 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9919 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9920 }
9921 else
9922 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9923 }
9924 }
9925 else
9926 {
9927 /* IWMMXT allows offsets larger than ldrd can handle;
9928 fix these up with a pair of ldr instructions. */
9929 if (GET_CODE (otherops[2]) == CONST_INT
9930 && (INTVAL(otherops[2]) <= -256
9931 || INTVAL(otherops[2]) >= 256))
9932 {
9933 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9934 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9935 otherops[0] = operands[0];
9936 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9937 }
9938 else
9939 /* We only allow constant increments, so this is safe. */
9940 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9941 }
9942 break;
9943
9944 case LABEL_REF:
9945 case CONST:
9946 output_asm_insn ("adr%?\t%0, %1", operands);
9947 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9948 break;
9949
9950 /* ??? This needs checking for thumb2. */
9951 default:
9952 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9953 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9954 {
9955 otherops[0] = operands[0];
9956 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9957 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9958
9959 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9960 {
9961 if (GET_CODE (otherops[2]) == CONST_INT)
9962 {
9963 switch ((int) INTVAL (otherops[2]))
9964 {
9965 case -8:
9966 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
9967 return "";
9968 case -4:
9969 if (TARGET_THUMB2)
9970 break;
9971 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
9972 return "";
9973 case 4:
9974 if (TARGET_THUMB2)
9975 break;
9976 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
9977 return "";
9978 }
9979 }
9980 if (TARGET_LDRD
9981 && (GET_CODE (otherops[2]) == REG
9982 || (GET_CODE (otherops[2]) == CONST_INT
9983 && INTVAL (otherops[2]) > -256
9984 && INTVAL (otherops[2]) < 256)))
9985 {
9986 if (reg_overlap_mentioned_p (otherops[0],
9987 otherops[2]))
9988 {
9989 /* Swap base and index registers over to
9990 avoid a conflict. */
9991 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
9992 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
9993 }
9994 /* If both registers conflict, it will usually
9995 have been fixed by a splitter. */
9996 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9997 {
9998 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9999 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10000 otherops);
10001 }
10002 else
10003 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10004 return "";
10005 }
10006
10007 if (GET_CODE (otherops[2]) == CONST_INT)
10008 {
10009 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10010 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10011 else
10012 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10013 }
10014 else
10015 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10016 }
10017 else
10018 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10019
10020 return "ldm%(ia%)\t%0, %M0";
10021 }
10022 else
10023 {
10024 otherops[1] = adjust_address (operands[1], SImode, 4);
10025 /* Take care of overlapping base/data reg. */
10026 if (reg_mentioned_p (operands[0], operands[1]))
10027 {
10028 output_asm_insn ("ldr%?\t%0, %1", otherops);
10029 output_asm_insn ("ldr%?\t%0, %1", operands);
10030 }
10031 else
10032 {
10033 output_asm_insn ("ldr%?\t%0, %1", operands);
10034 output_asm_insn ("ldr%?\t%0, %1", otherops);
10035 }
10036 }
10037 }
10038 }
10039 else
10040 {
10041 /* Constraints should ensure this. */
10042 gcc_assert (code0 == MEM && code1 == REG);
10043 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10044
10045 switch (GET_CODE (XEXP (operands[0], 0)))
10046 {
10047 case REG:
10048 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10049 break;
10050
10051 case PRE_INC:
10052 gcc_assert (TARGET_LDRD);
10053 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10054 break;
10055
10056 case PRE_DEC:
10057 if (TARGET_LDRD)
10058 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10059 else
10060 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10061 break;
10062
10063 case POST_INC:
10064 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10065 break;
10066
10067 case POST_DEC:
10068 gcc_assert (TARGET_LDRD);
10069 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10070 break;
10071
10072 case PRE_MODIFY:
10073 case POST_MODIFY:
10074 otherops[0] = operands[1];
10075 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10076 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10077
10078 /* IWMMXT allows offsets larger than strd can handle;
10079 fix these up with a pair of str instructions. */
10080 if (GET_CODE (otherops[2]) == CONST_INT
10081 && (INTVAL(otherops[2]) <= -256
10082 || INTVAL(otherops[2]) >= 256))
10083 {
10084 rtx reg1;
10085 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10086 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10087 {
10088 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10089 otherops[0] = reg1;
10090 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10091 }
10092 else
10093 {
10094 otherops[0] = reg1;
10095 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10096 otherops[0] = operands[1];
10097 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10098 }
10099 }
10100 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10101 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10102 else
10103 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10104 break;
10105
10106 case PLUS:
10107 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10108 if (GET_CODE (otherops[2]) == CONST_INT)
10109 {
10110 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10111 {
10112 case -8:
10113 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10114 return "";
10115
10116 case -4:
10117 if (TARGET_THUMB2)
10118 break;
10119 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10120 return "";
10121
10122 case 4:
10123 if (TARGET_THUMB2)
10124 break;
10125 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10126 return "";
10127 }
10128 }
10129 if (TARGET_LDRD
10130 && (GET_CODE (otherops[2]) == REG
10131 || (GET_CODE (otherops[2]) == CONST_INT
10132 && INTVAL (otherops[2]) > -256
10133 && INTVAL (otherops[2]) < 256)))
10134 {
10135 otherops[0] = operands[1];
10136 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10137 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10138 return "";
10139 }
10140 /* Fall through */
10141
10142 default:
10143 otherops[0] = adjust_address (operands[0], SImode, 4);
10144 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10145 output_asm_insn ("str%?\t%1, %0", operands);
10146 output_asm_insn ("str%?\t%1, %0", otherops);
10147 }
10148 }
10149
10150 return "";
10151 }
10152
10153 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10154 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10155
10156 const char *
10157 output_move_quad (rtx *operands)
10158 {
10159 if (REG_P (operands[0]))
10160 {
10161 /* Load, or reg->reg move. */
10162
10163 if (MEM_P (operands[1]))
10164 {
10165 switch (GET_CODE (XEXP (operands[1], 0)))
10166 {
10167 case REG:
10168 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10169 break;
10170
10171 case LABEL_REF:
10172 case CONST:
10173 output_asm_insn ("adr%?\t%0, %1", operands);
10174 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10175 break;
10176
10177 default:
10178 gcc_unreachable ();
10179 }
10180 }
10181 else
10182 {
10183 rtx ops[2];
10184 int dest, src, i;
10185
10186 gcc_assert (REG_P (operands[1]));
10187
10188 dest = REGNO (operands[0]);
10189 src = REGNO (operands[1]);
10190
10191 /* This seems pretty dumb, but hopefully GCC won't try to do it
10192 very often. */
10193 if (dest < src)
10194 for (i = 0; i < 4; i++)
10195 {
10196 ops[0] = gen_rtx_REG (SImode, dest + i);
10197 ops[1] = gen_rtx_REG (SImode, src + i);
10198 output_asm_insn ("mov%?\t%0, %1", ops);
10199 }
10200 else
10201 for (i = 3; i >= 0; i--)
10202 {
10203 ops[0] = gen_rtx_REG (SImode, dest + i);
10204 ops[1] = gen_rtx_REG (SImode, src + i);
10205 output_asm_insn ("mov%?\t%0, %1", ops);
10206 }
10207 }
10208 }
10209 else
10210 {
10211 gcc_assert (MEM_P (operands[0]));
10212 gcc_assert (REG_P (operands[1]));
10213 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10214
10215 switch (GET_CODE (XEXP (operands[0], 0)))
10216 {
10217 case REG:
10218 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10219 break;
10220
10221 default:
10222 gcc_unreachable ();
10223 }
10224 }
10225
10226 return "";
10227 }
10228
10229 /* Output a VFP load or store instruction. */
10230
10231 const char *
10232 output_move_vfp (rtx *operands)
10233 {
10234 rtx reg, mem, addr, ops[2];
10235 int load = REG_P (operands[0]);
10236 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10237 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10238 const char *template;
10239 char buff[50];
10240 enum machine_mode mode;
10241
10242 reg = operands[!load];
10243 mem = operands[load];
10244
10245 mode = GET_MODE (reg);
10246
10247 gcc_assert (REG_P (reg));
10248 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10249 gcc_assert (mode == SFmode
10250 || mode == DFmode
10251 || mode == SImode
10252 || mode == DImode
10253 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10254 gcc_assert (MEM_P (mem));
10255
10256 addr = XEXP (mem, 0);
10257
10258 switch (GET_CODE (addr))
10259 {
10260 case PRE_DEC:
10261 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10262 ops[0] = XEXP (addr, 0);
10263 ops[1] = reg;
10264 break;
10265
10266 case POST_INC:
10267 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10268 ops[0] = XEXP (addr, 0);
10269 ops[1] = reg;
10270 break;
10271
10272 default:
10273 template = "f%s%c%%?\t%%%s0, %%1%s";
10274 ops[0] = reg;
10275 ops[1] = mem;
10276 break;
10277 }
10278
10279 sprintf (buff, template,
10280 load ? "ld" : "st",
10281 dp ? 'd' : 's',
10282 dp ? "P" : "",
10283 integer_p ? "\t%@ int" : "");
10284 output_asm_insn (buff, ops);
10285
10286 return "";
10287 }
10288
10289 /* Output a Neon quad-word load or store, or a load or store for
10290 larger structure modes. We could also support post-modify forms using
10291 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10292 yet.
10293 WARNING: The ordering of elements in memory is weird in big-endian mode,
10294 because we use VSTM instead of VST1, to make it easy to make vector stores
10295 via ARM registers write values in the same order as stores direct from Neon
10296 registers. For example, the byte ordering of a quadword vector with 16-byte
10297 elements like this:
10298
10299 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10300
10301 will be (with lowest address first, h = most-significant byte,
10302 l = least-significant byte of element):
10303
10304 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10305 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10306
10307 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10308 rN in the order:
10309
10310 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10311
10312 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10313 layout will result as if VSTM/VLDM were used. */
10314
10315 const char *
10316 output_move_neon (rtx *operands)
10317 {
10318 rtx reg, mem, addr, ops[2];
10319 int regno, load = REG_P (operands[0]);
10320 const char *template;
10321 char buff[50];
10322 enum machine_mode mode;
10323
10324 reg = operands[!load];
10325 mem = operands[load];
10326
10327 mode = GET_MODE (reg);
10328
10329 gcc_assert (REG_P (reg));
10330 regno = REGNO (reg);
10331 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10332 || NEON_REGNO_OK_FOR_QUAD (regno));
10333 gcc_assert (VALID_NEON_DREG_MODE (mode)
10334 || VALID_NEON_QREG_MODE (mode)
10335 || VALID_NEON_STRUCT_MODE (mode));
10336 gcc_assert (MEM_P (mem));
10337
10338 addr = XEXP (mem, 0);
10339
10340 /* Strip off const from addresses like (const (plus (...))). */
10341 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10342 addr = XEXP (addr, 0);
10343
10344 switch (GET_CODE (addr))
10345 {
10346 case POST_INC:
10347 template = "v%smia%%?\t%%0!, %%h1";
10348 ops[0] = XEXP (addr, 0);
10349 ops[1] = reg;
10350 break;
10351
10352 case POST_MODIFY:
10353 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10354 gcc_unreachable ();
10355
10356 case LABEL_REF:
10357 case PLUS:
10358 {
10359 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10360 int i;
10361 int overlap = -1;
10362 for (i = 0; i < nregs; i++)
10363 {
10364 /* We're only using DImode here because it's a convenient size. */
10365 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10366 ops[1] = adjust_address (mem, SImode, 8 * i);
10367 if (reg_overlap_mentioned_p (ops[0], mem))
10368 {
10369 gcc_assert (overlap == -1);
10370 overlap = i;
10371 }
10372 else
10373 {
10374 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10375 output_asm_insn (buff, ops);
10376 }
10377 }
10378 if (overlap != -1)
10379 {
10380 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10381 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10382 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10383 output_asm_insn (buff, ops);
10384 }
10385
10386 return "";
10387 }
10388
10389 default:
10390 template = "v%smia%%?\t%%m0, %%h1";
10391 ops[0] = mem;
10392 ops[1] = reg;
10393 }
10394
10395 sprintf (buff, template, load ? "ld" : "st");
10396 output_asm_insn (buff, ops);
10397
10398 return "";
10399 }
10400
10401 /* Output an ADD r, s, #n where n may be too big for one instruction.
10402 If adding zero to one register, output nothing. */
10403 const char *
10404 output_add_immediate (rtx *operands)
10405 {
10406 HOST_WIDE_INT n = INTVAL (operands[2]);
10407
10408 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10409 {
10410 if (n < 0)
10411 output_multi_immediate (operands,
10412 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10413 -n);
10414 else
10415 output_multi_immediate (operands,
10416 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10417 n);
10418 }
10419
10420 return "";
10421 }
10422
10423 /* Output a multiple immediate operation.
10424 OPERANDS is the vector of operands referred to in the output patterns.
10425 INSTR1 is the output pattern to use for the first constant.
10426 INSTR2 is the output pattern to use for subsequent constants.
10427 IMMED_OP is the index of the constant slot in OPERANDS.
10428 N is the constant value. */
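/* Worked example (hypothetical operands): via output_add_immediate, a
   constant such as 0x10101 is split into 8-bit chunks at even bit positions,
   so adding it to r1 with destination r0 would come out roughly as

           add     r0, r1, #1
           add     r0, r0, #256
           add     r0, r0, #65536

   Each chunk is a valid ARM rotated immediate, and INSTR2 is used for every
   instruction after the first so the running result accumulates in %0.  */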
10429 static const char *
10430 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10431 int immed_op, HOST_WIDE_INT n)
10432 {
10433 #if HOST_BITS_PER_WIDE_INT > 32
10434 n &= 0xffffffff;
10435 #endif
10436
10437 if (n == 0)
10438 {
10439 /* Quick and easy output. */
10440 operands[immed_op] = const0_rtx;
10441 output_asm_insn (instr1, operands);
10442 }
10443 else
10444 {
10445 int i;
10446 const char * instr = instr1;
10447
10448 /* Note that n is never zero here (which would give no output). */
10449 for (i = 0; i < 32; i += 2)
10450 {
10451 if (n & (3 << i))
10452 {
10453 operands[immed_op] = GEN_INT (n & (255 << i));
10454 output_asm_insn (instr, operands);
10455 instr = instr2;
10456 i += 6;
10457 }
10458 }
10459 }
10460
10461 return "";
10462 }
10463
10464 /* Return the name of a shifter operation. */
10465 static const char *
10466 arm_shift_nmem(enum rtx_code code)
10467 {
10468 switch (code)
10469 {
10470 case ASHIFT:
10471 return ARM_LSL_NAME;
10472
10473 case ASHIFTRT:
10474 return "asr";
10475
10476 case LSHIFTRT:
10477 return "lsr";
10478
10479 case ROTATERT:
10480 return "ror";
10481
10482 default:
10483 abort();
10484 }
10485 }
10486
10487 /* Return the appropriate ARM instruction for the operation code.
10488 The returned result should not be overwritten. OP is the rtx of the
10489 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10490 was shifted. */
10491 const char *
10492 arithmetic_instr (rtx op, int shift_first_arg)
10493 {
10494 switch (GET_CODE (op))
10495 {
10496 case PLUS:
10497 return "add";
10498
10499 case MINUS:
10500 return shift_first_arg ? "rsb" : "sub";
10501
10502 case IOR:
10503 return "orr";
10504
10505 case XOR:
10506 return "eor";
10507
10508 case AND:
10509 return "and";
10510
10511 case ASHIFT:
10512 case ASHIFTRT:
10513 case LSHIFTRT:
10514 case ROTATERT:
10515 return arm_shift_nmem(GET_CODE(op));
10516
10517 default:
10518 gcc_unreachable ();
10519 }
10520 }
10521
10522 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10523 for the operation code. The returned result should not be overwritten.
10524 OP is the rtx of the shift.
10525 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
10526 will be the constant shift amount. */
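/* Illustrative examples (assumed rtx shapes): a (mult x 8) operand is
   returned as "lsl" with *AMOUNTP set to 3, while a (rotate x 8) becomes
   "ror" with *AMOUNTP set to 24, since a left-rotate by N is rewritten below
   as a right-rotate by 32 - N.  */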
10527 static const char *
10528 shift_op (rtx op, HOST_WIDE_INT *amountp)
10529 {
10530 const char * mnem;
10531 enum rtx_code code = GET_CODE (op);
10532
10533 switch (GET_CODE (XEXP (op, 1)))
10534 {
10535 case REG:
10536 case SUBREG:
10537 *amountp = -1;
10538 break;
10539
10540 case CONST_INT:
10541 *amountp = INTVAL (XEXP (op, 1));
10542 break;
10543
10544 default:
10545 gcc_unreachable ();
10546 }
10547
10548 switch (code)
10549 {
10550 case ROTATE:
10551 gcc_assert (*amountp != -1);
10552 *amountp = 32 - *amountp;
10553 code = ROTATERT;
10554
10555 /* Fall through. */
10556
10557 case ASHIFT:
10558 case ASHIFTRT:
10559 case LSHIFTRT:
10560 case ROTATERT:
10561 mnem = arm_shift_nmem(code);
10562 break;
10563
10564 case MULT:
10565 /* We never have to worry about the amount being other than a
10566 power of 2, since this case can never be reloaded from a reg. */
10567 gcc_assert (*amountp != -1);
10568 *amountp = int_log2 (*amountp);
10569 return ARM_LSL_NAME;
10570
10571 default:
10572 gcc_unreachable ();
10573 }
10574
10575 if (*amountp != -1)
10576 {
10577 /* This is not 100% correct, but follows from the desire to merge
10578 multiplication by a power of 2 with the recognizer for a
10579 shift. >=32 is not a valid shift for "lsl", so we must try and
10580 output a shift that produces the correct arithmetical result.
10581 Using lsr #32 is identical except for the fact that the carry bit
10582 is not set correctly if we set the flags; but we never use the
10583 carry bit from such an operation, so we can ignore that. */
10584 if (code == ROTATERT)
10585 /* Rotate is just modulo 32. */
10586 *amountp &= 31;
10587 else if (*amountp != (*amountp & 31))
10588 {
10589 if (code == ASHIFT)
10590 mnem = "lsr";
10591 *amountp = 32;
10592 }
10593
10594 /* Shifts of 0 are no-ops. */
10595 if (*amountp == 0)
10596 return NULL;
10597 }
10598
10599 return mnem;
10600 }
10601
10602 /* Obtain the shift count corresponding to the power of two POWER. */
10603
10604 static HOST_WIDE_INT
10605 int_log2 (HOST_WIDE_INT power)
10606 {
10607 HOST_WIDE_INT shift = 0;
10608
10609 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10610 {
10611 gcc_assert (shift <= 31);
10612 shift++;
10613 }
10614
10615 return shift;
10616 }
10617
10618 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10619 because /bin/as is horribly restrictive. The judgement about
10620 whether or not each character is 'printable' (and can be output as
10621 is) or not (and must be printed with an octal escape) must be made
10622 with reference to the *host* character set -- the situation is
10623 similar to that discussed in the comments above pp_c_char in
10624 c-pretty-print.c. */
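/* Illustrative example (hypothetical input): the bytes { 'h', 'i', '"',
   '\n' } would be emitted roughly as

           .ascii  "hi\"\012"

   Printable characters pass through (with '\\' and '"' escaped), everything
   else is written as a three-digit octal escape, and a fresh .ascii
   directive is started whenever MAX_ASCII_LEN columns have been used.  */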
10625
10626 #define MAX_ASCII_LEN 51
10627
10628 void
10629 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10630 {
10631 int i;
10632 int len_so_far = 0;
10633
10634 fputs ("\t.ascii\t\"", stream);
10635
10636 for (i = 0; i < len; i++)
10637 {
10638 int c = p[i];
10639
10640 if (len_so_far >= MAX_ASCII_LEN)
10641 {
10642 fputs ("\"\n\t.ascii\t\"", stream);
10643 len_so_far = 0;
10644 }
10645
10646 if (ISPRINT (c))
10647 {
10648 if (c == '\\' || c == '\"')
10649 {
10650 putc ('\\', stream);
10651 len_so_far++;
10652 }
10653 putc (c, stream);
10654 len_so_far++;
10655 }
10656 else
10657 {
10658 fprintf (stream, "\\%03o", c);
10659 len_so_far += 4;
10660 }
10661 }
10662
10663 fputs ("\"\n", stream);
10664 }
10665 \f
10666 /* Compute the register save mask for registers 0 through 12
10667 inclusive. This code is used by arm_compute_save_reg_mask. */
10668
10669 static unsigned long
10670 arm_compute_save_reg0_reg12_mask (void)
10671 {
10672 unsigned long func_type = arm_current_func_type ();
10673 unsigned long save_reg_mask = 0;
10674 unsigned int reg;
10675
10676 if (IS_INTERRUPT (func_type))
10677 {
10678 unsigned int max_reg;
10679 /* Interrupt functions must not corrupt any registers,
10680 even call clobbered ones. If this is a leaf function
10681 we can just examine the registers used by the RTL, but
10682 otherwise we have to assume that whatever function is
10683 called might clobber anything, and so we have to save
10684 all the call-clobbered registers as well. */
10685 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10686 /* FIQ handlers have registers r8 - r12 banked, so
10687 we only need to check r0 - r7, Normal ISRs only
10688 bank r14 and r15, so we must check up to r12.
10689 r13 is the stack pointer which is always preserved,
10690 so we do not need to consider it here. */
10691 max_reg = 7;
10692 else
10693 max_reg = 12;
10694
10695 for (reg = 0; reg <= max_reg; reg++)
10696 if (df_regs_ever_live_p (reg)
10697 || (! current_function_is_leaf && call_used_regs[reg]))
10698 save_reg_mask |= (1 << reg);
10699
10700 /* Also save the pic base register if necessary. */
10701 if (flag_pic
10702 && !TARGET_SINGLE_PIC_BASE
10703 && arm_pic_register != INVALID_REGNUM
10704 && current_function_uses_pic_offset_table)
10705 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10706 }
10707 else
10708 {
10709 /* In the normal case we only need to save those registers
10710 which are call saved and which are used by this function. */
10711 for (reg = 0; reg <= 11; reg++)
10712 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10713 save_reg_mask |= (1 << reg);
10714
10715 /* Handle the frame pointer as a special case. */
10716 if (frame_pointer_needed)
10717 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10718
10719 /* If we aren't loading the PIC register,
10720 don't stack it even though it may be live. */
10721 if (flag_pic
10722 && !TARGET_SINGLE_PIC_BASE
10723 && arm_pic_register != INVALID_REGNUM
10724 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10725 || current_function_uses_pic_offset_table))
10726 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10727
10728 /* The prologue will copy SP into R0, so save it. */
10729 if (IS_STACKALIGN (func_type))
10730 save_reg_mask |= 1;
10731 }
10732
10733 /* Save registers so the exception handler can modify them. */
10734 if (current_function_calls_eh_return)
10735 {
10736 unsigned int i;
10737
10738 for (i = 0; ; i++)
10739 {
10740 reg = EH_RETURN_DATA_REGNO (i);
10741 if (reg == INVALID_REGNUM)
10742 break;
10743 save_reg_mask |= 1 << reg;
10744 }
10745 }
10746
10747 return save_reg_mask;
10748 }
10749
10750
10751 /* Compute a bit mask of which registers need to be
10752 saved on the stack for the current function.
10753 This is used by arm_get_frame_offsets, which may add extra registers. */
10754
10755 static unsigned long
10756 arm_compute_save_reg_mask (void)
10757 {
10758 unsigned int save_reg_mask = 0;
10759 unsigned long func_type = arm_current_func_type ();
10760 unsigned int reg;
10761
10762 if (IS_NAKED (func_type))
10763 /* This should never really happen. */
10764 return 0;
10765
10766 /* If we are creating a stack frame, then we must save the frame pointer,
10767 IP (which will hold the old stack pointer), LR and the PC. */
10768 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
10769 save_reg_mask |=
10770 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10771 | (1 << IP_REGNUM)
10772 | (1 << LR_REGNUM)
10773 | (1 << PC_REGNUM);
10774
10775 /* Volatile functions do not return, so there
10776 is no need to save any other registers. */
10777 if (IS_VOLATILE (func_type))
10778 return save_reg_mask;
10779
10780 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10781
10782 /* Decide if we need to save the link register.
10783 Interrupt routines have their own banked link register,
10784 so they never need to save it.
10785 Otherwise if we do not use the link register we do not need to save
10786 it. If we are pushing other registers onto the stack however, we
10787 can save an instruction in the epilogue by pushing the link register
10788 now and then popping it back into the PC. This incurs extra memory
10789 accesses though, so we only do it when optimizing for size, and only
10790 if we know that we will not need a fancy return sequence. */
10791 if (df_regs_ever_live_p (LR_REGNUM)
10792 || (save_reg_mask
10793 && optimize_size
10794 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10795 && !current_function_calls_eh_return))
10796 save_reg_mask |= 1 << LR_REGNUM;
10797
10798 if (cfun->machine->lr_save_eliminated)
10799 save_reg_mask &= ~ (1 << LR_REGNUM);
10800
10801 if (TARGET_REALLY_IWMMXT
10802 && ((bit_count (save_reg_mask)
10803 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10804 {
10805 /* The total number of registers that are going to be pushed
10806 onto the stack is odd. We need to ensure that the stack
10807 is 64-bit aligned before we start to save iWMMXt registers,
10808 and also before we start to create locals. (A local variable
10809 might be a double or long long which we will load/store using
10810 an iWMMXt instruction). Therefore we need to push another
10811 ARM register, so that the stack will be 64-bit aligned. We
10812 try to avoid using the arg registers (r0 - r3) as they might be
10813 used to pass values in a tail call. */
10814 for (reg = 4; reg <= 12; reg++)
10815 if ((save_reg_mask & (1 << reg)) == 0)
10816 break;
10817
10818 if (reg <= 12)
10819 save_reg_mask |= (1 << reg);
10820 else
10821 {
10822 cfun->machine->sibcall_blocked = 1;
10823 save_reg_mask |= (1 << 3);
10824 }
10825 }
10826
10827 /* We may need to push an additional register for use initializing the
10828 PIC base register. */
10829 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10830 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10831 {
10832 reg = thumb_find_work_register (1 << 4);
10833 if (!call_used_regs[reg])
10834 save_reg_mask |= (1 << reg);
10835 }
10836
10837 return save_reg_mask;
10838 }
10839
10840
10841 /* Compute a bit mask of which registers need to be
10842 saved on the stack for the current function. */
10843 static unsigned long
10844 thumb1_compute_save_reg_mask (void)
10845 {
10846 unsigned long mask;
10847 unsigned reg;
10848
10849 mask = 0;
10850 for (reg = 0; reg < 12; reg ++)
10851 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10852 mask |= 1 << reg;
10853
10854 if (flag_pic
10855 && !TARGET_SINGLE_PIC_BASE
10856 && arm_pic_register != INVALID_REGNUM
10857 && current_function_uses_pic_offset_table)
10858 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10859
10860 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10861 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10862 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10863
10864 /* LR will also be pushed if any lo regs are pushed. */
10865 if (mask & 0xff || thumb_force_lr_save ())
10866 mask |= (1 << LR_REGNUM);
10867
10868 /* Make sure we have a low work register if we need one.
10869 We will need one if we are going to push a high register,
10870 but we are not currently intending to push a low register. */
10871 if ((mask & 0xff) == 0
10872 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10873 {
10874 /* Use thumb_find_work_register to choose which register
10875 we will use. If the register is live then we will
10876 have to push it. Use LAST_LO_REGNUM as our fallback
10877 choice for the register to select. */
10878 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10879 /* Make sure the register returned by thumb_find_work_register is
10880 not part of the return value. */
10881 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
10882 reg = LAST_LO_REGNUM;
10883
10884 if (! call_used_regs[reg])
10885 mask |= 1 << reg;
10886 }
10887
10888 return mask;
10889 }
10890
10891
10892 /* Return the number of bytes required to save VFP registers. */
10893 static int
10894 arm_get_vfp_saved_size (void)
10895 {
10896 unsigned int regno;
10897 int count;
10898 int saved;
10899
10900 saved = 0;
10901 /* Space for saved VFP registers. */
10902 if (TARGET_HARD_FLOAT && TARGET_VFP)
10903 {
10904 count = 0;
10905 for (regno = FIRST_VFP_REGNUM;
10906 regno < LAST_VFP_REGNUM;
10907 regno += 2)
10908 {
10909 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10910 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10911 {
10912 if (count > 0)
10913 {
10914 /* Workaround ARM10 VFPr1 bug. */
10915 if (count == 2 && !arm_arch6)
10916 count++;
10917 saved += count * 8;
10918 }
10919 count = 0;
10920 }
10921 else
10922 count++;
10923 }
10924 if (count > 0)
10925 {
10926 if (count == 2 && !arm_arch6)
10927 count++;
10928 saved += count * 8;
10929 }
10930 }
10931 return saved;
10932 }
10933
10934
10935 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10936 everything bar the final return instruction. */
10937 const char *
10938 output_return_instruction (rtx operand, int really_return, int reverse)
10939 {
10940 char conditional[10];
10941 char instr[100];
10942 unsigned reg;
10943 unsigned long live_regs_mask;
10944 unsigned long func_type;
10945 arm_stack_offsets *offsets;
10946
10947 func_type = arm_current_func_type ();
10948
10949 if (IS_NAKED (func_type))
10950 return "";
10951
10952 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10953 {
10954 /* If this function was declared non-returning, and we have
10955 found a tail call, then we have to trust that the called
10956 function won't return. */
10957 if (really_return)
10958 {
10959 rtx ops[2];
10960
10961 /* Otherwise, trap an attempted return by aborting. */
10962 ops[0] = operand;
10963 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
10964 : "abort");
10965 assemble_external_libcall (ops[1]);
10966 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
10967 }
10968
10969 return "";
10970 }
10971
10972 gcc_assert (!current_function_calls_alloca || really_return);
10973
10974 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
10975
10976 return_used_this_function = 1;
10977
10978 offsets = arm_get_frame_offsets ();
10979 live_regs_mask = offsets->saved_regs_mask;
10980
10981 if (live_regs_mask)
10982 {
10983 const char * return_reg;
10984
10985 /* If we do not have any special requirements for function exit
10986 (e.g. interworking) then we can load the return address
10987 directly into the PC. Otherwise we must load it into LR. */
10988 if (really_return
10989 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
10990 return_reg = reg_names[PC_REGNUM];
10991 else
10992 return_reg = reg_names[LR_REGNUM];
10993
10994 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
10995 {
10996 /* There are three possible reasons for the IP register
10997 being saved. 1) a stack frame was created, in which case
10998 IP contains the old stack pointer, or 2) an ISR routine
10999 corrupted it, or 3) it was saved to align the stack on
11000 iWMMXt. In case 1, restore IP into SP, otherwise just
11001 restore IP. */
11002 if (frame_pointer_needed)
11003 {
11004 live_regs_mask &= ~ (1 << IP_REGNUM);
11005 live_regs_mask |= (1 << SP_REGNUM);
11006 }
11007 else
11008 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11009 }
11010
11011 /* On some ARM architectures it is faster to use LDR rather than
11012 LDM to load a single register. On other architectures, the
11013 cost is the same. In 26 bit mode, or for exception handlers,
11014 we have to use LDM to load the PC so that the CPSR is also
11015 restored. */
11016 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11017 if (live_regs_mask == (1U << reg))
11018 break;
11019
11020 if (reg <= LAST_ARM_REGNUM
11021 && (reg != LR_REGNUM
11022 || ! really_return
11023 || ! IS_INTERRUPT (func_type)))
11024 {
11025 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11026 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11027 }
11028 else
11029 {
11030 char *p;
11031 int first = 1;
11032
11033 /* Generate the load multiple instruction to restore the
11034 registers. Note we can get here, even if
11035 frame_pointer_needed is true, but only if sp already
11036 points to the base of the saved core registers. */
11037 if (live_regs_mask & (1 << SP_REGNUM))
11038 {
11039 unsigned HOST_WIDE_INT stack_adjust;
11040
11041 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11042 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11043
11044 if (stack_adjust && arm_arch5 && TARGET_ARM)
11045 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11046 else
11047 {
11048 /* If we can't use ldmib (SA110 bug),
11049 then try to pop r3 instead. */
11050 if (stack_adjust)
11051 live_regs_mask |= 1 << 3;
11052 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11053 }
11054 }
11055 else
11056 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11057
11058 p = instr + strlen (instr);
11059
11060 for (reg = 0; reg <= SP_REGNUM; reg++)
11061 if (live_regs_mask & (1 << reg))
11062 {
11063 int l = strlen (reg_names[reg]);
11064
11065 if (first)
11066 first = 0;
11067 else
11068 {
11069 memcpy (p, ", ", 2);
11070 p += 2;
11071 }
11072
11073 memcpy (p, "%|", 2);
11074 memcpy (p + 2, reg_names[reg], l);
11075 p += l + 2;
11076 }
11077
11078 if (live_regs_mask & (1 << LR_REGNUM))
11079 {
11080 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11081 /* If returning from an interrupt, restore the CPSR. */
11082 if (IS_INTERRUPT (func_type))
11083 strcat (p, "^");
11084 }
11085 else
11086 strcpy (p, "}");
11087 }
11088
11089 output_asm_insn (instr, & operand);
11090
11091 /* See if we need to generate an extra instruction to
11092 perform the actual function return. */
11093 if (really_return
11094 && func_type != ARM_FT_INTERWORKED
11095 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11096 {
11097 /* The return has already been handled
11098 by loading the LR into the PC. */
11099 really_return = 0;
11100 }
11101 }
11102
11103 if (really_return)
11104 {
11105 switch ((int) ARM_FUNC_TYPE (func_type))
11106 {
11107 case ARM_FT_ISR:
11108 case ARM_FT_FIQ:
11109 /* ??? This is wrong for unified assembly syntax. */
11110 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11111 break;
11112
11113 case ARM_FT_INTERWORKED:
11114 sprintf (instr, "bx%s\t%%|lr", conditional);
11115 break;
11116
11117 case ARM_FT_EXCEPTION:
11118 /* ??? This is wrong for unified assembly syntax. */
11119 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11120 break;
11121
11122 default:
11123 /* Use bx if it's available. */
11124 if (arm_arch5 || arm_arch4t)
11125 sprintf (instr, "bx%s\t%%|lr", conditional);
11126 else
11127 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11128 break;
11129 }
11130
11131 output_asm_insn (instr, & operand);
11132 }
11133
11134 return "";
11135 }
11136
11137 /* Write the function name into the code section, directly preceding
11138 the function prologue.
11139
11140 Code will be output similar to this:
11141 t0
11142 .ascii "arm_poke_function_name", 0
11143 .align
11144 t1
11145 .word 0xff000000 + (t1 - t0)
11146 arm_poke_function_name
11147 mov ip, sp
11148 stmfd sp!, {fp, ip, lr, pc}
11149 sub fp, ip, #4
11150
11151 When performing a stack backtrace, code can inspect the value
11152 of 'pc' stored at 'fp' + 0. If the trace function then looks
11153 at location pc - 12 and the top 8 bits are set, then we know
11154 that there is a function name embedded immediately preceding this
11155 location and that its length is ((pc[-3]) & ~0xff000000).
11156
11157 We assume that pc is declared as a pointer to an unsigned long.
11158
11159 It is of no benefit to output the function name if we are assembling
11160 a leaf function. These function types will not contain a stack
11161 backtrace structure; therefore it is not possible to determine the
11162 function name. */
11163 void
11164 arm_poke_function_name (FILE *stream, const char *name)
11165 {
11166 unsigned long alignlength;
11167 unsigned long length;
11168 rtx x;
11169
11170 length = strlen (name) + 1;
11171 alignlength = ROUND_UP_WORD (length);
11172
11173 ASM_OUTPUT_ASCII (stream, name, length);
11174 ASM_OUTPUT_ALIGN (stream, 2);
11175 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11176 assemble_aligned_integer (UNITS_PER_WORD, x);
11177 }
11178
11179 /* Place some comments into the assembler stream
11180 describing the current function. */
11181 static void
11182 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11183 {
11184 unsigned long func_type;
11185
11186 if (TARGET_THUMB1)
11187 {
11188 thumb1_output_function_prologue (f, frame_size);
11189 return;
11190 }
11191
11192 /* Sanity check. */
11193 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11194
11195 func_type = arm_current_func_type ();
11196
11197 switch ((int) ARM_FUNC_TYPE (func_type))
11198 {
11199 default:
11200 case ARM_FT_NORMAL:
11201 break;
11202 case ARM_FT_INTERWORKED:
11203 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11204 break;
11205 case ARM_FT_ISR:
11206 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11207 break;
11208 case ARM_FT_FIQ:
11209 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11210 break;
11211 case ARM_FT_EXCEPTION:
11212 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11213 break;
11214 }
11215
11216 if (IS_NAKED (func_type))
11217 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11218
11219 if (IS_VOLATILE (func_type))
11220 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11221
11222 if (IS_NESTED (func_type))
11223 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11224 if (IS_STACKALIGN (func_type))
11225 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11226
11227 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11228 current_function_args_size,
11229 current_function_pretend_args_size, frame_size);
11230
11231 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11232 frame_pointer_needed,
11233 cfun->machine->uses_anonymous_args);
11234
11235 if (cfun->machine->lr_save_eliminated)
11236 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11237
11238 if (current_function_calls_eh_return)
11239 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11240
11241 return_used_this_function = 0;
11242 }
11243
11244 const char *
11245 arm_output_epilogue (rtx sibling)
11246 {
11247 int reg;
11248 unsigned long saved_regs_mask;
11249 unsigned long func_type;
11250 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11251 frame that is $fp + 4 for a non-variadic function. */
11252 int floats_offset = 0;
11253 rtx operands[3];
11254 FILE * f = asm_out_file;
11255 unsigned int lrm_count = 0;
11256 int really_return = (sibling == NULL);
11257 int start_reg;
11258 arm_stack_offsets *offsets;
11259
11260 /* If we have already generated the return instruction
11261 then it is futile to generate anything else. */
11262 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11263 return "";
11264
11265 func_type = arm_current_func_type ();
11266
11267 if (IS_NAKED (func_type))
11268 /* Naked functions don't have epilogues. */
11269 return "";
11270
11271 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11272 {
11273 rtx op;
11274
11275 /* A volatile function should never return. Call abort. */
11276 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11277 assemble_external_libcall (op);
11278 output_asm_insn ("bl\t%a0", &op);
11279
11280 return "";
11281 }
11282
11283 /* If we are throwing an exception, then we really must be doing a
11284 return, so we can't tail-call. */
11285 gcc_assert (!current_function_calls_eh_return || really_return);
11286
11287 offsets = arm_get_frame_offsets ();
11288 saved_regs_mask = offsets->saved_regs_mask;
11289
11290 if (TARGET_IWMMXT)
11291 lrm_count = bit_count (saved_regs_mask);
11292
11293 floats_offset = offsets->saved_args;
11294 /* Compute how far away the floats will be. */
11295 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11296 if (saved_regs_mask & (1 << reg))
11297 floats_offset += 4;
11298
11299 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11300 {
11301 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11302 int vfp_offset = offsets->frame;
11303
11304 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11305 {
11306 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11307 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11308 {
11309 floats_offset += 12;
11310 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11311 reg, FP_REGNUM, floats_offset - vfp_offset);
11312 }
11313 }
11314 else
11315 {
11316 start_reg = LAST_FPA_REGNUM;
11317
11318 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11319 {
11320 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11321 {
11322 floats_offset += 12;
11323
11324 /* We can't unstack more than four registers at once. */
11325 if (start_reg - reg == 3)
11326 {
11327 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11328 reg, FP_REGNUM, floats_offset - vfp_offset);
11329 start_reg = reg - 1;
11330 }
11331 }
11332 else
11333 {
11334 if (reg != start_reg)
11335 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11336 reg + 1, start_reg - reg,
11337 FP_REGNUM, floats_offset - vfp_offset);
11338 start_reg = reg - 1;
11339 }
11340 }
11341
11342 /* Just in case the last register checked also needs unstacking. */
11343 if (reg != start_reg)
11344 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11345 reg + 1, start_reg - reg,
11346 FP_REGNUM, floats_offset - vfp_offset);
11347 }
11348
11349 if (TARGET_HARD_FLOAT && TARGET_VFP)
11350 {
11351 int saved_size;
11352
11353 /* The fldmd insns do not have base+offset addressing
11354 modes, so we use IP to hold the address. */
11355 saved_size = arm_get_vfp_saved_size ();
11356
11357 if (saved_size > 0)
11358 {
11359 floats_offset += saved_size;
11360 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11361 FP_REGNUM, floats_offset - vfp_offset);
11362 }
11363 start_reg = FIRST_VFP_REGNUM;
11364 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11365 {
11366 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11367 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11368 {
11369 if (start_reg != reg)
11370 vfp_output_fldmd (f, IP_REGNUM,
11371 (start_reg - FIRST_VFP_REGNUM) / 2,
11372 (reg - start_reg) / 2);
11373 start_reg = reg + 2;
11374 }
11375 }
11376 if (start_reg != reg)
11377 vfp_output_fldmd (f, IP_REGNUM,
11378 (start_reg - FIRST_VFP_REGNUM) / 2,
11379 (reg - start_reg) / 2);
11380 }
11381
11382 if (TARGET_IWMMXT)
11383 {
11384 /* The frame pointer is guaranteed to be non-double-word aligned.
11385 This is because it is set to (old_stack_pointer - 4) and the
11386 old_stack_pointer was double word aligned. Thus the offset to
11387 the iWMMXt registers to be loaded must also be non-double-word
11388 sized, so that the resultant address *is* double-word aligned.
11389 We can ignore floats_offset since that was already included in
11390 the live_regs_mask. */
11391 lrm_count += (lrm_count % 2 ? 2 : 1);
11392
11393 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11394 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11395 {
11396 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11397 reg, FP_REGNUM, lrm_count * 4);
11398 lrm_count += 2;
11399 }
11400 }
11401
11402 /* saved_regs_mask should contain the IP, which at the time of stack
11403 frame generation actually contains the old stack pointer. So a
11404 quick way to unwind the stack is just to pop the IP register directly
11405 into the stack pointer. */
11406 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11407 saved_regs_mask &= ~ (1 << IP_REGNUM);
11408 saved_regs_mask |= (1 << SP_REGNUM);
11409
11410 /* There are two registers left in saved_regs_mask - LR and PC. We
11411 only need to restore the LR register (the return address), but to
11412 save time we can load it directly into the PC, unless we need a
11413 special function exit sequence, or we are not really returning. */
11414 if (really_return
11415 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11416 && !current_function_calls_eh_return)
11417 /* Delete the LR from the register mask, so that the LR on
11418 the stack is loaded into the PC in the register mask. */
11419 saved_regs_mask &= ~ (1 << LR_REGNUM);
11420 else
11421 saved_regs_mask &= ~ (1 << PC_REGNUM);
11422
11423 /* We must use SP as the base register, because SP is one of the
11424 registers being restored. If an interrupt or page fault
11425 happens in the ldm instruction, the SP might or might not
11426 have been restored. That would be bad, as then SP will no
11427 longer indicate the safe area of stack, and we can get stack
11428 corruption. Using SP as the base register means that it will
11429 be reset correctly to the original value, should an interrupt
11430 occur. If the stack pointer already points at the right
11431 place, then omit the subtraction. */
11432 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11433 || current_function_calls_alloca)
11434 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11435 4 * bit_count (saved_regs_mask));
11436 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11437
11438 if (IS_INTERRUPT (func_type))
11439 /* Interrupt handlers will have pushed the
11440 IP onto the stack, so restore it now. */
11441 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11442 }
11443 else
11444 {
11445 /* This branch is executed for ARM mode (non-apcs frames) and
11446 Thumb-2 mode. Frame layout is essentially the same for those
11447 cases, except that in ARM mode the frame pointer points to the
11448 first saved register, while in Thumb-2 mode it points
11449 to the last saved register.
11450
11451 It is possible to make the frame pointer point to the last saved
11452 register in both cases, and remove some conditionals below.
11453 That means that fp setup in the prologue would be just "mov fp, sp"
11454 and sp restore in the epilogue would be just "mov sp, fp", whereas
11455 now we have to use add/sub in those cases. However, the value
11456 of that would be marginal, as both mov and add/sub are 32-bit
11457 in ARM mode, and it would require extra conditionals
11458 in arm_expand_prologue to distinguish the ARM-apcs-frame case
11459 (where the frame pointer is required to point at the first register)
11460 from the ARM-non-apcs-frame case. Therefore, such a change is
11461 postponed until a real need arises. */
11462 HOST_WIDE_INT amount;
11463 int rfe;
11464 /* Restore stack pointer if necessary. */
11465 if (TARGET_ARM && frame_pointer_needed)
11466 {
11467 operands[0] = stack_pointer_rtx;
11468 operands[1] = hard_frame_pointer_rtx;
11469
11470 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
11471 output_add_immediate (operands);
11472 }
11473 else
11474 {
11475 if (frame_pointer_needed)
11476 {
11477 /* For Thumb-2 restore sp from the frame pointer.
11478 Operand restrictions mean we have to increment FP, then copy
11479 to SP. */
11480 amount = offsets->locals_base - offsets->saved_regs;
11481 operands[0] = hard_frame_pointer_rtx;
11482 }
11483 else
11484 {
11485 unsigned long count;
11486 operands[0] = stack_pointer_rtx;
11487 amount = offsets->outgoing_args - offsets->saved_regs;
11488 /* pop call clobbered registers if it avoids a
11489 separate stack adjustment. */
11490 count = offsets->saved_regs - offsets->saved_args;
11491 if (optimize_size
11492 && count != 0
11493 && !current_function_calls_eh_return
11494 && bit_count(saved_regs_mask) * 4 == count
11495 && !IS_INTERRUPT (func_type)
11496 && !cfun->tail_call_emit)
11497 {
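/* Build a mask of the argument registers r0-r3 that neither hold
   part of the return value nor are already being restored, then
   drop registers from it until popping the remainder would free no
   more than AMOUNT bytes; if it comes to exactly AMOUNT, the stack
   adjustment can be folded into the pop below.  */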
11498 unsigned long mask;
11499 mask = (1 << (arm_size_return_regs() / 4)) - 1;
11500 mask ^= 0xf;
11501 mask &= ~saved_regs_mask;
11502 reg = 0;
11503 while (bit_count (mask) * 4 > amount)
11504 {
11505 while ((mask & (1 << reg)) == 0)
11506 reg++;
11507 mask &= ~(1 << reg);
11508 }
11509 if (bit_count (mask) * 4 == amount) {
11510 amount = 0;
11511 saved_regs_mask |= mask;
11512 }
11513 }
11514 }
11515
11516 if (amount)
11517 {
11518 operands[1] = operands[0];
11519 operands[2] = GEN_INT (amount);
11520 output_add_immediate (operands);
11521 }
11522 if (frame_pointer_needed)
11523 asm_fprintf (f, "\tmov\t%r, %r\n",
11524 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11525 }
11526
11527 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11528 {
11529 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11530 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11531 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11532 reg, SP_REGNUM);
11533 }
11534 else
11535 {
11536 start_reg = FIRST_FPA_REGNUM;
11537
11538 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11539 {
11540 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11541 {
11542 if (reg - start_reg == 3)
11543 {
11544 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11545 start_reg, SP_REGNUM);
11546 start_reg = reg + 1;
11547 }
11548 }
11549 else
11550 {
11551 if (reg != start_reg)
11552 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11553 start_reg, reg - start_reg,
11554 SP_REGNUM);
11555
11556 start_reg = reg + 1;
11557 }
11558 }
11559
11560 /* Just in case the last register checked also needs unstacking. */
11561 if (reg != start_reg)
11562 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11563 start_reg, reg - start_reg, SP_REGNUM);
11564 }
11565
11566 if (TARGET_HARD_FLOAT && TARGET_VFP)
11567 {
11568 start_reg = FIRST_VFP_REGNUM;
11569 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11570 {
11571 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11572 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11573 {
11574 if (start_reg != reg)
11575 vfp_output_fldmd (f, SP_REGNUM,
11576 (start_reg - FIRST_VFP_REGNUM) / 2,
11577 (reg - start_reg) / 2);
11578 start_reg = reg + 2;
11579 }
11580 }
11581 if (start_reg != reg)
11582 vfp_output_fldmd (f, SP_REGNUM,
11583 (start_reg - FIRST_VFP_REGNUM) / 2,
11584 (reg - start_reg) / 2);
11585 }
11586 if (TARGET_IWMMXT)
11587 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11588 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11589 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11590
11591 /* If we can, restore the LR into the PC. */
11592 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11593 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11594 && !IS_STACKALIGN (func_type)
11595 && really_return
11596 && current_function_pretend_args_size == 0
11597 && saved_regs_mask & (1 << LR_REGNUM)
11598 && !current_function_calls_eh_return)
11599 {
11600 saved_regs_mask &= ~ (1 << LR_REGNUM);
11601 saved_regs_mask |= (1 << PC_REGNUM);
11602 rfe = IS_INTERRUPT (func_type);
11603 }
11604 else
11605 rfe = 0;
11606
11607 /* Load the registers off the stack. If we only have one register
11608 to load, use the LDR instruction - it is faster. For Thumb-2
11609 always use pop and the assembler will pick the best instruction. */
11610 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11611 && !IS_INTERRUPT(func_type))
11612 {
11613 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11614 }
11615 else if (saved_regs_mask)
11616 {
11617 if (saved_regs_mask & (1 << SP_REGNUM))
11618 /* Note - write back to the stack register is not enabled
11619 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11620 in the list of registers and if we add writeback the
11621 instruction becomes UNPREDICTABLE. */
11622 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11623 rfe);
11624 else if (TARGET_ARM)
11625 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11626 rfe);
11627 else
11628 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11629 }
11630
11631 if (current_function_pretend_args_size)
11632 {
11633 /* Unwind the pre-pushed regs. */
11634 operands[0] = operands[1] = stack_pointer_rtx;
11635 operands[2] = GEN_INT (current_function_pretend_args_size);
11636 output_add_immediate (operands);
11637 }
11638 }
11639
11640 /* We may have already restored PC directly from the stack. */
11641 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11642 return "";
11643
11644 /* Stack adjustment for exception handler. */
11645 if (current_function_calls_eh_return)
11646 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11647 ARM_EH_STACKADJ_REGNUM);
11648
11649 /* Generate the return instruction. */
11650 switch ((int) ARM_FUNC_TYPE (func_type))
11651 {
11652 case ARM_FT_ISR:
11653 case ARM_FT_FIQ:
11654 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11655 break;
11656
11657 case ARM_FT_EXCEPTION:
11658 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11659 break;
11660
11661 case ARM_FT_INTERWORKED:
11662 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11663 break;
11664
11665 default:
11666 if (IS_STACKALIGN (func_type))
11667 {
11668 /* See comment in arm_expand_prologue. */
11669 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11670 }
11671 if (arm_arch5 || arm_arch4t)
11672 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11673 else
11674 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11675 break;
11676 }
11677
11678 return "";
11679 }
11680
11681 static void
11682 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11683 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11684 {
11685 arm_stack_offsets *offsets;
11686
11687 if (TARGET_THUMB1)
11688 {
11689 int regno;
11690
11691 /* Emit any call-via-reg trampolines that are needed for v4t support
11692 of call_reg and call_value_reg type insns. */
11693 for (regno = 0; regno < LR_REGNUM; regno++)
11694 {
11695 rtx label = cfun->machine->call_via[regno];
11696
11697 if (label != NULL)
11698 {
11699 switch_to_section (function_section (current_function_decl));
11700 targetm.asm_out.internal_label (asm_out_file, "L",
11701 CODE_LABEL_NUMBER (label));
11702 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11703 }
11704 }
11705
11706 /* ??? Probably not safe to set this here, since it assumes that a
11707 function will be emitted as assembly immediately after we generate
11708 RTL for it. This does not happen for inline functions. */
11709 return_used_this_function = 0;
11710 }
11711 else /* TARGET_32BIT */
11712 {
11713 /* We need to take into account any stack-frame rounding. */
11714 offsets = arm_get_frame_offsets ();
11715
11716 gcc_assert (!use_return_insn (FALSE, NULL)
11717 || !return_used_this_function
11718 || offsets->saved_regs == offsets->outgoing_args
11719 || frame_pointer_needed);
11720
11721 /* Reset the ARM-specific per-function variables. */
11722 after_arm_reorg = 0;
11723 }
11724 }
11725
11726 /* Generate and emit an insn that we will recognize as a push_multi.
11727 Unfortunately, since this insn does not reflect very well the actual
11728 semantics of the operation, we need to annotate the insn for the benefit
11729 of DWARF2 frame unwind information. */
11730 static rtx
11731 emit_multi_reg_push (unsigned long mask)
11732 {
11733 int num_regs = 0;
11734 int num_dwarf_regs;
11735 int i, j;
11736 rtx par;
11737 rtx dwarf;
11738 int dwarf_par_index;
11739 rtx tmp, reg;
11740
11741 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11742 if (mask & (1 << i))
11743 num_regs++;
11744
11745 gcc_assert (num_regs && num_regs <= 16);
11746
11747 /* We don't record the PC in the dwarf frame information. */
11748 num_dwarf_regs = num_regs;
11749 if (mask & (1 << PC_REGNUM))
11750 num_dwarf_regs--;
11751
11752 /* For the body of the insn we are going to generate an UNSPEC in
11753 parallel with several USEs. This allows the insn to be recognized
11754 by the push_multi pattern in the arm.md file. The insn looks
11755 something like this:
11756
11757 (parallel [
11758 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11759 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11760 (use (reg:SI 11 fp))
11761 (use (reg:SI 12 ip))
11762 (use (reg:SI 14 lr))
11763 (use (reg:SI 15 pc))
11764 ])
11765
11766 For the frame note however, we try to be more explicit and actually
11767 show each register being stored into the stack frame, plus a (single)
11768 decrement of the stack pointer. We do it this way in order to be
11769 friendly to the stack unwinding code, which only wants to see a single
11770 stack decrement per instruction. The RTL we generate for the note looks
11771 something like this:
11772
11773 (sequence [
11774 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11775 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11776 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11777 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11778 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11779 ])
11780
11781 This sequence is used both by the code to support stack unwinding for
11782 exception handlers and the code to generate dwarf2 frame debugging information. */
11783
11784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11785 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11786 dwarf_par_index = 1;
11787
11788 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11789 {
11790 if (mask & (1 << i))
11791 {
11792 reg = gen_rtx_REG (SImode, i);
11793
11794 XVECEXP (par, 0, 0)
11795 = gen_rtx_SET (VOIDmode,
11796 gen_frame_mem (BLKmode,
11797 gen_rtx_PRE_DEC (BLKmode,
11798 stack_pointer_rtx)),
11799 gen_rtx_UNSPEC (BLKmode,
11800 gen_rtvec (1, reg),
11801 UNSPEC_PUSH_MULT));
11802
11803 if (i != PC_REGNUM)
11804 {
11805 tmp = gen_rtx_SET (VOIDmode,
11806 gen_frame_mem (SImode, stack_pointer_rtx),
11807 reg);
11808 RTX_FRAME_RELATED_P (tmp) = 1;
11809 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11810 dwarf_par_index++;
11811 }
11812
11813 break;
11814 }
11815 }
11816
11817 for (j = 1, i++; j < num_regs; i++)
11818 {
11819 if (mask & (1 << i))
11820 {
11821 reg = gen_rtx_REG (SImode, i);
11822
11823 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11824
11825 if (i != PC_REGNUM)
11826 {
11827 tmp
11828 = gen_rtx_SET (VOIDmode,
11829 gen_frame_mem (SImode,
11830 plus_constant (stack_pointer_rtx,
11831 4 * j)),
11832 reg);
11833 RTX_FRAME_RELATED_P (tmp) = 1;
11834 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11835 }
11836
11837 j++;
11838 }
11839 }
11840
11841 par = emit_insn (par);
11842
11843 tmp = gen_rtx_SET (VOIDmode,
11844 stack_pointer_rtx,
11845 plus_constant (stack_pointer_rtx, -4 * num_regs));
11846 RTX_FRAME_RELATED_P (tmp) = 1;
11847 XVECEXP (dwarf, 0, 0) = tmp;
11848
11849 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11850 REG_NOTES (par));
11851 return par;
11852 }
11853
11854 /* Calculate the size of the return value that is passed in registers. */
11855 static unsigned
11856 arm_size_return_regs (void)
11857 {
11858 enum machine_mode mode;
11859
11860 if (current_function_return_rtx != 0)
11861 mode = GET_MODE (current_function_return_rtx);
11862 else
11863 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11864
11865 return GET_MODE_SIZE (mode);
11866 }
11867
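/* Emit the insn that pushes COUNT FPA registers, starting with BASE_REG,
   using a single sfm-style store-multiple, and attach a DWARF2 frame
   note describing the individual stores, in the same way as
   emit_multi_reg_push above.  */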
11868 static rtx
11869 emit_sfm (int base_reg, int count)
11870 {
11871 rtx par;
11872 rtx dwarf;
11873 rtx tmp, reg;
11874 int i;
11875
11876 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11877 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11878
11879 reg = gen_rtx_REG (XFmode, base_reg++);
11880
11881 XVECEXP (par, 0, 0)
11882 = gen_rtx_SET (VOIDmode,
11883 gen_frame_mem (BLKmode,
11884 gen_rtx_PRE_DEC (BLKmode,
11885 stack_pointer_rtx)),
11886 gen_rtx_UNSPEC (BLKmode,
11887 gen_rtvec (1, reg),
11888 UNSPEC_PUSH_MULT));
11889 tmp = gen_rtx_SET (VOIDmode,
11890 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11891 RTX_FRAME_RELATED_P (tmp) = 1;
11892 XVECEXP (dwarf, 0, 1) = tmp;
11893
11894 for (i = 1; i < count; i++)
11895 {
11896 reg = gen_rtx_REG (XFmode, base_reg++);
11897 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11898
11899 tmp = gen_rtx_SET (VOIDmode,
11900 gen_frame_mem (XFmode,
11901 plus_constant (stack_pointer_rtx,
11902 i * 12)),
11903 reg);
11904 RTX_FRAME_RELATED_P (tmp) = 1;
11905 XVECEXP (dwarf, 0, i + 1) = tmp;
11906 }
11907
11908 tmp = gen_rtx_SET (VOIDmode,
11909 stack_pointer_rtx,
11910 plus_constant (stack_pointer_rtx, -12 * count));
11911
11912 RTX_FRAME_RELATED_P (tmp) = 1;
11913 XVECEXP (dwarf, 0, 0) = tmp;
11914
11915 par = emit_insn (par);
11916 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11917 REG_NOTES (par));
11918 return par;
11919 }
11920
11921
11922 /* Return true if the current function needs to save/restore LR. */
11923
11924 static bool
11925 thumb_force_lr_save (void)
11926 {
11927 return !cfun->machine->lr_save_eliminated
11928 && (!leaf_function_p ()
11929 || thumb_far_jump_used_p ()
11930 || df_regs_ever_live_p (LR_REGNUM));
11931 }
11932
11933
11934 /* Compute the distance from register FROM to register TO.
11935 These can be the arg pointer (26), the soft frame pointer (25),
11936 the stack pointer (13) or the hard frame pointer (11).
11937 In thumb mode r7 is used as the soft frame pointer, if needed.
11938 Typical stack layout looks like this:
11939
11940 old stack pointer -> | |
11941 ----
11942 | | \
11943 | | saved arguments for
11944 | | vararg functions
11945 | | /
11946 --
11947 hard FP & arg pointer -> | | \
11948 | | stack
11949 | | frame
11950 | | /
11951 --
11952 | | \
11953 | | call saved
11954 | | registers
11955 soft frame pointer -> | | /
11956 --
11957 | | \
11958 | | local
11959 | | variables
11960 locals base pointer -> | | /
11961 --
11962 | | \
11963 | | outgoing
11964 | | arguments
11965 current stack pointer -> | | /
11966 --
11967
11968 For a given function some or all of these stack components
11969 may not be needed, giving rise to the possibility of
11970 eliminating some of the registers.
11971
11972 The values returned by this function must reflect the behavior
11973 of arm_expand_prologue() and arm_compute_save_reg_mask().
11974
11975 The sign of the number returned reflects the direction of stack
11976 growth, so the values are positive for all eliminations except
11977 from the soft frame pointer to the hard frame pointer.
11978
11979 SFP may point just inside the local variables block to ensure correct
11980 alignment. */
11981
11982
11983 /* Calculate stack offsets. These are used to calculate register elimination
11984 offsets and in prologue/epilogue code. Also calculates which registers
11985 should be saved. */
11986
11987 static arm_stack_offsets *
11988 arm_get_frame_offsets (void)
11989 {
11990 struct arm_stack_offsets *offsets;
11991 unsigned long func_type;
11992 int leaf;
11993 int saved;
11994 int core_saved;
11995 HOST_WIDE_INT frame_size;
11996 int i;
11997
11998 offsets = &cfun->machine->stack_offsets;
11999
12000 /* We need to know if we are a leaf function. Unfortunately, it
12001 is possible to be called after start_sequence has been called,
12002 which causes get_insns to return the insns for the sequence,
12003 not the function, which will cause leaf_function_p to return
12004 the incorrect result.
12005
12006 To work around this, we cache the computed offsets; we only need
12007 to know about leaf functions once reload has completed, and the frame
12008 size cannot be changed after that time, so we can safely use the cached value. */
12009
12010 if (reload_completed)
12011 return offsets;
12012
12013 /* Initially this is the size of the local variables. It will be translated
12014 into an offset once we have determined the size of preceding data. */
12015 frame_size = ROUND_UP_WORD (get_frame_size ());
12016
12017 leaf = leaf_function_p ();
12018
12019 /* Space for variadic functions. */
12020 offsets->saved_args = current_function_pretend_args_size;
12021
12022 /* In Thumb mode this is incorrect, but never used. */
12023 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
12024
12025 if (TARGET_32BIT)
12026 {
12027 unsigned int regno;
12028
12029 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12030 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12031 saved = core_saved;
12032
12033 /* We know that SP will be doubleword aligned on entry, and we must
12034 preserve that condition at any subroutine call. We also require the
12035 soft frame pointer to be doubleword aligned. */
12036
12037 if (TARGET_REALLY_IWMMXT)
12038 {
12039 /* Check for the call-saved iWMMXt registers. */
12040 for (regno = FIRST_IWMMXT_REGNUM;
12041 regno <= LAST_IWMMXT_REGNUM;
12042 regno++)
12043 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12044 saved += 8;
12045 }
12046
12047 func_type = arm_current_func_type ();
12048 if (! IS_VOLATILE (func_type))
12049 {
12050 /* Space for saved FPA registers. */
12051 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12052 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12053 saved += 12;
12054
12055 /* Space for saved VFP registers. */
12056 if (TARGET_HARD_FLOAT && TARGET_VFP)
12057 saved += arm_get_vfp_saved_size ();
12058 }
12059 }
12060 else /* TARGET_THUMB1 */
12061 {
12062 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12063 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12064 saved = core_saved;
12065 if (TARGET_BACKTRACE)
12066 saved += 16;
12067 }
12068
12069 /* Saved registers include the stack frame. */
12070 offsets->saved_regs = offsets->saved_args + saved;
12071 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12072 /* A leaf function does not need any stack alignment if it has nothing
12073 on the stack. */
12074 if (leaf && frame_size == 0)
12075 {
12076 offsets->outgoing_args = offsets->soft_frame;
12077 offsets->locals_base = offsets->soft_frame;
12078 return offsets;
12079 }
12080
12081 /* Ensure SFP has the correct alignment. */
12082 if (ARM_DOUBLEWORD_ALIGN
12083 && (offsets->soft_frame & 7))
12084 {
12085 offsets->soft_frame += 4;
12086 /* Try to align stack by pushing an extra reg. Don't bother doing this
12087 when there is a stack frame as the alignment will be rolled into
12088 the normal stack adjustment. */
12089 if (frame_size + current_function_outgoing_args_size == 0)
12090 {
12091 int reg = -1;
12092
12093 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12094 {
12095 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12096 {
12097 reg = i;
12098 break;
12099 }
12100 }
12101
12102 if (reg == -1 && arm_size_return_regs () <= 12
12103 && !cfun->tail_call_emit)
12104 {
12105 /* Push/pop an argument register (r3) if all callee saved
12106 registers are already being pushed. */
12107 reg = 3;
12108 }
12109
12110 if (reg != -1)
12111 {
12112 offsets->saved_regs += 4;
12113 offsets->saved_regs_mask |= (1 << reg);
12114 }
12115 }
12116 }
12117
12118 offsets->locals_base = offsets->soft_frame + frame_size;
12119 offsets->outgoing_args = (offsets->locals_base
12120 + current_function_outgoing_args_size);
12121
12122 if (ARM_DOUBLEWORD_ALIGN)
12123 {
12124 /* Ensure SP remains doubleword aligned. */
12125 if (offsets->outgoing_args & 7)
12126 offsets->outgoing_args += 4;
12127 gcc_assert (!(offsets->outgoing_args & 7));
12128 }
12129
12130 return offsets;
12131 }
12132
12133
12134 /* Calculate the relative offsets for the different stack pointers. Positive
12135 offsets are in the direction of stack growth. */
12136
12137 HOST_WIDE_INT
12138 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12139 {
12140 arm_stack_offsets *offsets;
12141
12142 offsets = arm_get_frame_offsets ();
12143
12144 /* OK, now we have enough information to compute the distances.
12145 There must be an entry in these switch tables for each pair
12146 of registers in ELIMINABLE_REGS, even if some of the entries
12147 seem to be redundant or useless. */
12148 switch (from)
12149 {
12150 case ARG_POINTER_REGNUM:
12151 switch (to)
12152 {
12153 case THUMB_HARD_FRAME_POINTER_REGNUM:
12154 return 0;
12155
12156 case FRAME_POINTER_REGNUM:
12157 /* This is the reverse of the soft frame pointer
12158 to hard frame pointer elimination below. */
12159 return offsets->soft_frame - offsets->saved_args;
12160
12161 case ARM_HARD_FRAME_POINTER_REGNUM:
12162 /* If there is no stack frame then the hard
12163 frame pointer and the arg pointer coincide. */
12164 if (offsets->frame == offsets->saved_regs)
12165 return 0;
12166 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12167 return (frame_pointer_needed
12168 && cfun->static_chain_decl != NULL
12169 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12170
12171 case STACK_POINTER_REGNUM:
12172 /* If nothing has been pushed on the stack at all
12173 then this will return -4. This *is* correct! */
12174 return offsets->outgoing_args - (offsets->saved_args + 4);
12175
12176 default:
12177 gcc_unreachable ();
12178 }
12179 gcc_unreachable ();
12180
12181 case FRAME_POINTER_REGNUM:
12182 switch (to)
12183 {
12184 case THUMB_HARD_FRAME_POINTER_REGNUM:
12185 return 0;
12186
12187 case ARM_HARD_FRAME_POINTER_REGNUM:
12188 /* The hard frame pointer points to the top entry in the
12189 stack frame. The soft frame pointer points to the bottom entry
12190 in the stack frame. If there is no stack frame at all,
12191 then they are identical. */
12192
12193 return offsets->frame - offsets->soft_frame;
12194
12195 case STACK_POINTER_REGNUM:
12196 return offsets->outgoing_args - offsets->soft_frame;
12197
12198 default:
12199 gcc_unreachable ();
12200 }
12201 gcc_unreachable ();
12202
12203 default:
12204 /* You cannot eliminate from the stack pointer.
12205 In theory you could eliminate from the hard frame
12206 pointer to the stack pointer, but this will never
12207 happen, since if a stack frame is not needed the
12208 hard frame pointer will never be used. */
12209 gcc_unreachable ();
12210 }
12211 }
12212
12213
12214 /* Emit RTL to save coprocessor registers on function entry. Returns the
12215 number of bytes pushed. */
12216
12217 static int
12218 arm_save_coproc_regs(void)
12219 {
12220 int saved_size = 0;
12221 unsigned reg;
12222 unsigned start_reg;
12223 rtx insn;
12224
12225 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12226 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12227 {
12228 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12229 insn = gen_rtx_MEM (V2SImode, insn);
12230 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12231 RTX_FRAME_RELATED_P (insn) = 1;
12232 saved_size += 8;
12233 }
12234
12235 /* Save any floating point call-saved registers used by this
12236 function. */
12237 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12238 {
12239 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12240 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12241 {
12242 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12243 insn = gen_rtx_MEM (XFmode, insn);
12244 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12245 RTX_FRAME_RELATED_P (insn) = 1;
12246 saved_size += 12;
12247 }
12248 }
12249 else
12250 {
12251 start_reg = LAST_FPA_REGNUM;
12252
12253 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12254 {
12255 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12256 {
12257 if (start_reg - reg == 3)
12258 {
12259 insn = emit_sfm (reg, 4);
12260 RTX_FRAME_RELATED_P (insn) = 1;
12261 saved_size += 48;
12262 start_reg = reg - 1;
12263 }
12264 }
12265 else
12266 {
12267 if (start_reg != reg)
12268 {
12269 insn = emit_sfm (reg + 1, start_reg - reg);
12270 RTX_FRAME_RELATED_P (insn) = 1;
12271 saved_size += (start_reg - reg) * 12;
12272 }
12273 start_reg = reg - 1;
12274 }
12275 }
12276
12277 if (start_reg != reg)
12278 {
12279 insn = emit_sfm (reg + 1, start_reg - reg);
12280 saved_size += (start_reg - reg) * 12;
12281 RTX_FRAME_RELATED_P (insn) = 1;
12282 }
12283 }
12284 if (TARGET_HARD_FLOAT && TARGET_VFP)
12285 {
12286 start_reg = FIRST_VFP_REGNUM;
12287
12288 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12289 {
12290 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12291 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12292 {
12293 if (start_reg != reg)
12294 saved_size += vfp_emit_fstmd (start_reg,
12295 (reg - start_reg) / 2);
12296 start_reg = reg + 2;
12297 }
12298 }
12299 if (start_reg != reg)
12300 saved_size += vfp_emit_fstmd (start_reg,
12301 (reg - start_reg) / 2);
12302 }
12303 return saved_size;
12304 }
12305
12306
12307 /* Set the Thumb frame pointer from the stack pointer. */
12308
12309 static void
12310 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12311 {
12312 HOST_WIDE_INT amount;
12313 rtx insn, dwarf;
12314
12315 amount = offsets->outgoing_args - offsets->locals_base;
12316 if (amount < 1024)
12317 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12318 stack_pointer_rtx, GEN_INT (amount)));
12319 else
12320 {
12321 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12322 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12323 expects the first two operands to be the same. */
12324 if (TARGET_THUMB2)
12325 {
12326 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12327 stack_pointer_rtx,
12328 hard_frame_pointer_rtx));
12329 }
12330 else
12331 {
12332 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12333 hard_frame_pointer_rtx,
12334 stack_pointer_rtx));
12335 }
12336 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12337 plus_constant (stack_pointer_rtx, amount));
12338 RTX_FRAME_RELATED_P (dwarf) = 1;
12339 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12340 REG_NOTES (insn));
12341 }
12342
12343 RTX_FRAME_RELATED_P (insn) = 1;
12344 }
12345
12346 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12347 function. */
12348 void
12349 arm_expand_prologue (void)
12350 {
12351 rtx amount;
12352 rtx insn;
12353 rtx ip_rtx;
12354 unsigned long live_regs_mask;
12355 unsigned long func_type;
12356 int fp_offset = 0;
12357 int saved_pretend_args = 0;
12358 int saved_regs = 0;
12359 unsigned HOST_WIDE_INT args_to_push;
12360 arm_stack_offsets *offsets;
12361
12362 func_type = arm_current_func_type ();
12363
12364 /* Naked functions don't have prologues. */
12365 if (IS_NAKED (func_type))
12366 return;
12367
12368 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12369 args_to_push = current_function_pretend_args_size;
12370
12371 /* Compute which register we will have to save onto the stack. */
12372 offsets = arm_get_frame_offsets ();
12373 live_regs_mask = offsets->saved_regs_mask;
12374
12375 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12376
12377 if (IS_STACKALIGN (func_type))
12378 {
12379 rtx dwarf;
12380 rtx r0;
12381 rtx r1;
12382 /* Handle a word-aligned stack pointer. We generate the following:
12383
12384 mov r0, sp
12385 bic r1, r0, #7
12386 mov sp, r1
12387 <save and restore r0 in normal prologue/epilogue>
12388 mov sp, r0
12389 bx lr
12390
12391 The unwinder doesn't need to know about the stack realignment.
12392 Just tell it we saved SP in r0. */
12393 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12394
12395 r0 = gen_rtx_REG (SImode, 0);
12396 r1 = gen_rtx_REG (SImode, 1);
12397 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12398 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12399 insn = gen_movsi (r0, stack_pointer_rtx);
12400 RTX_FRAME_RELATED_P (insn) = 1;
12401 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12402 dwarf, REG_NOTES (insn));
12403 emit_insn (insn);
12404 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12405 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12406 }
12407
12408 /* For APCS frames, if IP register is clobbered
12409 when creating frame, save that register in a special
12410 way. */
12411 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12412 {
12413 if (IS_INTERRUPT (func_type))
12414 {
12415 /* Interrupt functions must not corrupt any registers.
12416 Creating a frame pointer, however, corrupts the IP
12417 register, so we must push it first. */
12418 insn = emit_multi_reg_push (1 << IP_REGNUM);
12419
12420 /* Do not set RTX_FRAME_RELATED_P on this insn.
12421 The dwarf stack unwinding code only wants to see one
12422 stack decrement per function, and this is not it. If
12423 this instruction is labeled as being part of the frame
12424 creation sequence then dwarf2out_frame_debug_expr will
12425 die when it encounters the assignment of IP to FP
12426 later on, since the use of SP here establishes SP as
12427 the CFA register and not IP.
12428
12429 Anyway this instruction is not really part of the stack
12430 frame creation although it is part of the prologue. */
12431 }
12432 else if (IS_NESTED (func_type))
12433 {
12434 /* The static chain register is the same as the IP register, which is
12435 used as a scratch register during stack frame creation.
12436 To get around this we need to find somewhere to store IP
12437 whilst the frame is being created. We try the following
12438 places in order:
12439
12440 1. The last argument register.
12441 2. A slot on the stack above the frame. (This only
12442 works if the function is not a varargs function).
12443 3. Register r3, after pushing the argument registers
12444 onto the stack.
12445
12446 Note - we only need to tell the dwarf2 backend about the SP
12447 adjustment in the second variant; the static chain register
12448 doesn't need to be unwound, as it doesn't contain a value
12449 inherited from the caller. */
12450
12451 if (df_regs_ever_live_p (3) == false)
12452 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12453 else if (args_to_push == 0)
12454 {
12455 rtx dwarf;
12456
12457 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12458 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12459 fp_offset = 4;
12460
12461 /* Just tell the dwarf backend that we adjusted SP. */
12462 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12463 plus_constant (stack_pointer_rtx,
12464 -fp_offset));
12465 RTX_FRAME_RELATED_P (insn) = 1;
12466 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12467 dwarf, REG_NOTES (insn));
12468 }
12469 else
12470 {
12471 /* Store the args on the stack. */
12472 if (cfun->machine->uses_anonymous_args)
12473 insn = emit_multi_reg_push
12474 ((0xf0 >> (args_to_push / 4)) & 0xf);
12475 else
12476 insn = emit_insn
12477 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12478 GEN_INT (- args_to_push)));
12479
12480 RTX_FRAME_RELATED_P (insn) = 1;
12481
12482 saved_pretend_args = 1;
12483 fp_offset = args_to_push;
12484 args_to_push = 0;
12485
12486 /* Now reuse r3 to preserve IP. */
12487 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12488 }
12489 }
12490
12491 insn = emit_set_insn (ip_rtx,
12492 plus_constant (stack_pointer_rtx, fp_offset));
12493 RTX_FRAME_RELATED_P (insn) = 1;
12494 }
12495
12496 if (args_to_push)
12497 {
12498 /* Push the argument registers, or reserve space for them. */
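/* (0xf0 >> (args_to_push / 4)) & 0xf selects the highest
   ARGS_TO_PUSH / 4 argument registers; e.g. 8 bytes of pretend
   arguments gives the mask 0xc, i.e. {r2, r3}.  */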
12499 if (cfun->machine->uses_anonymous_args)
12500 insn = emit_multi_reg_push
12501 ((0xf0 >> (args_to_push / 4)) & 0xf);
12502 else
12503 insn = emit_insn
12504 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12505 GEN_INT (- args_to_push)));
12506 RTX_FRAME_RELATED_P (insn) = 1;
12507 }
12508
12509 /* If this is an interrupt service routine, and the link register
12510 is going to be pushed, and we're not generating an extra
12511 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
12512 subtracting four from LR now will mean that the function return
12513 can be done with a single instruction. */
12514 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12515 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12516 && !(frame_pointer_needed && TARGET_APCS_FRAME)
12517 && TARGET_ARM)
12518 {
12519 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12520
12521 emit_set_insn (lr, plus_constant (lr, -4));
12522 }
12523
12524 if (live_regs_mask)
12525 {
12526 saved_regs += bit_count (live_regs_mask) * 4;
12527 if (optimize_size && !frame_pointer_needed
12528 && saved_regs == offsets->saved_regs - offsets->saved_args)
12529 {
12530 /* If no coprocessor registers are being pushed and we don't have
12531 to worry about a frame pointer then push extra registers to
12532 create the stack frame. This is done in a way that does not
12533 alter the frame layout, so is independent of the epilogue. */
12534 int n;
12535 int frame;
12536 n = 0;
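/* Find the lowest register already being saved; the registers below
   it are free. If pushing FRAME/4 of them would cover the whole
   remaining frame allocation, push them instead of emitting a
   separate stack decrement.  */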
12537 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
12538 n++;
12539 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
12540 if (frame && n * 4 >= frame)
12541 {
12542 n = frame / 4;
12543 live_regs_mask |= (1 << n) - 1;
12544 saved_regs += frame;
12545 }
12546 }
12547 insn = emit_multi_reg_push (live_regs_mask);
12548 RTX_FRAME_RELATED_P (insn) = 1;
12549 }
12550
12551 if (! IS_VOLATILE (func_type))
12552 saved_regs += arm_save_coproc_regs ();
12553
12554 if (frame_pointer_needed && TARGET_ARM)
12555 {
12556 /* Create the new frame pointer. */
12557 if (TARGET_APCS_FRAME)
12558 {
12559 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12560 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12561 RTX_FRAME_RELATED_P (insn) = 1;
12562
12563 if (IS_NESTED (func_type))
12564 {
12565 /* Recover the static chain register. */
12566 if (!df_regs_ever_live_p (3)
12567 || saved_pretend_args)
12568 insn = gen_rtx_REG (SImode, 3);
12569 else /* if (current_function_pretend_args_size == 0) */
12570 {
12571 insn = plus_constant (hard_frame_pointer_rtx, 4);
12572 insn = gen_frame_mem (SImode, insn);
12573 }
12574 emit_set_insn (ip_rtx, insn);
12575 /* Add a USE to stop propagate_one_insn() from barfing. */
12576 emit_insn (gen_prologue_use (ip_rtx));
12577 }
12578 }
12579 else
12580 {
12581 insn = GEN_INT (saved_regs - 4);
12582 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12583 stack_pointer_rtx, insn));
12584 RTX_FRAME_RELATED_P (insn) = 1;
12585 }
12586 }
12587
12588 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12589 {
12590 /* This add can produce multiple insns for a large constant, so we
12591 need to get tricky. */
12592 rtx last = get_last_insn ();
12593
12594 amount = GEN_INT (offsets->saved_args + saved_regs
12595 - offsets->outgoing_args);
12596
12597 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12598 amount));
12599 do
12600 {
12601 last = last ? NEXT_INSN (last) : get_insns ();
12602 RTX_FRAME_RELATED_P (last) = 1;
12603 }
12604 while (last != insn);
12605
12606 /* If the frame pointer is needed, emit a special barrier that
12607 will prevent the scheduler from moving stores to the frame
12608 before the stack adjustment. */
12609 if (frame_pointer_needed)
12610 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12611 hard_frame_pointer_rtx));
12612 }
12613
12614
12615 if (frame_pointer_needed && TARGET_THUMB2)
12616 thumb_set_frame_pointer (offsets);
12617
12618 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12619 {
12620 unsigned long mask;
12621
12622 mask = live_regs_mask;
12623 mask &= THUMB2_WORK_REGS;
12624 if (!IS_NESTED (func_type))
12625 mask |= (1 << IP_REGNUM);
12626 arm_load_pic_register (mask);
12627 }
12628
12629 /* If we are profiling, make sure no instructions are scheduled before
12630 the call to mcount. Similarly if the user has requested no
12631 scheduling in the prolog. Similarly if we want non-call exceptions
12632 using the EABI unwinder, to prevent faulting instructions from being
12633 swapped with a stack adjustment. */
12634 if (current_function_profile || !TARGET_SCHED_PROLOG
12635 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12636 emit_insn (gen_blockage ());
12637
12638 /* If the link register is being kept alive, with the return address in it,
12639 then make sure that it does not get reused by the ce2 pass. */
12640 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12641 cfun->machine->lr_save_eliminated = 1;
12642 }
12643 \f
12644 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12645 static void
12646 arm_print_condition (FILE *stream)
12647 {
12648 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12649 {
12650 /* Branch conversion is not implemented for Thumb-2. */
12651 if (TARGET_THUMB)
12652 {
12653 output_operand_lossage ("predicated Thumb instruction");
12654 return;
12655 }
12656 if (current_insn_predicate != NULL)
12657 {
12658 output_operand_lossage
12659 ("predicated instruction in conditional sequence");
12660 return;
12661 }
12662
12663 fputs (arm_condition_codes[arm_current_cc], stream);
12664 }
12665 else if (current_insn_predicate)
12666 {
12667 enum arm_cond_code code;
12668
12669 if (TARGET_THUMB1)
12670 {
12671 output_operand_lossage ("predicated Thumb instruction");
12672 return;
12673 }
12674
12675 code = get_arm_condition_code (current_insn_predicate);
12676 fputs (arm_condition_codes[code], stream);
12677 }
12678 }
12679
12680
12681 /* If CODE is 'd', then X is a condition operand and the instruction
12682 should only be executed if the condition is true.
12683 If CODE is 'D', then X is a condition operand and the instruction
12684 should only be executed if the condition is false: however, if the mode
12685 of the comparison is CCFPEmode, then always execute the instruction -- we
12686 do this because in these circumstances !GE does not necessarily imply LT;
12687 in these cases the instruction pattern will take care to make sure that
12688 an instruction containing %d will follow, thereby undoing the effects of
12689 doing this instruction unconditionally.
12690 If CODE is 'N' then X is a floating point operand that must be negated
12691 before output.
12692 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12693 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12694 void
12695 arm_print_operand (FILE *stream, rtx x, int code)
12696 {
12697 switch (code)
12698 {
12699 case '@':
12700 fputs (ASM_COMMENT_START, stream);
12701 return;
12702
12703 case '_':
12704 fputs (user_label_prefix, stream);
12705 return;
12706
12707 case '|':
12708 fputs (REGISTER_PREFIX, stream);
12709 return;
12710
12711 case '?':
12712 arm_print_condition (stream);
12713 return;
12714
12715 case '(':
12716 /* Nothing in unified syntax, otherwise the current condition code. */
12717 if (!TARGET_UNIFIED_ASM)
12718 arm_print_condition (stream);
12719 break;
12720
12721 case ')':
12722 /* The current condition code in unified syntax, otherwise nothing. */
12723 if (TARGET_UNIFIED_ASM)
12724 arm_print_condition (stream);
12725 break;
12726
12727 case '.':
12728 /* The current condition code for a condition code setting instruction.
12729 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12730 if (TARGET_UNIFIED_ASM)
12731 {
12732 fputc('s', stream);
12733 arm_print_condition (stream);
12734 }
12735 else
12736 {
12737 arm_print_condition (stream);
12738 fputc('s', stream);
12739 }
12740 return;
12741
12742 case '!':
12743 /* If the instruction is conditionally executed then print
12744 the current condition code, otherwise print 's'. */
12745 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12746 if (current_insn_predicate)
12747 arm_print_condition (stream);
12748 else
12749 fputc('s', stream);
12750 break;
12751
12752 /* %# is a "break" sequence. It doesn't output anything, but is used to
12753 separate e.g. operand numbers from following text, if that text consists
12754 of further digits which we don't want to be part of the operand
12755 number. */
12756 case '#':
12757 return;
12758
12759 case 'N':
12760 {
12761 REAL_VALUE_TYPE r;
12762 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12763 r = REAL_VALUE_NEGATE (r);
12764 fprintf (stream, "%s", fp_const_from_val (&r));
12765 }
12766 return;
12767
12768 /* An integer without a preceding # sign. */
12769 case 'c':
12770 gcc_assert (GET_CODE (x) == CONST_INT);
12771 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12772 return;
12773
12774 case 'B':
12775 if (GET_CODE (x) == CONST_INT)
12776 {
12777 HOST_WIDE_INT val;
12778 val = ARM_SIGN_EXTEND (~INTVAL (x));
12779 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12780 }
12781 else
12782 {
12783 putc ('~', stream);
12784 output_addr_const (stream, x);
12785 }
12786 return;
12787
12788 case 'L':
12789 /* The low 16 bits of an immediate constant. */
12790 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12791 return;
12792
12793 case 'i':
12794 fprintf (stream, "%s", arithmetic_instr (x, 1));
12795 return;
12796
12797 /* Truncate Cirrus shift counts. */
12798 case 's':
12799 if (GET_CODE (x) == CONST_INT)
12800 {
12801 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12802 return;
12803 }
12804 arm_print_operand (stream, x, 0);
12805 return;
12806
12807 case 'I':
12808 fprintf (stream, "%s", arithmetic_instr (x, 0));
12809 return;
12810
12811 case 'S':
12812 {
12813 HOST_WIDE_INT val;
12814 const char *shift;
12815
12816 if (!shift_operator (x, SImode))
12817 {
12818 output_operand_lossage ("invalid shift operand");
12819 break;
12820 }
12821
12822 shift = shift_op (x, &val);
12823
12824 if (shift)
12825 {
12826 fprintf (stream, ", %s ", shift);
12827 if (val == -1)
12828 arm_print_operand (stream, XEXP (x, 1), 0);
12829 else
12830 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12831 }
12832 }
12833 return;
12834
12835 /* An explanation of the 'Q', 'R' and 'H' register operands:
12836
12837 In a pair of registers containing a DI or DF value the 'Q'
12838 operand returns the register number of the register containing
12839 the least significant part of the value. The 'R' operand returns
12840 the register number of the register containing the most
12841 significant part of the value.
12842
12843 The 'H' operand returns the higher of the two register numbers.
12844 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12845 same as the 'Q' operand, since the most significant part of the
12846 value is held in the lower-numbered register. The reverse is true
12847 on systems where WORDS_BIG_ENDIAN is false.
12848
12849 The purpose of these operands is to distinguish between cases
12850 where the endian-ness of the values is important (for example
12851 when they are added together), and cases where the endian-ness
12852 is irrelevant, but the order of register operations is important.
12853 For example when loading a value from memory into a register
12854 pair, the endian-ness does not matter. Provided that the value
12855 from the lower memory address is put into the lower numbered
12856 register, and the value from the higher address is put into the
12857 higher numbered register, the load will work regardless of whether
12858 the value being loaded is big-wordian or little-wordian. The
12859 order of the two register loads can matter however, if the address
12860 of the memory location is actually held in one of the registers
12861 being overwritten by the load. */
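/* For example, with a DImode value in the pair {r2, r3} on a target
   where WORDS_BIG_ENDIAN is false, %Q prints r2 (the least significant
   half), %R prints r3 (the most significant half) and %H prints r3
   (the higher-numbered register).  */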
12862 case 'Q':
12863 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12864 {
12865 output_operand_lossage ("invalid operand for code '%c'", code);
12866 return;
12867 }
12868
12869 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12870 return;
12871
12872 case 'R':
12873 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12874 {
12875 output_operand_lossage ("invalid operand for code '%c'", code);
12876 return;
12877 }
12878
12879 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12880 return;
12881
12882 case 'H':
12883 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12884 {
12885 output_operand_lossage ("invalid operand for code '%c'", code);
12886 return;
12887 }
12888
12889 asm_fprintf (stream, "%r", REGNO (x) + 1);
12890 return;
12891
12892 case 'J':
12893 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12894 {
12895 output_operand_lossage ("invalid operand for code '%c'", code);
12896 return;
12897 }
12898
12899 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12900 return;
12901
12902 case 'K':
12903 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12904 {
12905 output_operand_lossage ("invalid operand for code '%c'", code);
12906 return;
12907 }
12908
12909 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12910 return;
12911
12912 case 'm':
12913 asm_fprintf (stream, "%r",
12914 GET_CODE (XEXP (x, 0)) == REG
12915 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12916 return;
12917
12918 case 'M':
12919 asm_fprintf (stream, "{%r-%r}",
12920 REGNO (x),
12921 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12922 return;
12923
12924 /* Like 'M', but writing doubleword vector registers, for use by Neon
12925 insns. */
12926 case 'h':
12927 {
12928 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12929 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12930 if (numregs == 1)
12931 asm_fprintf (stream, "{d%d}", regno);
12932 else
12933 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12934 }
12935 return;
12936
12937 case 'd':
12938 /* CONST_TRUE_RTX means always -- that's the default. */
12939 if (x == const_true_rtx)
12940 return;
12941
12942 if (!COMPARISON_P (x))
12943 {
12944 output_operand_lossage ("invalid operand for code '%c'", code);
12945 return;
12946 }
12947
12948 fputs (arm_condition_codes[get_arm_condition_code (x)],
12949 stream);
12950 return;
12951
12952 case 'D':
12953 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12954 want to do that. */
12955 if (x == const_true_rtx)
12956 {
12957 output_operand_lossage ("instruction never executed");
12958 return;
12959 }
12960 if (!COMPARISON_P (x))
12961 {
12962 output_operand_lossage ("invalid operand for code '%c'", code);
12963 return;
12964 }
12965
12966 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12967 (get_arm_condition_code (x))],
12968 stream);
12969 return;
12970
12971 /* Cirrus registers can be accessed in a variety of ways:
12972 single floating point (f)
12973 double floating point (d)
12974 32-bit integer (fx)
12975 64-bit integer (dx). */
12976 case 'W': /* Cirrus register in F mode. */
12977 case 'X': /* Cirrus register in D mode. */
12978 case 'Y': /* Cirrus register in FX mode. */
12979 case 'Z': /* Cirrus register in DX mode. */
12980 gcc_assert (GET_CODE (x) == REG
12981 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12982
12983 fprintf (stream, "mv%s%s",
12984 code == 'W' ? "f"
12985 : code == 'X' ? "d"
12986 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12987
12988 return;
12989
12990 /* Print cirrus register in the mode specified by the register's mode. */
12991 case 'V':
12992 {
12993 int mode = GET_MODE (x);
12994
12995 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12996 {
12997 output_operand_lossage ("invalid operand for code '%c'", code);
12998 return;
12999 }
13000
13001 fprintf (stream, "mv%s%s",
13002 mode == DFmode ? "d"
13003 : mode == SImode ? "fx"
13004 : mode == DImode ? "dx"
13005 : "f", reg_names[REGNO (x)] + 2);
13006
13007 return;
13008 }
13009
13010 case 'U':
13011 if (GET_CODE (x) != REG
13012 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13013 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13014 /* Bad value for wCG register number. */
13015 {
13016 output_operand_lossage ("invalid operand for code '%c'", code);
13017 return;
13018 }
13019
13020 else
13021 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13022 return;
13023
13024 /* Print an iWMMXt control register name. */
13025 case 'w':
13026 if (GET_CODE (x) != CONST_INT
13027 || INTVAL (x) < 0
13028 || INTVAL (x) >= 16)
13029 /* Bad value for wC register number. */
13030 {
13031 output_operand_lossage ("invalid operand for code '%c'", code);
13032 return;
13033 }
13034
13035 else
13036 {
13037 static const char * wc_reg_names [16] =
13038 {
13039 "wCID", "wCon", "wCSSF", "wCASF",
13040 "wC4", "wC5", "wC6", "wC7",
13041 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13042 "wC12", "wC13", "wC14", "wC15"
13043 };
13044
13045 fprintf (stream, "%s", wc_reg_names [INTVAL (x)]);
13046 }
13047 return;
13048
13049 /* Print a VFP/Neon double precision or quad precision register name. */
13050 case 'P':
13051 case 'q':
13052 {
13053 int mode = GET_MODE (x);
13054 int is_quad = (code == 'q');
13055 int regno;
13056
13057 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13058 {
13059 output_operand_lossage ("invalid operand for code '%c'", code);
13060 return;
13061 }
13062
13063 if (GET_CODE (x) != REG
13064 || !IS_VFP_REGNUM (REGNO (x)))
13065 {
13066 output_operand_lossage ("invalid operand for code '%c'", code);
13067 return;
13068 }
13069
13070 regno = REGNO (x);
13071 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13072 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13073 {
13074 output_operand_lossage ("invalid operand for code '%c'", code);
13075 return;
13076 }
13077
13078 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13079 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13080 }
13081 return;
13082
13083 /* These two codes print the low/high doubleword register of a Neon quad
13084 register, respectively. For pair-structure types, they can also print
13085 the low/high quadword registers. */
13086 case 'e':
13087 case 'f':
13088 {
13089 int mode = GET_MODE (x);
13090 int regno;
13091
13092 if ((GET_MODE_SIZE (mode) != 16
13093 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13094 {
13095 output_operand_lossage ("invalid operand for code '%c'", code);
13096 return;
13097 }
13098
13099 regno = REGNO (x);
13100 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13101 {
13102 output_operand_lossage ("invalid operand for code '%c'", code);
13103 return;
13104 }
13105
13106 if (GET_MODE_SIZE (mode) == 16)
13107 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13108 + (code == 'f' ? 1 : 0));
13109 else
13110 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13111 + (code == 'f' ? 1 : 0));
13112 }
13113 return;
13114
13115 /* Print a VFPv3 floating-point constant, represented as an integer
13116 index. */
13117 case 'G':
13118 {
13119 int index = vfp3_const_double_index (x);
13120 gcc_assert (index != -1);
13121 fprintf (stream, "%d", index);
13122 }
13123 return;
13124
13125 /* Print bits representing opcode features for Neon.
13126
13127 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13128 and polynomials as unsigned.
13129
13130 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13131
13132 Bit 2 is 1 for rounding functions, 0 otherwise. */
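   /* For example, a 'T' operand whose INTVAL is 3 (signed and float) prints
      'f', while an INTVAL of 0 (unsigned integer) prints 'u'.  */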
13133
13134 /* Identify the type as 's', 'u', 'p' or 'f'. */
13135 case 'T':
13136 {
13137 HOST_WIDE_INT bits = INTVAL (x);
13138 fputc ("uspf"[bits & 3], stream);
13139 }
13140 return;
13141
13142 /* Likewise, but signed and unsigned integers are both 'i'. */
13143 case 'F':
13144 {
13145 HOST_WIDE_INT bits = INTVAL (x);
13146 fputc ("iipf"[bits & 3], stream);
13147 }
13148 return;
13149
13150 /* As for 'T', but emit 'u' instead of 'p'. */
13151 case 't':
13152 {
13153 HOST_WIDE_INT bits = INTVAL (x);
13154 fputc ("usuf"[bits & 3], stream);
13155 }
13156 return;
13157
13158 /* Bit 2: rounding (vs none). */
13159 case 'O':
13160 {
13161 HOST_WIDE_INT bits = INTVAL (x);
13162 fputs ((bits & 4) != 0 ? "r" : "", stream);
13163 }
13164 return;
13165
13166 default:
13167 if (x == 0)
13168 {
13169 output_operand_lossage ("missing operand");
13170 return;
13171 }
13172
13173 switch (GET_CODE (x))
13174 {
13175 case REG:
13176 asm_fprintf (stream, "%r", REGNO (x));
13177 break;
13178
13179 case MEM:
13180 output_memory_reference_mode = GET_MODE (x);
13181 output_address (XEXP (x, 0));
13182 break;
13183
13184 case CONST_DOUBLE:
13185 if (TARGET_NEON)
13186 {
13187 char fpstr[20];
13188 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13189 sizeof (fpstr), 0, 1);
13190 fprintf (stream, "#%s", fpstr);
13191 }
13192 else
13193 fprintf (stream, "#%s", fp_immediate_constant (x));
13194 break;
13195
13196 default:
13197 gcc_assert (GET_CODE (x) != NEG);
13198 fputc ('#', stream);
13199 output_addr_const (stream, x);
13200 break;
13201 }
13202 }
13203 }
13204 \f
13205 /* Target hook for assembling integer objects. The ARM version needs to
13206 handle word-sized values specially. */
13207 static bool
13208 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13209 {
13210 enum machine_mode mode;
13211
13212 if (size == UNITS_PER_WORD && aligned_p)
13213 {
13214 fputs ("\t.word\t", asm_out_file);
13215 output_addr_const (asm_out_file, x);
13216
13217 /* Mark symbols as position independent. We only do this in the
13218 .text segment, not in the .data segment. */
13219 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13220 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13221 {
13222 /* See legitimize_pic_address for an explanation of the
13223 TARGET_VXWORKS_RTP check. */
13224 if (TARGET_VXWORKS_RTP
13225 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13226 fputs ("(GOT)", asm_out_file);
13227 else
13228 fputs ("(GOTOFF)", asm_out_file);
13229 }
13230 fputc ('\n', asm_out_file);
13231 return true;
13232 }
13233
13234 mode = GET_MODE (x);
13235
13236 if (arm_vector_mode_supported_p (mode))
13237 {
13238 int i, units;
13239 unsigned int invmask = 0, parts_per_word;
13240
13241 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13242
13243 units = CONST_VECTOR_NUNITS (x);
13244 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13245
13246 /* For big-endian Neon vectors, we must permute the vector to the form
13247 which, when loaded by a VLDR or VLDM instruction, will give a vector
13248 with the elements in the right order. */
13249 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13250 {
13251 parts_per_word = UNITS_PER_WORD / size;
13252 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13253 support those anywhere yet. */
13254 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
13255 }
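	  /* As a worked example (assuming 32-bit words), a V4HImode constant
	     has parts_per_word == 2 and invmask == 1, so its elements are
	     emitted in the order 1, 0, 3, 2, i.e. swapped within each word.  */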
13256
13257 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13258 for (i = 0; i < units; i++)
13259 {
13260 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13261 assemble_integer
13262 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13263 }
13264 else
13265 for (i = 0; i < units; i++)
13266 {
13267 rtx elt = CONST_VECTOR_ELT (x, i);
13268 REAL_VALUE_TYPE rval;
13269
13270 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13271
13272 assemble_real
13273 (rval, GET_MODE_INNER (mode),
13274 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13275 }
13276
13277 return true;
13278 }
13279
13280 return default_assemble_integer (x, size, aligned_p);
13281 }
13282
13283 static void
13284 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13285 {
13286 section *s;
13287
13288 if (!TARGET_AAPCS_BASED)
13289 {
13290 (is_ctor ?
13291 default_named_section_asm_out_constructor
13292 : default_named_section_asm_out_destructor) (symbol, priority);
13293 return;
13294 }
13295
13296 /* Put these in the .init_array section, using a special relocation. */
13297 if (priority != DEFAULT_INIT_PRIORITY)
13298 {
13299 char buf[18];
13300 sprintf (buf, "%s.%.5u",
13301 is_ctor ? ".init_array" : ".fini_array",
13302 priority);
13303 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13304 }
13305 else if (is_ctor)
13306 s = ctors_section;
13307 else
13308 s = dtors_section;
13309
13310 switch_to_section (s);
13311 assemble_align (POINTER_SIZE);
13312 fputs ("\t.word\t", asm_out_file);
13313 output_addr_const (asm_out_file, symbol);
13314 fputs ("(target1)\n", asm_out_file);
13315 }
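/* As an illustration (not taken from real output), under AAPCS a constructor
   registered with priority 101 is placed in section ".init_array.00101" and
   emitted roughly as

	.align	2
	.word	symbol(target1)

   where the (target1) relocation may be resolved by the platform as either an
   absolute word or a PC-relative offset.  */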
13316
13317 /* Add a function to the list of static constructors. */
13318
13319 static void
13320 arm_elf_asm_constructor (rtx symbol, int priority)
13321 {
13322 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13323 }
13324
13325 /* Add a function to the list of static destructors. */
13326
13327 static void
13328 arm_elf_asm_destructor (rtx symbol, int priority)
13329 {
13330 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13331 }
13332 \f
13333 /* A finite state machine takes care of noticing whether or not instructions
13334 can be conditionally executed, and thus decrease execution time and code
13335 size by deleting branch instructions. The fsm is controlled by
13336 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13337
13338 /* The states of the fsm controlling condition codes are:
13339 0: normal, do nothing special
13340 1: make ASM_OUTPUT_OPCODE not output this instruction
13341 2: make ASM_OUTPUT_OPCODE not output this instruction
13342 3: make instructions conditional
13343 4: make instructions conditional
13344
13345 State transitions (state->state by whom under condition):
13346 0 -> 1 final_prescan_insn if the `target' is a label
13347 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13348 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13349 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13350 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13351 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13352 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13353 (the target insn is arm_target_insn).
13354
13355 If the jump clobbers the conditions then we use states 2 and 4.
13356
13357 A similar thing can be done with conditional return insns.
13358
13359 XXX In case the `target' is an unconditional branch, this conditionalising
13360 of the instructions always reduces code size, but not always execution
13361 time. But then, I want to reduce the code size to somewhere near what
13362 /bin/cc produces. */
13363
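/* As an illustrative example (a sketch, not taken from real output) of the
   transformation this fsm enables, a short forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be output as

	cmp	r0, #0
	addne	r1, r1, #1

   because the single skipped instruction is executed under the inverse of the
   branch condition.  */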
13364 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13365 instructions. When a COND_EXEC instruction is seen the subsequent
13366 instructions are scanned so that multiple conditional instructions can be
13367 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13368 specify the length and true/false mask for the IT block. These will be
13369 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
13370
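/* For instance (hypothetical), three conditional instructions whose
   predicates are eq, eq and ne can share a single IT block:

	itte	eq
	addeq	r0, r0, #1
	addeq	r1, r1, #1
	subne	r2, r2, #1

   Here arm_condexec_masklen is 3 and arm_condexec_mask is binary 011, so
   thumb2_asm_output_opcode prints the "itte" prefix before the first
   instruction of the block.  */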
13371 /* Returns the index of the ARM condition code string in
13372 `arm_condition_codes'. COMPARISON should be an rtx like
13373 `(eq (...) (...))'. */
13374 static enum arm_cond_code
13375 get_arm_condition_code (rtx comparison)
13376 {
13377 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13378 int code;
13379 enum rtx_code comp_code = GET_CODE (comparison);
13380
13381 if (GET_MODE_CLASS (mode) != MODE_CC)
13382 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13383 XEXP (comparison, 1));
13384
13385 switch (mode)
13386 {
13387 case CC_DNEmode: code = ARM_NE; goto dominance;
13388 case CC_DEQmode: code = ARM_EQ; goto dominance;
13389 case CC_DGEmode: code = ARM_GE; goto dominance;
13390 case CC_DGTmode: code = ARM_GT; goto dominance;
13391 case CC_DLEmode: code = ARM_LE; goto dominance;
13392 case CC_DLTmode: code = ARM_LT; goto dominance;
13393 case CC_DGEUmode: code = ARM_CS; goto dominance;
13394 case CC_DGTUmode: code = ARM_HI; goto dominance;
13395 case CC_DLEUmode: code = ARM_LS; goto dominance;
13396 case CC_DLTUmode: code = ARM_CC;
13397
13398 dominance:
13399 gcc_assert (comp_code == EQ || comp_code == NE);
13400
13401 if (comp_code == EQ)
13402 return ARM_INVERSE_CONDITION_CODE (code);
13403 return code;
13404
13405 case CC_NOOVmode:
13406 switch (comp_code)
13407 {
13408 case NE: return ARM_NE;
13409 case EQ: return ARM_EQ;
13410 case GE: return ARM_PL;
13411 case LT: return ARM_MI;
13412 default: gcc_unreachable ();
13413 }
13414
13415 case CC_Zmode:
13416 switch (comp_code)
13417 {
13418 case NE: return ARM_NE;
13419 case EQ: return ARM_EQ;
13420 default: gcc_unreachable ();
13421 }
13422
13423 case CC_Nmode:
13424 switch (comp_code)
13425 {
13426 case NE: return ARM_MI;
13427 case EQ: return ARM_PL;
13428 default: gcc_unreachable ();
13429 }
13430
13431 case CCFPEmode:
13432 case CCFPmode:
13433 /* These encodings assume that AC=1 in the FPA system control
13434 byte. This allows us to handle all cases except UNEQ and
13435 LTGT. */
13436 switch (comp_code)
13437 {
13438 case GE: return ARM_GE;
13439 case GT: return ARM_GT;
13440 case LE: return ARM_LS;
13441 case LT: return ARM_MI;
13442 case NE: return ARM_NE;
13443 case EQ: return ARM_EQ;
13444 case ORDERED: return ARM_VC;
13445 case UNORDERED: return ARM_VS;
13446 case UNLT: return ARM_LT;
13447 case UNLE: return ARM_LE;
13448 case UNGT: return ARM_HI;
13449 case UNGE: return ARM_PL;
13450 /* UNEQ and LTGT do not have a representation. */
13451 case UNEQ: /* Fall through. */
13452 case LTGT: /* Fall through. */
13453 default: gcc_unreachable ();
13454 }
13455
13456 case CC_SWPmode:
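      /* The condition codes were set from a comparison with the operands in
	 the opposite order, so each ordered condition maps to its swapped
	 counterpart here (GT becomes LT, GE becomes LE, and so on).  */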
13457 switch (comp_code)
13458 {
13459 case NE: return ARM_NE;
13460 case EQ: return ARM_EQ;
13461 case GE: return ARM_LE;
13462 case GT: return ARM_LT;
13463 case LE: return ARM_GE;
13464 case LT: return ARM_GT;
13465 case GEU: return ARM_LS;
13466 case GTU: return ARM_CC;
13467 case LEU: return ARM_CS;
13468 case LTU: return ARM_HI;
13469 default: gcc_unreachable ();
13470 }
13471
13472 case CC_Cmode:
13473 switch (comp_code)
13474 {
13475 case LTU: return ARM_CS;
13476 case GEU: return ARM_CC;
13477 default: gcc_unreachable ();
13478 }
13479
13480 case CCmode:
13481 switch (comp_code)
13482 {
13483 case NE: return ARM_NE;
13484 case EQ: return ARM_EQ;
13485 case GE: return ARM_GE;
13486 case GT: return ARM_GT;
13487 case LE: return ARM_LE;
13488 case LT: return ARM_LT;
13489 case GEU: return ARM_CS;
13490 case GTU: return ARM_HI;
13491 case LEU: return ARM_LS;
13492 case LTU: return ARM_CC;
13493 default: gcc_unreachable ();
13494 }
13495
13496 default: gcc_unreachable ();
13497 }
13498 }
13499
13500 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13501 instructions. */
13502 void
13503 thumb2_final_prescan_insn (rtx insn)
13504 {
13505 rtx first_insn = insn;
13506 rtx body = PATTERN (insn);
13507 rtx predicate;
13508 enum arm_cond_code code;
13509 int n;
13510 int mask;
13511
13512 /* Remove the previous insn from the count of insns to be output. */
13513 if (arm_condexec_count)
13514 arm_condexec_count--;
13515
13516 /* Nothing to do if we are already inside a conditional block. */
13517 if (arm_condexec_count)
13518 return;
13519
13520 if (GET_CODE (body) != COND_EXEC)
13521 return;
13522
13523 /* Conditional jumps are implemented directly. */
13524 if (GET_CODE (insn) == JUMP_INSN)
13525 return;
13526
13527 predicate = COND_EXEC_TEST (body);
13528 arm_current_cc = get_arm_condition_code (predicate);
13529
13530 n = get_attr_ce_count (insn);
13531 arm_condexec_count = 1;
13532 arm_condexec_mask = (1 << n) - 1;
13533 arm_condexec_masklen = n;
13534 /* See if subsequent instructions can be combined into the same block. */
13535 for (;;)
13536 {
13537 insn = next_nonnote_insn (insn);
13538
13539 /* Jumping into the middle of an IT block is illegal, so a label or
13540 barrier terminates the block. */
13541 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13542 break;
13543
13544 body = PATTERN (insn);
13545 /* USE and CLOBBER aren't really insns, so just skip them. */
13546 if (GET_CODE (body) == USE
13547 || GET_CODE (body) == CLOBBER)
13548 continue;
13549
13550 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13551 if (GET_CODE (body) != COND_EXEC)
13552 break;
13553 /* Allow up to 4 conditionally executed instructions in a block. */
13554 n = get_attr_ce_count (insn);
13555 if (arm_condexec_masklen + n > 4)
13556 break;
13557
13558 predicate = COND_EXEC_TEST (body);
13559 code = get_arm_condition_code (predicate);
13560 mask = (1 << n) - 1;
13561 if (arm_current_cc == code)
13562 arm_condexec_mask |= (mask << arm_condexec_masklen);
13563 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13564 break;
13565
13566 arm_condexec_count++;
13567 arm_condexec_masklen += n;
13568
13569 /* A jump must be the last instruction in a conditional block. */
13570 if (GET_CODE(insn) == JUMP_INSN)
13571 break;
13572 }
13573 /* Restore recog_data (getting the attributes of other insns can
13574 destroy this array, but final.c assumes that it remains intact
13575 across this call). */
13576 extract_constrain_insn_cached (first_insn);
13577 }
13578
13579 void
13580 arm_final_prescan_insn (rtx insn)
13581 {
13582 /* BODY will hold the body of INSN. */
13583 rtx body = PATTERN (insn);
13584
13585 /* This will be 1 if trying to repeat the trick, and things need to be
13586 reversed if it appears to fail. */
13587 int reverse = 0;
13588
13589   /* JUMP_CLOBBERS will be one if the condition codes are clobbered when
13590      the branch is taken, even if the rtl suggests otherwise.  It also
13591 means that we have to grub around within the jump expression to find
13592 out what the conditions are when the jump isn't taken. */
13593 int jump_clobbers = 0;
13594
13595 /* If we start with a return insn, we only succeed if we find another one. */
13596 int seeking_return = 0;
13597
13598 /* START_INSN will hold the insn from where we start looking. This is the
13599 first insn after the following code_label if REVERSE is true. */
13600 rtx start_insn = insn;
13601
13602 /* If in state 4, check if the target branch is reached, in order to
13603 change back to state 0. */
13604 if (arm_ccfsm_state == 4)
13605 {
13606 if (insn == arm_target_insn)
13607 {
13608 arm_target_insn = NULL;
13609 arm_ccfsm_state = 0;
13610 }
13611 return;
13612 }
13613
13614 /* If in state 3, it is possible to repeat the trick, if this insn is an
13615 unconditional branch to a label, and immediately following this branch
13616 is the previous target label which is only used once, and the label this
13617 branch jumps to is not too far off. */
13618 if (arm_ccfsm_state == 3)
13619 {
13620 if (simplejump_p (insn))
13621 {
13622 start_insn = next_nonnote_insn (start_insn);
13623 if (GET_CODE (start_insn) == BARRIER)
13624 {
13625 /* XXX Isn't this always a barrier? */
13626 start_insn = next_nonnote_insn (start_insn);
13627 }
13628 if (GET_CODE (start_insn) == CODE_LABEL
13629 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13630 && LABEL_NUSES (start_insn) == 1)
13631 reverse = TRUE;
13632 else
13633 return;
13634 }
13635 else if (GET_CODE (body) == RETURN)
13636 {
13637 start_insn = next_nonnote_insn (start_insn);
13638 if (GET_CODE (start_insn) == BARRIER)
13639 start_insn = next_nonnote_insn (start_insn);
13640 if (GET_CODE (start_insn) == CODE_LABEL
13641 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13642 && LABEL_NUSES (start_insn) == 1)
13643 {
13644 reverse = TRUE;
13645 seeking_return = 1;
13646 }
13647 else
13648 return;
13649 }
13650 else
13651 return;
13652 }
13653
13654 gcc_assert (!arm_ccfsm_state || reverse);
13655 if (GET_CODE (insn) != JUMP_INSN)
13656 return;
13657
13658   /* This jump might be paralleled with a clobber of the condition codes;
13659      the jump should always come first.  */
13660 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13661 body = XVECEXP (body, 0, 0);
13662
13663 if (reverse
13664 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13665 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13666 {
13667 int insns_skipped;
13668 int fail = FALSE, succeed = FALSE;
13669 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13670 int then_not_else = TRUE;
13671 rtx this_insn = start_insn, label = 0;
13672
13673 /* If the jump cannot be done with one instruction, we cannot
13674 conditionally execute the instruction in the inverse case. */
13675 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13676 {
13677 jump_clobbers = 1;
13678 return;
13679 }
13680
13681 /* Register the insn jumped to. */
13682 if (reverse)
13683 {
13684 if (!seeking_return)
13685 label = XEXP (SET_SRC (body), 0);
13686 }
13687 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13688 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13689 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13690 {
13691 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13692 then_not_else = FALSE;
13693 }
13694 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13695 seeking_return = 1;
13696 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13697 {
13698 seeking_return = 1;
13699 then_not_else = FALSE;
13700 }
13701 else
13702 gcc_unreachable ();
13703
13704 /* See how many insns this branch skips, and what kind of insns. If all
13705 insns are okay, and the label or unconditional branch to the same
13706 label is not too far away, succeed. */
13707 for (insns_skipped = 0;
13708 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13709 {
13710 rtx scanbody;
13711
13712 this_insn = next_nonnote_insn (this_insn);
13713 if (!this_insn)
13714 break;
13715
13716 switch (GET_CODE (this_insn))
13717 {
13718 case CODE_LABEL:
13719 /* Succeed if it is the target label, otherwise fail since
13720 control falls in from somewhere else. */
13721 if (this_insn == label)
13722 {
13723 if (jump_clobbers)
13724 {
13725 arm_ccfsm_state = 2;
13726 this_insn = next_nonnote_insn (this_insn);
13727 }
13728 else
13729 arm_ccfsm_state = 1;
13730 succeed = TRUE;
13731 }
13732 else
13733 fail = TRUE;
13734 break;
13735
13736 case BARRIER:
13737 /* Succeed if the following insn is the target label.
13738 Otherwise fail.
13739 If return insns are used then the last insn in a function
13740 will be a barrier. */
13741 this_insn = next_nonnote_insn (this_insn);
13742 if (this_insn && this_insn == label)
13743 {
13744 if (jump_clobbers)
13745 {
13746 arm_ccfsm_state = 2;
13747 this_insn = next_nonnote_insn (this_insn);
13748 }
13749 else
13750 arm_ccfsm_state = 1;
13751 succeed = TRUE;
13752 }
13753 else
13754 fail = TRUE;
13755 break;
13756
13757 case CALL_INSN:
13758 /* The AAPCS says that conditional calls should not be
13759 used since they make interworking inefficient (the
13760 linker can't transform BL<cond> into BLX). That's
13761 only a problem if the machine has BLX. */
13762 if (arm_arch5)
13763 {
13764 fail = TRUE;
13765 break;
13766 }
13767
13768 /* Succeed if the following insn is the target label, or
13769 if the following two insns are a barrier and the
13770 target label. */
13771 this_insn = next_nonnote_insn (this_insn);
13772 if (this_insn && GET_CODE (this_insn) == BARRIER)
13773 this_insn = next_nonnote_insn (this_insn);
13774
13775 if (this_insn && this_insn == label
13776 && insns_skipped < max_insns_skipped)
13777 {
13778 if (jump_clobbers)
13779 {
13780 arm_ccfsm_state = 2;
13781 this_insn = next_nonnote_insn (this_insn);
13782 }
13783 else
13784 arm_ccfsm_state = 1;
13785 succeed = TRUE;
13786 }
13787 else
13788 fail = TRUE;
13789 break;
13790
13791 case JUMP_INSN:
13792 /* If this is an unconditional branch to the same label, succeed.
13793 If it is to another label, do nothing. If it is conditional,
13794 fail. */
13795 /* XXX Probably, the tests for SET and the PC are
13796 unnecessary. */
13797
13798 scanbody = PATTERN (this_insn);
13799 if (GET_CODE (scanbody) == SET
13800 && GET_CODE (SET_DEST (scanbody)) == PC)
13801 {
13802 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13803 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13804 {
13805 arm_ccfsm_state = 2;
13806 succeed = TRUE;
13807 }
13808 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13809 fail = TRUE;
13810 }
13811 /* Fail if a conditional return is undesirable (e.g. on a
13812 StrongARM), but still allow this if optimizing for size. */
13813 else if (GET_CODE (scanbody) == RETURN
13814 && !use_return_insn (TRUE, NULL)
13815 && !optimize_size)
13816 fail = TRUE;
13817 else if (GET_CODE (scanbody) == RETURN
13818 && seeking_return)
13819 {
13820 arm_ccfsm_state = 2;
13821 succeed = TRUE;
13822 }
13823 else if (GET_CODE (scanbody) == PARALLEL)
13824 {
13825 switch (get_attr_conds (this_insn))
13826 {
13827 case CONDS_NOCOND:
13828 break;
13829 default:
13830 fail = TRUE;
13831 break;
13832 }
13833 }
13834 else
13835 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13836
13837 break;
13838
13839 case INSN:
13840 /* Instructions using or affecting the condition codes make it
13841 fail. */
13842 scanbody = PATTERN (this_insn);
13843 if (!(GET_CODE (scanbody) == SET
13844 || GET_CODE (scanbody) == PARALLEL)
13845 || get_attr_conds (this_insn) != CONDS_NOCOND)
13846 fail = TRUE;
13847
13848 	  /* A conditional Cirrus instruction must be followed by a
13849 	     non-Cirrus instruction.  However, by the time we get
13850 	     here we can no longer insert instructions (nops),
13851 	     because shorten_branches() has already been called.
13852 	     To be safe, therefore, we simply do not conditionalize
13853 	     Cirrus instructions in this function.  */
13855 if (GET_CODE (scanbody) != USE
13856 && GET_CODE (scanbody) != CLOBBER
13857 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13858 fail = TRUE;
13859 break;
13860
13861 default:
13862 break;
13863 }
13864 }
13865 if (succeed)
13866 {
13867 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13868 arm_target_label = CODE_LABEL_NUMBER (label);
13869 else
13870 {
13871 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13872
13873 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13874 {
13875 this_insn = next_nonnote_insn (this_insn);
13876 gcc_assert (!this_insn
13877 || (GET_CODE (this_insn) != BARRIER
13878 && GET_CODE (this_insn) != CODE_LABEL));
13879 }
13880 if (!this_insn)
13881 {
13882 	      /* Oh dear!  We ran off the end; give up.  */
13883 extract_constrain_insn_cached (insn);
13884 arm_ccfsm_state = 0;
13885 arm_target_insn = NULL;
13886 return;
13887 }
13888 arm_target_insn = this_insn;
13889 }
13890 if (jump_clobbers)
13891 {
13892 gcc_assert (!reverse);
13893 arm_current_cc =
13894 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13895 0), 0), 1));
13896 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13897 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13898 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13899 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13900 }
13901 else
13902 {
13903 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13904 what it was. */
13905 if (!reverse)
13906 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13907 0));
13908 }
13909
13910 if (reverse || then_not_else)
13911 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13912 }
13913
13914 /* Restore recog_data (getting the attributes of other insns can
13915 destroy this array, but final.c assumes that it remains intact
13916      across this call).  */
13917 extract_constrain_insn_cached (insn);
13918 }
13919 }
13920
13921 /* Output IT instructions. */
13922 void
13923 thumb2_asm_output_opcode (FILE * stream)
13924 {
13925 char buff[5];
13926 int n;
13927
13928 if (arm_condexec_mask)
13929 {
13930 for (n = 0; n < arm_condexec_masklen; n++)
13931 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13932 buff[n] = 0;
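      /* The first instruction of a block always matches the tested condition,
	 so buff always starts with 't'; prefixing "i" below therefore
	 produces "it", "itt", "ite", "itte" and so on.  */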
13933 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13934 arm_condition_codes[arm_current_cc]);
13935 arm_condexec_mask = 0;
13936 }
13937 }
13938
13939 /* Returns true if REGNO is a valid register
13940 for holding a quantity of type MODE. */
13941 int
13942 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13943 {
13944 if (GET_MODE_CLASS (mode) == MODE_CC)
13945 return (regno == CC_REGNUM
13946 || (TARGET_HARD_FLOAT && TARGET_VFP
13947 && regno == VFPCC_REGNUM));
13948
13949 if (TARGET_THUMB1)
13950 /* For the Thumb we only allow values bigger than SImode in
13951 registers 0 - 6, so that there is always a second low
13952 register available to hold the upper part of the value.
13953        We probably ought to ensure that the register is the
13954        start of an even-numbered register pair.  */
13955 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13956
13957 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13958 && IS_CIRRUS_REGNUM (regno))
13959 /* We have outlawed SI values in Cirrus registers because they
13960 reside in the lower 32 bits, but SF values reside in the
13961 upper 32 bits. This causes gcc all sorts of grief. We can't
13962 even split the registers into pairs because Cirrus SI values
13963        get sign-extended to 64 bits -- aldyh.  */
13964 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13965
13966 if (TARGET_HARD_FLOAT && TARGET_VFP
13967 && IS_VFP_REGNUM (regno))
13968 {
13969 if (mode == SFmode || mode == SImode)
13970 return VFP_REGNO_OK_FOR_SINGLE (regno);
13971
13972 if (mode == DFmode)
13973 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13974
13975 if (TARGET_NEON)
13976 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13977 || (VALID_NEON_QREG_MODE (mode)
13978 && NEON_REGNO_OK_FOR_QUAD (regno))
13979 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13980 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13981 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13982 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13983 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13984
13985 return FALSE;
13986 }
13987
13988 if (TARGET_REALLY_IWMMXT)
13989 {
13990 if (IS_IWMMXT_GR_REGNUM (regno))
13991 return mode == SImode;
13992
13993 if (IS_IWMMXT_REGNUM (regno))
13994 return VALID_IWMMXT_REG_MODE (mode);
13995 }
13996
13997 /* We allow any value to be stored in the general registers.
13998 Restrict doubleword quantities to even register pairs so that we can
13999 use ldrd. Do not allow Neon structure opaque modes in general registers;
14000 they would use too many. */
14001 if (regno <= LAST_ARM_REGNUM)
14002 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14003 && !VALID_NEON_STRUCT_MODE (mode);
14004
14005 if (regno == FRAME_POINTER_REGNUM
14006 || regno == ARG_POINTER_REGNUM)
14007 /* We only allow integers in the fake hard registers. */
14008 return GET_MODE_CLASS (mode) == MODE_INT;
14009
14010 /* The only registers left are the FPA registers
14011 which we only allow to hold FP values. */
14012 return (TARGET_HARD_FLOAT && TARGET_FPA
14013 && GET_MODE_CLASS (mode) == MODE_FLOAT
14014 && regno >= FIRST_FPA_REGNUM
14015 && regno <= LAST_FPA_REGNUM);
14016 }
14017
14018 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14019 not used in arm mode. */
14020 int
14021 arm_regno_class (int regno)
14022 {
14023 if (TARGET_THUMB1)
14024 {
14025 if (regno == STACK_POINTER_REGNUM)
14026 return STACK_REG;
14027 if (regno == CC_REGNUM)
14028 return CC_REG;
14029 if (regno < 8)
14030 return LO_REGS;
14031 return HI_REGS;
14032 }
14033
14034 if (TARGET_THUMB2 && regno < 8)
14035 return LO_REGS;
14036
14037 if ( regno <= LAST_ARM_REGNUM
14038 || regno == FRAME_POINTER_REGNUM
14039 || regno == ARG_POINTER_REGNUM)
14040 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14041
14042 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14043 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14044
14045 if (IS_CIRRUS_REGNUM (regno))
14046 return CIRRUS_REGS;
14047
14048 if (IS_VFP_REGNUM (regno))
14049 {
14050 if (regno <= D7_VFP_REGNUM)
14051 return VFP_D0_D7_REGS;
14052 else if (regno <= LAST_LO_VFP_REGNUM)
14053 return VFP_LO_REGS;
14054 else
14055 return VFP_HI_REGS;
14056 }
14057
14058 if (IS_IWMMXT_REGNUM (regno))
14059 return IWMMXT_REGS;
14060
14061 if (IS_IWMMXT_GR_REGNUM (regno))
14062 return IWMMXT_GR_REGS;
14063
14064 return FPA_REGS;
14065 }
14066
14067 /* Handle a special case when computing the offset
14068 of an argument from the frame pointer. */
14069 int
14070 arm_debugger_arg_offset (int value, rtx addr)
14071 {
14072 rtx insn;
14073
14074 /* We are only interested if dbxout_parms() failed to compute the offset. */
14075 if (value != 0)
14076 return 0;
14077
14078 /* We can only cope with the case where the address is held in a register. */
14079 if (GET_CODE (addr) != REG)
14080 return 0;
14081
14082 /* If we are using the frame pointer to point at the argument, then
14083 an offset of 0 is correct. */
14084 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14085 return 0;
14086
14087 /* If we are using the stack pointer to point at the
14088 argument, then an offset of 0 is correct. */
14089 /* ??? Check this is consistent with thumb2 frame layout. */
14090 if ((TARGET_THUMB || !frame_pointer_needed)
14091 && REGNO (addr) == SP_REGNUM)
14092 return 0;
14093
14094 /* Oh dear. The argument is pointed to by a register rather
14095 than being held in a register, or being stored at a known
14096 offset from the frame pointer. Since GDB only understands
14097 those two kinds of argument we must translate the address
14098 held in the register into an offset from the frame pointer.
14099 We do this by searching through the insns for the function
14100 looking to see where this register gets its value. If the
14101 register is initialized from the frame pointer plus an offset
14102 then we are in luck and we can continue, otherwise we give up.
14103
14104 This code is exercised by producing debugging information
14105 for a function with arguments like this:
14106
14107 double func (double a, double b, int c, double d) {return d;}
14108
14109 Without this code the stab for parameter 'd' will be set to
14110 an offset of 0 from the frame pointer, rather than 8. */
14111
14112 /* The if() statement says:
14113
14114 If the insn is a normal instruction
14115 and if the insn is setting the value in a register
14116 and if the register being set is the register holding the address of the argument
14117        and if the address is computed by an addition
14118 that involves adding to a register
14119 which is the frame pointer
14120 a constant integer
14121
14122 then... */
14123
14124 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14125 {
14126 if ( GET_CODE (insn) == INSN
14127 && GET_CODE (PATTERN (insn)) == SET
14128 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14129 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14130 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14131 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14132 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14133 )
14134 {
14135 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14136
14137 break;
14138 }
14139 }
14140
14141 if (value == 0)
14142 {
14143 debug_rtx (addr);
14144 warning (0, "unable to compute real location of stacked parameter");
14145 value = 8; /* XXX magic hack */
14146 }
14147
14148 return value;
14149 }
14150 \f
14151 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14152 do \
14153 { \
14154 if ((MASK) & insn_flags) \
14155 add_builtin_function ((NAME), (TYPE), (CODE), \
14156 BUILT_IN_MD, NULL, NULL_TREE); \
14157 } \
14158 while (0)
14159
14160 struct builtin_description
14161 {
14162 const unsigned int mask;
14163 const enum insn_code icode;
14164 const char * const name;
14165 const enum arm_builtins code;
14166 const enum rtx_code comparison;
14167 const unsigned int flag;
14168 };
14169
14170 static const struct builtin_description bdesc_2arg[] =
14171 {
14172 #define IWMMXT_BUILTIN(code, string, builtin) \
14173 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14174 ARM_BUILTIN_##builtin, 0, 0 },
14175
14176 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14177 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14178 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14179 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14180 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14181 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14182 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14183 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14184 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14185 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14186 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14187 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14188 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14189 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14190 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14191 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14192 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14193 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14194 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14195 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14196 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14197 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14198 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14199 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14200 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14201 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14202 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14203 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14204 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14205 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14206 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14207 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14208 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14209 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14210 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14211 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14212 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14213 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14214 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14215 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14216 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14217 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14218 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14219 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14220 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14221 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14222 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14223 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14224 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14225 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14226 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14227 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14228 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14229 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14230 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14231 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14232 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14233 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14234
14235 #define IWMMXT_BUILTIN2(code, builtin) \
14236 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14237
14238 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14239 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14240 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14241 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14242 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14243 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14244 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14245 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14246 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14247 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14248 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14249 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14250 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14251 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14252 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14253 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14254 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14255 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14256 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14257 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14258 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14259 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14260 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14261 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14262 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14263 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14264 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14265 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14266 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14267 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14268 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14269 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14270 };
14271
14272 static const struct builtin_description bdesc_1arg[] =
14273 {
14274 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14275 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14276 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14277 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14278 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14279 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14280 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14281 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14282 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14283 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14284 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14285 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14286 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14287 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14288 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14289 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14290 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14291 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14292 };
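/* As a usage sketch (hypothetical user code, not part of this file), the
   two-operand entries above surface as builtins such as __builtin_arm_waddb,
   which mmintrin.h in turn wraps in _mm_* style intrinsics:

	typedef char __v8qi __attribute__ ((vector_size (8)));

	__v8qi
	add_bytes (__v8qi a, __v8qi b)
	{
	  return __builtin_arm_waddb (a, b);
	}
*/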
14293
14294 /* Set up all the iWMMXt builtins. This is
14295 not called if TARGET_IWMMXT is zero. */
14296
14297 static void
14298 arm_init_iwmmxt_builtins (void)
14299 {
14300 const struct builtin_description * d;
14301 size_t i;
14302 tree endlink = void_list_node;
14303
14304 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14305 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14306 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14307
14308 tree int_ftype_int
14309 = build_function_type (integer_type_node,
14310 tree_cons (NULL_TREE, integer_type_node, endlink));
14311 tree v8qi_ftype_v8qi_v8qi_int
14312 = build_function_type (V8QI_type_node,
14313 tree_cons (NULL_TREE, V8QI_type_node,
14314 tree_cons (NULL_TREE, V8QI_type_node,
14315 tree_cons (NULL_TREE,
14316 integer_type_node,
14317 endlink))));
14318 tree v4hi_ftype_v4hi_int
14319 = build_function_type (V4HI_type_node,
14320 tree_cons (NULL_TREE, V4HI_type_node,
14321 tree_cons (NULL_TREE, integer_type_node,
14322 endlink)));
14323 tree v2si_ftype_v2si_int
14324 = build_function_type (V2SI_type_node,
14325 tree_cons (NULL_TREE, V2SI_type_node,
14326 tree_cons (NULL_TREE, integer_type_node,
14327 endlink)));
14328 tree v2si_ftype_di_di
14329 = build_function_type (V2SI_type_node,
14330 tree_cons (NULL_TREE, long_long_integer_type_node,
14331 tree_cons (NULL_TREE, long_long_integer_type_node,
14332 endlink)));
14333 tree di_ftype_di_int
14334 = build_function_type (long_long_integer_type_node,
14335 tree_cons (NULL_TREE, long_long_integer_type_node,
14336 tree_cons (NULL_TREE, integer_type_node,
14337 endlink)));
14338 tree di_ftype_di_int_int
14339 = build_function_type (long_long_integer_type_node,
14340 tree_cons (NULL_TREE, long_long_integer_type_node,
14341 tree_cons (NULL_TREE, integer_type_node,
14342 tree_cons (NULL_TREE,
14343 integer_type_node,
14344 endlink))));
14345 tree int_ftype_v8qi
14346 = build_function_type (integer_type_node,
14347 tree_cons (NULL_TREE, V8QI_type_node,
14348 endlink));
14349 tree int_ftype_v4hi
14350 = build_function_type (integer_type_node,
14351 tree_cons (NULL_TREE, V4HI_type_node,
14352 endlink));
14353 tree int_ftype_v2si
14354 = build_function_type (integer_type_node,
14355 tree_cons (NULL_TREE, V2SI_type_node,
14356 endlink));
14357 tree int_ftype_v8qi_int
14358 = build_function_type (integer_type_node,
14359 tree_cons (NULL_TREE, V8QI_type_node,
14360 tree_cons (NULL_TREE, integer_type_node,
14361 endlink)));
14362 tree int_ftype_v4hi_int
14363 = build_function_type (integer_type_node,
14364 tree_cons (NULL_TREE, V4HI_type_node,
14365 tree_cons (NULL_TREE, integer_type_node,
14366 endlink)));
14367 tree int_ftype_v2si_int
14368 = build_function_type (integer_type_node,
14369 tree_cons (NULL_TREE, V2SI_type_node,
14370 tree_cons (NULL_TREE, integer_type_node,
14371 endlink)));
14372 tree v8qi_ftype_v8qi_int_int
14373 = build_function_type (V8QI_type_node,
14374 tree_cons (NULL_TREE, V8QI_type_node,
14375 tree_cons (NULL_TREE, integer_type_node,
14376 tree_cons (NULL_TREE,
14377 integer_type_node,
14378 endlink))));
14379 tree v4hi_ftype_v4hi_int_int
14380 = build_function_type (V4HI_type_node,
14381 tree_cons (NULL_TREE, V4HI_type_node,
14382 tree_cons (NULL_TREE, integer_type_node,
14383 tree_cons (NULL_TREE,
14384 integer_type_node,
14385 endlink))));
14386 tree v2si_ftype_v2si_int_int
14387 = build_function_type (V2SI_type_node,
14388 tree_cons (NULL_TREE, V2SI_type_node,
14389 tree_cons (NULL_TREE, integer_type_node,
14390 tree_cons (NULL_TREE,
14391 integer_type_node,
14392 endlink))));
14393 /* Miscellaneous. */
14394 tree v8qi_ftype_v4hi_v4hi
14395 = build_function_type (V8QI_type_node,
14396 tree_cons (NULL_TREE, V4HI_type_node,
14397 tree_cons (NULL_TREE, V4HI_type_node,
14398 endlink)));
14399 tree v4hi_ftype_v2si_v2si
14400 = build_function_type (V4HI_type_node,
14401 tree_cons (NULL_TREE, V2SI_type_node,
14402 tree_cons (NULL_TREE, V2SI_type_node,
14403 endlink)));
14404 tree v2si_ftype_v4hi_v4hi
14405 = build_function_type (V2SI_type_node,
14406 tree_cons (NULL_TREE, V4HI_type_node,
14407 tree_cons (NULL_TREE, V4HI_type_node,
14408 endlink)));
14409 tree v2si_ftype_v8qi_v8qi
14410 = build_function_type (V2SI_type_node,
14411 tree_cons (NULL_TREE, V8QI_type_node,
14412 tree_cons (NULL_TREE, V8QI_type_node,
14413 endlink)));
14414 tree v4hi_ftype_v4hi_di
14415 = build_function_type (V4HI_type_node,
14416 tree_cons (NULL_TREE, V4HI_type_node,
14417 tree_cons (NULL_TREE,
14418 long_long_integer_type_node,
14419 endlink)));
14420 tree v2si_ftype_v2si_di
14421 = build_function_type (V2SI_type_node,
14422 tree_cons (NULL_TREE, V2SI_type_node,
14423 tree_cons (NULL_TREE,
14424 long_long_integer_type_node,
14425 endlink)));
14426 tree void_ftype_int_int
14427 = build_function_type (void_type_node,
14428 tree_cons (NULL_TREE, integer_type_node,
14429 tree_cons (NULL_TREE, integer_type_node,
14430 endlink)));
14431 tree di_ftype_void
14432 = build_function_type (long_long_unsigned_type_node, endlink);
14433 tree di_ftype_v8qi
14434 = build_function_type (long_long_integer_type_node,
14435 tree_cons (NULL_TREE, V8QI_type_node,
14436 endlink));
14437 tree di_ftype_v4hi
14438 = build_function_type (long_long_integer_type_node,
14439 tree_cons (NULL_TREE, V4HI_type_node,
14440 endlink));
14441 tree di_ftype_v2si
14442 = build_function_type (long_long_integer_type_node,
14443 tree_cons (NULL_TREE, V2SI_type_node,
14444 endlink));
14445 tree v2si_ftype_v4hi
14446 = build_function_type (V2SI_type_node,
14447 tree_cons (NULL_TREE, V4HI_type_node,
14448 endlink));
14449 tree v4hi_ftype_v8qi
14450 = build_function_type (V4HI_type_node,
14451 tree_cons (NULL_TREE, V8QI_type_node,
14452 endlink));
14453
14454 tree di_ftype_di_v4hi_v4hi
14455 = build_function_type (long_long_unsigned_type_node,
14456 tree_cons (NULL_TREE,
14457 long_long_unsigned_type_node,
14458 tree_cons (NULL_TREE, V4HI_type_node,
14459 tree_cons (NULL_TREE,
14460 V4HI_type_node,
14461 endlink))));
14462
14463 tree di_ftype_v4hi_v4hi
14464 = build_function_type (long_long_unsigned_type_node,
14465 tree_cons (NULL_TREE, V4HI_type_node,
14466 tree_cons (NULL_TREE, V4HI_type_node,
14467 endlink)));
14468
14469 /* Normal vector binops. */
14470 tree v8qi_ftype_v8qi_v8qi
14471 = build_function_type (V8QI_type_node,
14472 tree_cons (NULL_TREE, V8QI_type_node,
14473 tree_cons (NULL_TREE, V8QI_type_node,
14474 endlink)));
14475 tree v4hi_ftype_v4hi_v4hi
14476 = build_function_type (V4HI_type_node,
14477 tree_cons (NULL_TREE, V4HI_type_node,
14478 tree_cons (NULL_TREE, V4HI_type_node,
14479 endlink)));
14480 tree v2si_ftype_v2si_v2si
14481 = build_function_type (V2SI_type_node,
14482 tree_cons (NULL_TREE, V2SI_type_node,
14483 tree_cons (NULL_TREE, V2SI_type_node,
14484 endlink)));
14485 tree di_ftype_di_di
14486 = build_function_type (long_long_unsigned_type_node,
14487 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14488 tree_cons (NULL_TREE,
14489 long_long_unsigned_type_node,
14490 endlink)));
14491
14492 /* Add all builtins that are more or less simple operations on two
14493 operands. */
14494 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14495 {
14496 /* Use one of the operands; the target can have a different mode for
14497 mask-generating compares. */
14498 enum machine_mode mode;
14499 tree type;
14500
14501 if (d->name == 0)
14502 continue;
14503
14504 mode = insn_data[d->icode].operand[1].mode;
14505
14506 switch (mode)
14507 {
14508 case V8QImode:
14509 type = v8qi_ftype_v8qi_v8qi;
14510 break;
14511 case V4HImode:
14512 type = v4hi_ftype_v4hi_v4hi;
14513 break;
14514 case V2SImode:
14515 type = v2si_ftype_v2si_v2si;
14516 break;
14517 case DImode:
14518 type = di_ftype_di_di;
14519 break;
14520
14521 default:
14522 gcc_unreachable ();
14523 }
14524
14525 def_mbuiltin (d->mask, d->name, type, d->code);
14526 }
14527
14528 /* Add the remaining MMX insns with somewhat more complicated types. */
14529 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14532
14533 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14534 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14535 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14536 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14537 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14538 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14539
14540 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14541 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14542 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14543 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14544 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14545 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14546
14547 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14548 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14550 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14551 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14552 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14553
14554 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14555 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14556 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14557 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14558 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14560
14561 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14562
14563 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14564 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14565 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14566 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14567
14568 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14569 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14570 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14572 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14573 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14574 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14575 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14577
14578 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14579 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14581
14582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14584 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14585
14586 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14588 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14589 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14591 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14592
14593 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14594 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14595 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14596 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14597 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14598 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14599 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14600 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14601 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14602 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14603 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14604 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14605
14606 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14607 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14608 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14609 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14610
14611 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14612 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14613 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14614 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14615 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14616 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14617 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14618 }
14619
14620 static void
14621 arm_init_tls_builtins (void)
14622 {
14623 tree ftype, decl;
14624
14625 ftype = build_function_type (ptr_type_node, void_list_node);
14626 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14627 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14628 NULL, NULL_TREE);
14629 TREE_NOTHROW (decl) = 1;
14630 TREE_READONLY (decl) = 1;
14631 }
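/* A minimal usage sketch (illustrative only): user code can write

	void *tp = __builtin_thread_pointer ();

   which the backend expands either to a read of the thread-ID register or to
   a call to __aeabi_read_tp, depending on the target.  */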
14632
14633 typedef enum {
14634 T_V8QI = 0x0001,
14635 T_V4HI = 0x0002,
14636 T_V2SI = 0x0004,
14637 T_V2SF = 0x0008,
14638 T_DI = 0x0010,
14639 T_V16QI = 0x0020,
14640 T_V8HI = 0x0040,
14641 T_V4SI = 0x0080,
14642 T_V4SF = 0x0100,
14643 T_V2DI = 0x0200,
14644 T_TI = 0x0400,
14645 T_EI = 0x0800,
14646 T_OI = 0x1000
14647 } neon_builtin_type_bits;
14648
14649 #define v8qi_UP T_V8QI
14650 #define v4hi_UP T_V4HI
14651 #define v2si_UP T_V2SI
14652 #define v2sf_UP T_V2SF
14653 #define di_UP T_DI
14654 #define v16qi_UP T_V16QI
14655 #define v8hi_UP T_V8HI
14656 #define v4si_UP T_V4SI
14657 #define v4sf_UP T_V4SF
14658 #define v2di_UP T_V2DI
14659 #define ti_UP T_TI
14660 #define ei_UP T_EI
14661 #define oi_UP T_OI
14662
14663 #define UP(X) X##_UP
14664
14665 #define T_MAX 13
14666
14667 typedef enum {
14668 NEON_BINOP,
14669 NEON_TERNOP,
14670 NEON_UNOP,
14671 NEON_GETLANE,
14672 NEON_SETLANE,
14673 NEON_CREATE,
14674 NEON_DUP,
14675 NEON_DUPLANE,
14676 NEON_COMBINE,
14677 NEON_SPLIT,
14678 NEON_LANEMUL,
14679 NEON_LANEMULL,
14680 NEON_LANEMULH,
14681 NEON_LANEMAC,
14682 NEON_SCALARMUL,
14683 NEON_SCALARMULL,
14684 NEON_SCALARMULH,
14685 NEON_SCALARMAC,
14686 NEON_CONVERT,
14687 NEON_FIXCONV,
14688 NEON_SELECT,
14689 NEON_RESULTPAIR,
14690 NEON_REINTERP,
14691 NEON_VTBL,
14692 NEON_VTBX,
14693 NEON_LOAD1,
14694 NEON_LOAD1LANE,
14695 NEON_STORE1,
14696 NEON_STORE1LANE,
14697 NEON_LOADSTRUCT,
14698 NEON_LOADSTRUCTLANE,
14699 NEON_STORESTRUCT,
14700 NEON_STORESTRUCTLANE,
14701 NEON_LOGICBINOP,
14702 NEON_SHIFTINSERT,
14703 NEON_SHIFTIMM,
14704 NEON_SHIFTACC
14705 } neon_itype;
14706
14707 typedef struct {
14708 const char *name;
14709 const neon_itype itype;
14710 const neon_builtin_type_bits bits;
14711 const enum insn_code codes[T_MAX];
14712 const unsigned int num_vars;
14713 unsigned int base_fcode;
14714 } neon_builtin_datum;
14715
14716 #define CF(N,X) CODE_FOR_neon_##N##X
14717
14718 #define VAR1(T, N, A) \
14719 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14720 #define VAR2(T, N, A, B) \
14721 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14722 #define VAR3(T, N, A, B, C) \
14723 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14724 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14725 #define VAR4(T, N, A, B, C, D) \
14726 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14727 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14728 #define VAR5(T, N, A, B, C, D, E) \
14729 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14730 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14731 #define VAR6(T, N, A, B, C, D, E, F) \
14732 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14733 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14734 #define VAR7(T, N, A, B, C, D, E, F, G) \
14735 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14736 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14737 CF (N, G) }, 7, 0
14738 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14739 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14740 | UP (H), \
14741 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14742 CF (N, G), CF (N, H) }, 8, 0
14743 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14744 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14745 | UP (H) | UP (I), \
14746 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14747 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14748 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14749 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14750 | UP (H) | UP (I) | UP (J), \
14751 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14752 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
14753
14754 /* The mode entries in the following table correspond to the "key" type of the
14755    instruction variant, i.e. the type that would be specified after the
14756    assembler mnemonic, which usually refers to the last vector operand.
14757    (Signed, unsigned and polynomial types are not distinguished, however;
14758    they are all mapped onto the same mode for a given element size.)  The modes
14759 listed per instruction should be the same as those defined for that
14760 instruction's pattern in neon.md.
14761 WARNING: Variants should be listed in the same increasing order as
14762 neon_builtin_type_bits. */
14763
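/* As an illustrative sketch (not itself an entry in the table), a line such as
     { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }
   expands, via the macros above, to roughly
     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
         CODE_FOR_neon_vaddlv2si }, 3, 0 }
   i.e. one neon_builtin_datum describing three variants of the same builtin.  */
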
14764 static neon_builtin_datum neon_builtin_data[] =
14765 {
14766 { VAR10 (BINOP, vadd,
14767 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14768 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14769 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14770 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14771 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14772 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14773 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14774 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14775 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14776 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14777 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14778 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14779 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14780 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14781 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14782 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14783 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14784 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14785 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14786 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14787 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14788 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14789 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14790 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14791 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14792 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14793 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14794 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14795 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14796 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14797 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14798 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14799 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14800 { VAR10 (BINOP, vsub,
14801 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14802 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14803 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14804 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14805 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14806 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14807 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14808 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14809 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14810 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14811 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14812 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14813 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14814 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14815 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14816 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14817 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14818 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14819 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14820 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14821 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14822 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14823 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14824 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14825 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14826 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14827 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14828 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14829 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14830 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14831 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14832 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14833 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14834 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14835 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14836 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14837 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14838 /* FIXME: vget_lane supports more variants than this! */
14839 { VAR10 (GETLANE, vget_lane,
14840 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14841 { VAR10 (SETLANE, vset_lane,
14842 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14843 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14844 { VAR10 (DUP, vdup_n,
14845 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14846 { VAR10 (DUPLANE, vdup_lane,
14847 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14848 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14849 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14850 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14851 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14852 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14853 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14854 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14855 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14856 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14857 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14858 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14859 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14860 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14861 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14862 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14863 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14864 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14865 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14866 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14867 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14868 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14869 { VAR10 (BINOP, vext,
14870 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14871 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14872 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14873 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14874 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14875 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14876 { VAR10 (SELECT, vbsl,
14877 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14878 { VAR1 (VTBL, vtbl1, v8qi) },
14879 { VAR1 (VTBL, vtbl2, v8qi) },
14880 { VAR1 (VTBL, vtbl3, v8qi) },
14881 { VAR1 (VTBL, vtbl4, v8qi) },
14882 { VAR1 (VTBX, vtbx1, v8qi) },
14883 { VAR1 (VTBX, vtbx2, v8qi) },
14884 { VAR1 (VTBX, vtbx3, v8qi) },
14885 { VAR1 (VTBX, vtbx4, v8qi) },
14886 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14887 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14888 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14889 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14890 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14891 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14892 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14893 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14894 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14895 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14896 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14897 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14898 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14899 { VAR10 (LOAD1, vld1,
14900 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14901 { VAR10 (LOAD1LANE, vld1_lane,
14902 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14903 { VAR10 (LOAD1, vld1_dup,
14904 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14905 { VAR10 (STORE1, vst1,
14906 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14907 { VAR10 (STORE1LANE, vst1_lane,
14908 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14909 { VAR9 (LOADSTRUCT,
14910 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14911 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14912 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14913 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14914 { VAR9 (STORESTRUCT, vst2,
14915 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14916 { VAR7 (STORESTRUCTLANE, vst2_lane,
14917 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14918 { VAR9 (LOADSTRUCT,
14919 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14920 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14921 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14922 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14923 { VAR9 (STORESTRUCT, vst3,
14924 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14925 { VAR7 (STORESTRUCTLANE, vst3_lane,
14926 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14927 { VAR9 (LOADSTRUCT, vld4,
14928 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14929 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14930 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14931 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14932 { VAR9 (STORESTRUCT, vst4,
14933 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14934 { VAR7 (STORESTRUCTLANE, vst4_lane,
14935 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14936 { VAR10 (LOGICBINOP, vand,
14937 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14938 { VAR10 (LOGICBINOP, vorr,
14939 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14940 { VAR10 (BINOP, veor,
14941 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14942 { VAR10 (LOGICBINOP, vbic,
14943 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14944 { VAR10 (LOGICBINOP, vorn,
14945 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14946 };
14947
14948 #undef CF
14949 #undef VAR1
14950 #undef VAR2
14951 #undef VAR3
14952 #undef VAR4
14953 #undef VAR5
14954 #undef VAR6
14955 #undef VAR7
14956 #undef VAR8
14957 #undef VAR9
14958 #undef VAR10
14959
14960 static void
14961 arm_init_neon_builtins (void)
14962 {
14963 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14964
14965 /* Create distinguished type nodes for NEON vector element types,
14966 and pointers to values of such types, so we can detect them later. */
14967 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14968 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14969 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14970 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14971 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14972 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14973 tree neon_float_type_node = make_node (REAL_TYPE);
14974
14975 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14976 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14977 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14978 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14979 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14980
14981 /* Next create constant-qualified versions of the above types. */
14982 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14983 TYPE_QUAL_CONST);
14984 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14985 TYPE_QUAL_CONST);
14986 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14987 TYPE_QUAL_CONST);
14988 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14989 TYPE_QUAL_CONST);
14990 tree const_float_node = build_qualified_type (neon_float_type_node,
14991 TYPE_QUAL_CONST);
14992
14993 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14994 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14995 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14996 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14997 tree const_float_pointer_node = build_pointer_type (const_float_node);
14998
14999 /* Now create vector types based on our NEON element types. */
15000 /* 64-bit vectors. */
15001 tree V8QI_type_node =
15002 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15003 tree V4HI_type_node =
15004 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15005 tree V2SI_type_node =
15006 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15007 tree V2SF_type_node =
15008 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15009 /* 128-bit vectors. */
15010 tree V16QI_type_node =
15011 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15012 tree V8HI_type_node =
15013 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15014 tree V4SI_type_node =
15015 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15016 tree V4SF_type_node =
15017 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15018 tree V2DI_type_node =
15019 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15020
15021 /* Unsigned integer types for various mode sizes. */
15022 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15023 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15024 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15025 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15026
15027 /* Opaque integer types for structures of vectors. */
15028 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15029 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15030 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15031 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15032
15033 /* Pointers to vector types. */
15034 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15035 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15036 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15037 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15038 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15039 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15040 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15041 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15042 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15043
15044 /* Operations which return results as pairs. */
15045 tree void_ftype_pv8qi_v8qi_v8qi =
15046 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15047 V8QI_type_node, NULL);
15048 tree void_ftype_pv4hi_v4hi_v4hi =
15049 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15050 V4HI_type_node, NULL);
15051 tree void_ftype_pv2si_v2si_v2si =
15052 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15053 V2SI_type_node, NULL);
15054 tree void_ftype_pv2sf_v2sf_v2sf =
15055 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15056 V2SF_type_node, NULL);
15057 tree void_ftype_pdi_di_di =
15058 build_function_type_list (void_type_node, intDI_pointer_node,
15059 neon_intDI_type_node, neon_intDI_type_node, NULL);
15060 tree void_ftype_pv16qi_v16qi_v16qi =
15061 build_function_type_list (void_type_node, V16QI_pointer_node,
15062 V16QI_type_node, V16QI_type_node, NULL);
15063 tree void_ftype_pv8hi_v8hi_v8hi =
15064 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15065 V8HI_type_node, NULL);
15066 tree void_ftype_pv4si_v4si_v4si =
15067 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15068 V4SI_type_node, NULL);
15069 tree void_ftype_pv4sf_v4sf_v4sf =
15070 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15071 V4SF_type_node, NULL);
15072 tree void_ftype_pv2di_v2di_v2di =
15073 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15074 V2DI_type_node, NULL);
15075
15076 tree reinterp_ftype_dreg[5][5];
15077 tree reinterp_ftype_qreg[5][5];
15078 tree dreg_types[5], qreg_types[5];
15079
15080 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15081 layout_type (neon_float_type_node);
15082
15083 /* Define typedefs which exactly correspond to the modes we are basing vector
15084 types on. If you change these names you'll need to change
15085 the table used by arm_mangle_type too. */
15086 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15087 "__builtin_neon_qi");
15088 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15089 "__builtin_neon_hi");
15090 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15091 "__builtin_neon_si");
15092 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15093 "__builtin_neon_sf");
15094 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15095 "__builtin_neon_di");
15096
15097 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15098 "__builtin_neon_poly8");
15099 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15100 "__builtin_neon_poly16");
15101 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15102 "__builtin_neon_uqi");
15103 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15104 "__builtin_neon_uhi");
15105 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15106 "__builtin_neon_usi");
15107 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15108 "__builtin_neon_udi");
15109
15110 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15111 "__builtin_neon_ti");
15112 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15113 "__builtin_neon_ei");
15114 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15115 "__builtin_neon_oi");
15116 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15117 "__builtin_neon_ci");
15118 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15119 "__builtin_neon_xi");
15120
15121 dreg_types[0] = V8QI_type_node;
15122 dreg_types[1] = V4HI_type_node;
15123 dreg_types[2] = V2SI_type_node;
15124 dreg_types[3] = V2SF_type_node;
15125 dreg_types[4] = neon_intDI_type_node;
15126
15127 qreg_types[0] = V16QI_type_node;
15128 qreg_types[1] = V8HI_type_node;
15129 qreg_types[2] = V4SI_type_node;
15130 qreg_types[3] = V4SF_type_node;
15131 qreg_types[4] = V2DI_type_node;
15132
15133 for (i = 0; i < 5; i++)
15134 {
15135 int j;
15136 for (j = 0; j < 5; j++)
15137 {
15138 reinterp_ftype_dreg[i][j]
15139 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15140 reinterp_ftype_qreg[i][j]
15141 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15142 }
15143 }
15144
15145 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15146 {
15147 neon_builtin_datum *d = &neon_builtin_data[i];
15148 unsigned int j, codeidx = 0;
15149
15150 d->base_fcode = fcode;
15151
15152 for (j = 0; j < T_MAX; j++)
15153 {
15154 const char* const modenames[] = {
15155 "v8qi", "v4hi", "v2si", "v2sf", "di",
15156 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15157 };
15158 char namebuf[60];
15159 tree ftype = NULL;
15160 enum insn_code icode;
15161 int is_load = 0, is_store = 0;
15162
15163 if ((d->bits & (1 << j)) == 0)
15164 continue;
15165
15166 icode = d->codes[codeidx++];
15167
15168 switch (d->itype)
15169 {
15170 case NEON_LOAD1:
15171 case NEON_LOAD1LANE:
15172 case NEON_LOADSTRUCT:
15173 case NEON_LOADSTRUCTLANE:
15174 is_load = 1;
15175 /* Fall through. */
15176 case NEON_STORE1:
15177 case NEON_STORE1LANE:
15178 case NEON_STORESTRUCT:
15179 case NEON_STORESTRUCTLANE:
15180 if (!is_load)
15181 is_store = 1;
15182 /* Fall through. */
15183 case NEON_UNOP:
15184 case NEON_BINOP:
15185 case NEON_LOGICBINOP:
15186 case NEON_SHIFTINSERT:
15187 case NEON_TERNOP:
15188 case NEON_GETLANE:
15189 case NEON_SETLANE:
15190 case NEON_CREATE:
15191 case NEON_DUP:
15192 case NEON_DUPLANE:
15193 case NEON_SHIFTIMM:
15194 case NEON_SHIFTACC:
15195 case NEON_COMBINE:
15196 case NEON_SPLIT:
15197 case NEON_CONVERT:
15198 case NEON_FIXCONV:
15199 case NEON_LANEMUL:
15200 case NEON_LANEMULL:
15201 case NEON_LANEMULH:
15202 case NEON_LANEMAC:
15203 case NEON_SCALARMUL:
15204 case NEON_SCALARMULL:
15205 case NEON_SCALARMULH:
15206 case NEON_SCALARMAC:
15207 case NEON_SELECT:
15208 case NEON_VTBL:
15209 case NEON_VTBX:
15210 {
15211 int k;
15212 tree return_type = void_type_node, args = void_list_node;
15213
15214 /* Build a function type directly from the insn_data for this
15215 builtin. The build_function_type() function takes care of
15216 removing duplicates for us. */
15217 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15218 {
15219 tree eltype;
15220
15221 if (is_load && k == 1)
15222 {
15223 /* Neon load patterns always have the memory operand
15224 (a SImode pointer) in the operand 1 position. We
15225 want a const pointer to the element type in that
15226 position. */
15227 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15228
15229 switch (1 << j)
15230 {
15231 case T_V8QI:
15232 case T_V16QI:
15233 eltype = const_intQI_pointer_node;
15234 break;
15235
15236 case T_V4HI:
15237 case T_V8HI:
15238 eltype = const_intHI_pointer_node;
15239 break;
15240
15241 case T_V2SI:
15242 case T_V4SI:
15243 eltype = const_intSI_pointer_node;
15244 break;
15245
15246 case T_V2SF:
15247 case T_V4SF:
15248 eltype = const_float_pointer_node;
15249 break;
15250
15251 case T_DI:
15252 case T_V2DI:
15253 eltype = const_intDI_pointer_node;
15254 break;
15255
15256 default: gcc_unreachable ();
15257 }
15258 }
15259 else if (is_store && k == 0)
15260 {
15261 /* Similarly, Neon store patterns use operand 0 as
15262 the memory location to store to (a SImode pointer).
15263 Use a pointer to the element type of the store in
15264 that position. */
15265 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15266
15267 switch (1 << j)
15268 {
15269 case T_V8QI:
15270 case T_V16QI:
15271 eltype = intQI_pointer_node;
15272 break;
15273
15274 case T_V4HI:
15275 case T_V8HI:
15276 eltype = intHI_pointer_node;
15277 break;
15278
15279 case T_V2SI:
15280 case T_V4SI:
15281 eltype = intSI_pointer_node;
15282 break;
15283
15284 case T_V2SF:
15285 case T_V4SF:
15286 eltype = float_pointer_node;
15287 break;
15288
15289 case T_DI:
15290 case T_V2DI:
15291 eltype = intDI_pointer_node;
15292 break;
15293
15294 default: gcc_unreachable ();
15295 }
15296 }
15297 else
15298 {
15299 switch (insn_data[icode].operand[k].mode)
15300 {
15301 case VOIDmode: eltype = void_type_node; break;
15302 /* Scalars. */
15303 case QImode: eltype = neon_intQI_type_node; break;
15304 case HImode: eltype = neon_intHI_type_node; break;
15305 case SImode: eltype = neon_intSI_type_node; break;
15306 case SFmode: eltype = neon_float_type_node; break;
15307 case DImode: eltype = neon_intDI_type_node; break;
15308 case TImode: eltype = intTI_type_node; break;
15309 case EImode: eltype = intEI_type_node; break;
15310 case OImode: eltype = intOI_type_node; break;
15311 case CImode: eltype = intCI_type_node; break;
15312 case XImode: eltype = intXI_type_node; break;
15313 /* 64-bit vectors. */
15314 case V8QImode: eltype = V8QI_type_node; break;
15315 case V4HImode: eltype = V4HI_type_node; break;
15316 case V2SImode: eltype = V2SI_type_node; break;
15317 case V2SFmode: eltype = V2SF_type_node; break;
15318 /* 128-bit vectors. */
15319 case V16QImode: eltype = V16QI_type_node; break;
15320 case V8HImode: eltype = V8HI_type_node; break;
15321 case V4SImode: eltype = V4SI_type_node; break;
15322 case V4SFmode: eltype = V4SF_type_node; break;
15323 case V2DImode: eltype = V2DI_type_node; break;
15324 default: gcc_unreachable ();
15325 }
15326 }
15327
15328 if (k == 0 && !is_store)
15329 return_type = eltype;
15330 else
15331 args = tree_cons (NULL_TREE, eltype, args);
15332 }
15333
15334 ftype = build_function_type (return_type, args);
15335 }
15336 break;
15337
15338 case NEON_RESULTPAIR:
15339 {
15340 switch (insn_data[icode].operand[1].mode)
15341 {
15342 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15343 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15344 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15345 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15346 case DImode: ftype = void_ftype_pdi_di_di; break;
15347 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15348 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15349 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15350 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15351 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15352 default: gcc_unreachable ();
15353 }
15354 }
15355 break;
15356
15357 case NEON_REINTERP:
15358 {
15359 /* We iterate over 5 doubleword types, then 5 quadword
15360 types. */
15361 int rhs = j % 5;
15362 switch (insn_data[icode].operand[0].mode)
15363 {
15364 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15365 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15366 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15367 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15368 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15369 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15370 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15371 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15372 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15373 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15374 default: gcc_unreachable ();
15375 }
15376 }
15377 break;
15378
15379 default:
15380 gcc_unreachable ();
15381 }
15382
15383 gcc_assert (ftype != NULL);
15384
15385 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15386
15387 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15388 NULL_TREE);
15389 }
15390 }
15391 }
15392
15393 static void
15394 arm_init_builtins (void)
15395 {
15396 arm_init_tls_builtins ();
15397
15398 if (TARGET_REALLY_IWMMXT)
15399 arm_init_iwmmxt_builtins ();
15400
15401 if (TARGET_NEON)
15402 arm_init_neon_builtins ();
15403 }
15404
15405 /* Errors in the source file can cause expand_expr to return const0_rtx
15406 where we expect a vector. To avoid crashing, use one of the vector
15407 clear instructions. */
15408
15409 static rtx
15410 safe_vector_operand (rtx x, enum machine_mode mode)
15411 {
15412 if (x != const0_rtx)
15413 return x;
15414 x = gen_reg_rtx (mode);
15415
15416 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15417 : gen_rtx_SUBREG (DImode, x, 0)));
15418 return x;
15419 }
15420
15421 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15422
15423 static rtx
15424 arm_expand_binop_builtin (enum insn_code icode,
15425 tree exp, rtx target)
15426 {
15427 rtx pat;
15428 tree arg0 = CALL_EXPR_ARG (exp, 0);
15429 tree arg1 = CALL_EXPR_ARG (exp, 1);
15430 rtx op0 = expand_normal (arg0);
15431 rtx op1 = expand_normal (arg1);
15432 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15433 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15434 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15435
15436 if (VECTOR_MODE_P (mode0))
15437 op0 = safe_vector_operand (op0, mode0);
15438 if (VECTOR_MODE_P (mode1))
15439 op1 = safe_vector_operand (op1, mode1);
15440
15441 if (! target
15442 || GET_MODE (target) != tmode
15443 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15444 target = gen_reg_rtx (tmode);
15445
15446 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15447
15448 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15449 op0 = copy_to_mode_reg (mode0, op0);
15450 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15451 op1 = copy_to_mode_reg (mode1, op1);
15452
15453 pat = GEN_FCN (icode) (target, op0, op1);
15454 if (! pat)
15455 return 0;
15456 emit_insn (pat);
15457 return target;
15458 }
15459
15460 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15461
15462 static rtx
15463 arm_expand_unop_builtin (enum insn_code icode,
15464 tree exp, rtx target, int do_load)
15465 {
15466 rtx pat;
15467 tree arg0 = CALL_EXPR_ARG (exp, 0);
15468 rtx op0 = expand_normal (arg0);
15469 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15470 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15471
15472 if (! target
15473 || GET_MODE (target) != tmode
15474 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15475 target = gen_reg_rtx (tmode);
15476 if (do_load)
15477 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15478 else
15479 {
15480 if (VECTOR_MODE_P (mode0))
15481 op0 = safe_vector_operand (op0, mode0);
15482
15483 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15484 op0 = copy_to_mode_reg (mode0, op0);
15485 }
15486
15487 pat = GEN_FCN (icode) (target, op0);
15488 if (! pat)
15489 return 0;
15490 emit_insn (pat);
15491 return target;
15492 }
15493
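/* Comparison callback for the bsearch in locate_neon_builtin_icode below.
   A (the key) carries the sought function code in base_fcode; B (a table
   entry) matches if that code lies within its
   [base_fcode, base_fcode + num_vars) range.  */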
15494 static int
15495 neon_builtin_compare (const void *a, const void *b)
15496 {
15497 const neon_builtin_datum *key = a;
15498 const neon_builtin_datum *memb = b;
15499 unsigned int soughtcode = key->base_fcode;
15500
15501 if (soughtcode >= memb->base_fcode
15502 && soughtcode < memb->base_fcode + memb->num_vars)
15503 return 0;
15504 else if (soughtcode < memb->base_fcode)
15505 return -1;
15506 else
15507 return 1;
15508 }
15509
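/* Given the function code FCODE of a Neon builtin, find its entry in
   neon_builtin_data and return the insn code of the particular variant FCODE
   was created for.  If ITYPE is non-null, also return the builtin's itype
   there.  */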
15510 static enum insn_code
15511 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15512 {
15513 neon_builtin_datum key, *found;
15514 int idx;
15515
15516 key.base_fcode = fcode;
15517 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15518 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15519 gcc_assert (found);
15520 idx = fcode - (int) found->base_fcode;
15521 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15522
15523 if (itype)
15524 *itype = found->itype;
15525
15526 return found->codes[idx];
15527 }
15528
15529 typedef enum {
15530 NEON_ARG_COPY_TO_REG,
15531 NEON_ARG_CONSTANT,
15532 NEON_ARG_STOP
15533 } builtin_arg;
15534
15535 #define NEON_MAX_BUILTIN_ARGS 5
15536
15537 /* Expand a Neon builtin. */
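/* For example (this mirrors the NEON_UNOP case in arm_expand_neon_builtin
   below), a variant taking one register operand and one constant is expanded
   as
     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
                           NEON_ARG_STOP);
   where the trailing NEON_ARG_STOP terminates the variable argument list.  */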
15538 static rtx
15539 arm_expand_neon_args (rtx target, int icode, int have_retval,
15540 tree exp, ...)
15541 {
15542 va_list ap;
15543 rtx pat;
15544 tree arg[NEON_MAX_BUILTIN_ARGS];
15545 rtx op[NEON_MAX_BUILTIN_ARGS];
15546 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15547 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15548 int argc = 0;
15549
15550 if (have_retval
15551 && (!target
15552 || GET_MODE (target) != tmode
15553 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15554 target = gen_reg_rtx (tmode);
15555
15556 va_start (ap, exp);
15557
15558 for (;;)
15559 {
15560 builtin_arg thisarg = va_arg (ap, int);
15561
15562 if (thisarg == NEON_ARG_STOP)
15563 break;
15564 else
15565 {
15566 arg[argc] = CALL_EXPR_ARG (exp, argc);
15567 op[argc] = expand_normal (arg[argc]);
15568 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15569
15570 switch (thisarg)
15571 {
15572 case NEON_ARG_COPY_TO_REG:
15573 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15574 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15575 (op[argc], mode[argc]))
15576 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15577 break;
15578
15579 case NEON_ARG_CONSTANT:
15580 /* FIXME: This error message is somewhat unhelpful. */
15581 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15582 (op[argc], mode[argc]))
15583 error ("argument must be a constant");
15584 break;
15585
15586 case NEON_ARG_STOP:
15587 gcc_unreachable ();
15588 }
15589
15590 argc++;
15591 }
15592 }
15593
15594 va_end (ap);
15595
15596 if (have_retval)
15597 switch (argc)
15598 {
15599 case 1:
15600 pat = GEN_FCN (icode) (target, op[0]);
15601 break;
15602
15603 case 2:
15604 pat = GEN_FCN (icode) (target, op[0], op[1]);
15605 break;
15606
15607 case 3:
15608 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15609 break;
15610
15611 case 4:
15612 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15613 break;
15614
15615 case 5:
15616 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15617 break;
15618
15619 default:
15620 gcc_unreachable ();
15621 }
15622 else
15623 switch (argc)
15624 {
15625 case 1:
15626 pat = GEN_FCN (icode) (op[0]);
15627 break;
15628
15629 case 2:
15630 pat = GEN_FCN (icode) (op[0], op[1]);
15631 break;
15632
15633 case 3:
15634 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15635 break;
15636
15637 case 4:
15638 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15639 break;
15640
15641 case 5:
15642 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15643 break;
15644
15645 default:
15646 gcc_unreachable ();
15647 }
15648
15649 if (!pat)
15650 return 0;
15651
15652 emit_insn (pat);
15653
15654 return target;
15655 }
15656
15657 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15658 constants defined per-instruction or per instruction-variant. Instead, the
15659 required info is looked up in the table neon_builtin_data. */
15660 static rtx
15661 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15662 {
15663 neon_itype itype;
15664 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15665
15666 switch (itype)
15667 {
15668 case NEON_UNOP:
15669 case NEON_CONVERT:
15670 case NEON_DUPLANE:
15671 return arm_expand_neon_args (target, icode, 1, exp,
15672 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15673
15674 case NEON_BINOP:
15675 case NEON_SETLANE:
15676 case NEON_SCALARMUL:
15677 case NEON_SCALARMULL:
15678 case NEON_SCALARMULH:
15679 case NEON_SHIFTINSERT:
15680 case NEON_LOGICBINOP:
15681 return arm_expand_neon_args (target, icode, 1, exp,
15682 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15683 NEON_ARG_STOP);
15684
15685 case NEON_TERNOP:
15686 return arm_expand_neon_args (target, icode, 1, exp,
15687 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15688 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15689
15690 case NEON_GETLANE:
15691 case NEON_FIXCONV:
15692 case NEON_SHIFTIMM:
15693 return arm_expand_neon_args (target, icode, 1, exp,
15694 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15695 NEON_ARG_STOP);
15696
15697 case NEON_CREATE:
15698 return arm_expand_neon_args (target, icode, 1, exp,
15699 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15700
15701 case NEON_DUP:
15702 case NEON_SPLIT:
15703 case NEON_REINTERP:
15704 return arm_expand_neon_args (target, icode, 1, exp,
15705 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15706
15707 case NEON_COMBINE:
15708 case NEON_VTBL:
15709 return arm_expand_neon_args (target, icode, 1, exp,
15710 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15711
15712 case NEON_RESULTPAIR:
15713 return arm_expand_neon_args (target, icode, 0, exp,
15714 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15715 NEON_ARG_STOP);
15716
15717 case NEON_LANEMUL:
15718 case NEON_LANEMULL:
15719 case NEON_LANEMULH:
15720 return arm_expand_neon_args (target, icode, 1, exp,
15721 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15722 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15723
15724 case NEON_LANEMAC:
15725 return arm_expand_neon_args (target, icode, 1, exp,
15726 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15727 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15728
15729 case NEON_SHIFTACC:
15730 return arm_expand_neon_args (target, icode, 1, exp,
15731 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15732 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15733
15734 case NEON_SCALARMAC:
15735 return arm_expand_neon_args (target, icode, 1, exp,
15736 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15737 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15738
15739 case NEON_SELECT:
15740 case NEON_VTBX:
15741 return arm_expand_neon_args (target, icode, 1, exp,
15742 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15743 NEON_ARG_STOP);
15744
15745 case NEON_LOAD1:
15746 case NEON_LOADSTRUCT:
15747 return arm_expand_neon_args (target, icode, 1, exp,
15748 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15749
15750 case NEON_LOAD1LANE:
15751 case NEON_LOADSTRUCTLANE:
15752 return arm_expand_neon_args (target, icode, 1, exp,
15753 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15754 NEON_ARG_STOP);
15755
15756 case NEON_STORE1:
15757 case NEON_STORESTRUCT:
15758 return arm_expand_neon_args (target, icode, 0, exp,
15759 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15760
15761 case NEON_STORE1LANE:
15762 case NEON_STORESTRUCTLANE:
15763 return arm_expand_neon_args (target, icode, 0, exp,
15764 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15765 NEON_ARG_STOP);
15766 }
15767
15768 gcc_unreachable ();
15769 }
15770
15771 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15772 void
15773 neon_reinterpret (rtx dest, rtx src)
15774 {
15775 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15776 }
15777
15778 /* Emit code to place a Neon pair result in memory locations (with equal
15779 registers). */
15780 void
15781 neon_emit_pair_result_insn (enum machine_mode mode,
15782 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15783 rtx op1, rtx op2)
15784 {
15785 rtx mem = gen_rtx_MEM (mode, destaddr);
15786 rtx tmp1 = gen_reg_rtx (mode);
15787 rtx tmp2 = gen_reg_rtx (mode);
15788
15789 emit_insn (intfn (tmp1, op1, tmp2, op2));
15790
15791 emit_move_insn (mem, tmp1);
15792 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15793 emit_move_insn (mem, tmp2);
15794 }
15795
15796 /* Set up operands for a register copy from src to dest, taking care not to
15797 clobber registers in the process.
15798 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15799 be called with a large N, so that should be OK. */
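/* For instance (illustrative only): with COUNT == 2, DEST = {d0, d1} and
   SRC = {d1, d2}, the routine orders the operand pairs so that the d1 -> d0
   copy comes before the d2 -> d1 copy, since emitting the latter first would
   clobber d1 while it is still needed as a source.  */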
15800
15801 void
15802 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15803 {
15804 unsigned int copied = 0, opctr = 0;
15805 unsigned int done = (1 << count) - 1;
15806 unsigned int i, j;
15807
15808 while (copied != done)
15809 {
15810 for (i = 0; i < count; i++)
15811 {
15812 int good = 1;
15813
15814 for (j = 0; good && j < count; j++)
15815 if (i != j && (copied & (1 << j)) == 0
15816 && reg_overlap_mentioned_p (src[j], dest[i]))
15817 good = 0;
15818
15819 if (good)
15820 {
15821 operands[opctr++] = dest[i];
15822 operands[opctr++] = src[i];
15823 copied |= 1 << i;
15824 }
15825 }
15826 }
15827
15828 gcc_assert (opctr == count * 2);
15829 }
15830
15831 /* Expand an expression EXP that calls a built-in function,
15832 with result going to TARGET if that's convenient
15833 (and in mode MODE if that's convenient).
15834 SUBTARGET may be used as the target for computing one of EXP's operands.
15835 IGNORE is nonzero if the value is to be ignored. */
15836
15837 static rtx
15838 arm_expand_builtin (tree exp,
15839 rtx target,
15840 rtx subtarget ATTRIBUTE_UNUSED,
15841 enum machine_mode mode ATTRIBUTE_UNUSED,
15842 int ignore ATTRIBUTE_UNUSED)
15843 {
15844 const struct builtin_description * d;
15845 enum insn_code icode;
15846 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15847 tree arg0;
15848 tree arg1;
15849 tree arg2;
15850 rtx op0;
15851 rtx op1;
15852 rtx op2;
15853 rtx pat;
15854 int fcode = DECL_FUNCTION_CODE (fndecl);
15855 size_t i;
15856 enum machine_mode tmode;
15857 enum machine_mode mode0;
15858 enum machine_mode mode1;
15859 enum machine_mode mode2;
15860
15861 if (fcode >= ARM_BUILTIN_NEON_BASE)
15862 return arm_expand_neon_builtin (fcode, exp, target);
15863
15864 switch (fcode)
15865 {
15866 case ARM_BUILTIN_TEXTRMSB:
15867 case ARM_BUILTIN_TEXTRMUB:
15868 case ARM_BUILTIN_TEXTRMSH:
15869 case ARM_BUILTIN_TEXTRMUH:
15870 case ARM_BUILTIN_TEXTRMSW:
15871 case ARM_BUILTIN_TEXTRMUW:
15872 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15873 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15874 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15875 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15876 : CODE_FOR_iwmmxt_textrmw);
15877
15878 arg0 = CALL_EXPR_ARG (exp, 0);
15879 arg1 = CALL_EXPR_ARG (exp, 1);
15880 op0 = expand_normal (arg0);
15881 op1 = expand_normal (arg1);
15882 tmode = insn_data[icode].operand[0].mode;
15883 mode0 = insn_data[icode].operand[1].mode;
15884 mode1 = insn_data[icode].operand[2].mode;
15885
15886 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15887 op0 = copy_to_mode_reg (mode0, op0);
15888 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15889 {
15890 /* @@@ better error message */
15891 error ("selector must be an immediate");
15892 return gen_reg_rtx (tmode);
15893 }
15894 if (target == 0
15895 || GET_MODE (target) != tmode
15896 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15897 target = gen_reg_rtx (tmode);
15898 pat = GEN_FCN (icode) (target, op0, op1);
15899 if (! pat)
15900 return 0;
15901 emit_insn (pat);
15902 return target;
15903
15904 case ARM_BUILTIN_TINSRB:
15905 case ARM_BUILTIN_TINSRH:
15906 case ARM_BUILTIN_TINSRW:
15907 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15908 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15909 : CODE_FOR_iwmmxt_tinsrw);
15910 arg0 = CALL_EXPR_ARG (exp, 0);
15911 arg1 = CALL_EXPR_ARG (exp, 1);
15912 arg2 = CALL_EXPR_ARG (exp, 2);
15913 op0 = expand_normal (arg0);
15914 op1 = expand_normal (arg1);
15915 op2 = expand_normal (arg2);
15916 tmode = insn_data[icode].operand[0].mode;
15917 mode0 = insn_data[icode].operand[1].mode;
15918 mode1 = insn_data[icode].operand[2].mode;
15919 mode2 = insn_data[icode].operand[3].mode;
15920
15921 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15922 op0 = copy_to_mode_reg (mode0, op0);
15923 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15924 op1 = copy_to_mode_reg (mode1, op1);
15925 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15926 {
15927 /* @@@ better error message */
15928 error ("selector must be an immediate");
15929 return const0_rtx;
15930 }
15931 if (target == 0
15932 || GET_MODE (target) != tmode
15933 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15934 target = gen_reg_rtx (tmode);
15935 pat = GEN_FCN (icode) (target, op0, op1, op2);
15936 if (! pat)
15937 return 0;
15938 emit_insn (pat);
15939 return target;
15940
15941 case ARM_BUILTIN_SETWCX:
15942 arg0 = CALL_EXPR_ARG (exp, 0);
15943 arg1 = CALL_EXPR_ARG (exp, 1);
15944 op0 = force_reg (SImode, expand_normal (arg0));
15945 op1 = expand_normal (arg1);
15946 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15947 return 0;
15948
15949 case ARM_BUILTIN_GETWCX:
15950 arg0 = CALL_EXPR_ARG (exp, 0);
15951 op0 = expand_normal (arg0);
15952 target = gen_reg_rtx (SImode);
15953 emit_insn (gen_iwmmxt_tmrc (target, op0));
15954 return target;
15955
15956 case ARM_BUILTIN_WSHUFH:
15957 icode = CODE_FOR_iwmmxt_wshufh;
15958 arg0 = CALL_EXPR_ARG (exp, 0);
15959 arg1 = CALL_EXPR_ARG (exp, 1);
15960 op0 = expand_normal (arg0);
15961 op1 = expand_normal (arg1);
15962 tmode = insn_data[icode].operand[0].mode;
15963 mode1 = insn_data[icode].operand[1].mode;
15964 mode2 = insn_data[icode].operand[2].mode;
15965
15966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15967 op0 = copy_to_mode_reg (mode1, op0);
15968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15969 {
15970 /* @@@ better error message */
15971 error ("mask must be an immediate");
15972 return const0_rtx;
15973 }
15974 if (target == 0
15975 || GET_MODE (target) != tmode
15976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15977 target = gen_reg_rtx (tmode);
15978 pat = GEN_FCN (icode) (target, op0, op1);
15979 if (! pat)
15980 return 0;
15981 emit_insn (pat);
15982 return target;
15983
15984 case ARM_BUILTIN_WSADB:
15985 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15986 case ARM_BUILTIN_WSADH:
15987 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15988 case ARM_BUILTIN_WSADBZ:
15989 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15990 case ARM_BUILTIN_WSADHZ:
15991 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15992
15993 /* Several three-argument builtins. */
15994 case ARM_BUILTIN_WMACS:
15995 case ARM_BUILTIN_WMACU:
15996 case ARM_BUILTIN_WALIGN:
15997 case ARM_BUILTIN_TMIA:
15998 case ARM_BUILTIN_TMIAPH:
15999 case ARM_BUILTIN_TMIATT:
16000 case ARM_BUILTIN_TMIATB:
16001 case ARM_BUILTIN_TMIABT:
16002 case ARM_BUILTIN_TMIABB:
16003 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16004 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16005 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16006 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16007 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16008 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16009 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16010 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16011 : CODE_FOR_iwmmxt_walign);
16012 arg0 = CALL_EXPR_ARG (exp, 0);
16013 arg1 = CALL_EXPR_ARG (exp, 1);
16014 arg2 = CALL_EXPR_ARG (exp, 2);
16015 op0 = expand_normal (arg0);
16016 op1 = expand_normal (arg1);
16017 op2 = expand_normal (arg2);
16018 tmode = insn_data[icode].operand[0].mode;
16019 mode0 = insn_data[icode].operand[1].mode;
16020 mode1 = insn_data[icode].operand[2].mode;
16021 mode2 = insn_data[icode].operand[3].mode;
16022
16023 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16024 op0 = copy_to_mode_reg (mode0, op0);
16025 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16026 op1 = copy_to_mode_reg (mode1, op1);
16027 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16028 op2 = copy_to_mode_reg (mode2, op2);
16029 if (target == 0
16030 || GET_MODE (target) != tmode
16031 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16032 target = gen_reg_rtx (tmode);
16033 pat = GEN_FCN (icode) (target, op0, op1, op2);
16034 if (! pat)
16035 return 0;
16036 emit_insn (pat);
16037 return target;
16038
16039 case ARM_BUILTIN_WZERO:
16040 target = gen_reg_rtx (DImode);
16041 emit_insn (gen_iwmmxt_clrdi (target));
16042 return target;
16043
16044 case ARM_BUILTIN_THREAD_POINTER:
16045 return arm_load_tp (target);
16046
16047 default:
16048 break;
16049 }
16050
16051 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16052 if (d->code == (const enum arm_builtins) fcode)
16053 return arm_expand_binop_builtin (d->icode, exp, target);
16054
16055 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16056 if (d->code == (const enum arm_builtins) fcode)
16057 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16058
16059 /* @@@ Should really do something sensible here. */
16060 return NULL_RTX;
16061 }
16062 \f
16063 /* Return the number (counting from 0) of
16064 the least significant set bit in MASK. */
16065
16066 inline static int
16067 number_of_first_bit_set (unsigned mask)
16068 {
16069 int bit;
16070
16071 for (bit = 0;
16072 (mask & (1 << bit)) == 0;
16073 ++bit)
16074 continue;
16075
16076 return bit;
16077 }
16078
16079 /* Emit code to push or pop registers to or from the stack. F is the
16080 assembly file. MASK is the registers to push or pop. PUSH is
16081 nonzero if we should push, and zero if we should pop. For debugging
16082 output, if pushing, adjust CFA_OFFSET by the amount of space added
16083 to the stack. REAL_REGS should have the same number of bits set as
16084 MASK, and will be used instead (in the same order) to describe which
16085 registers were saved - this is used to mark the save slots when we
16086 push high registers after moving them to low registers. */
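/* As a rough illustration (not taken from any particular function): with
   PUSH nonzero and MASK covering r4, r5 and LR, the code below emits
   something like
     push {r4, r5, lr}
   and, when EABI unwind tables are enabled, a matching .save directive
   first.  */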
16087 static void
16088 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16089 unsigned long real_regs)
16090 {
16091 int regno;
16092 int lo_mask = mask & 0xFF;
16093 int pushed_words = 0;
16094
16095 gcc_assert (mask);
16096
16097 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16098 {
16099       /* Special case.  Do not generate a POP PC statement here; do it in
16100 	 thumb_exit ().  */
16101 thumb_exit (f, -1);
16102 return;
16103 }
16104
16105 if (ARM_EABI_UNWIND_TABLES && push)
16106 {
16107 fprintf (f, "\t.save\t{");
16108 for (regno = 0; regno < 15; regno++)
16109 {
16110 if (real_regs & (1 << regno))
16111 {
16112 if (real_regs & ((1 << regno) -1))
16113 fprintf (f, ", ");
16114 asm_fprintf (f, "%r", regno);
16115 }
16116 }
16117 fprintf (f, "}\n");
16118 }
16119
16120 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16121
16122 /* Look at the low registers first. */
16123 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16124 {
16125 if (lo_mask & 1)
16126 {
16127 asm_fprintf (f, "%r", regno);
16128
16129 if ((lo_mask & ~1) != 0)
16130 fprintf (f, ", ");
16131
16132 pushed_words++;
16133 }
16134 }
16135
16136 if (push && (mask & (1 << LR_REGNUM)))
16137 {
16138 /* Catch pushing the LR. */
16139 if (mask & 0xFF)
16140 fprintf (f, ", ");
16141
16142 asm_fprintf (f, "%r", LR_REGNUM);
16143
16144 pushed_words++;
16145 }
16146 else if (!push && (mask & (1 << PC_REGNUM)))
16147 {
16148 /* Catch popping the PC. */
16149 if (TARGET_INTERWORK || TARGET_BACKTRACE
16150 || current_function_calls_eh_return)
16151 {
16152 	  /* The PC is never popped directly; instead
16153 	     it is popped into r3 and then BX is used.  */
16154 fprintf (f, "}\n");
16155
16156 thumb_exit (f, -1);
16157
16158 return;
16159 }
16160 else
16161 {
16162 if (mask & 0xFF)
16163 fprintf (f, ", ");
16164
16165 asm_fprintf (f, "%r", PC_REGNUM);
16166 }
16167 }
16168
16169 fprintf (f, "}\n");
16170
16171 if (push && pushed_words && dwarf2out_do_frame ())
16172 {
16173 char *l = dwarf2out_cfi_label ();
16174 int pushed_mask = real_regs;
16175
16176 *cfa_offset += pushed_words * 4;
16177 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16178
16179 pushed_words = 0;
16180 pushed_mask = real_regs;
16181 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16182 {
16183 if (pushed_mask & 1)
16184 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16185 }
16186 }
16187 }
16188
16189 /* Generate code to return from a thumb function.
16190 If 'reg_containing_return_addr' is -1, then the return address is
16191 actually on the stack, at the stack pointer. */
16192 static void
16193 thumb_exit (FILE *f, int reg_containing_return_addr)
16194 {
16195 unsigned regs_available_for_popping;
16196 unsigned regs_to_pop;
16197 int pops_needed;
16198 unsigned available;
16199 unsigned required;
16200 int mode;
16201 int size;
16202 int restore_a4 = FALSE;
16203
16204 /* Compute the registers we need to pop. */
16205 regs_to_pop = 0;
16206 pops_needed = 0;
16207
16208 if (reg_containing_return_addr == -1)
16209 {
16210 regs_to_pop |= 1 << LR_REGNUM;
16211 ++pops_needed;
16212 }
16213
16214 if (TARGET_BACKTRACE)
16215 {
16216 /* Restore the (ARM) frame pointer and stack pointer. */
16217 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16218 pops_needed += 2;
16219 }
16220
16221 /* If there is nothing to pop then just emit the BX instruction and
16222 return. */
16223 if (pops_needed == 0)
16224 {
16225 if (current_function_calls_eh_return)
16226 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16227
16228 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16229 return;
16230 }
16231   /* Otherwise, if we are not supporting interworking, have not created
16232      a backtrace structure, and the function was not entered in ARM mode,
16233      then just pop the return address straight into the PC.  */
16234 else if (!TARGET_INTERWORK
16235 && !TARGET_BACKTRACE
16236 && !is_called_in_ARM_mode (current_function_decl)
16237 && !current_function_calls_eh_return)
16238 {
16239 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16240 return;
16241 }
16242
16243 /* Find out how many of the (return) argument registers we can corrupt. */
16244 regs_available_for_popping = 0;
16245
16246 /* If returning via __builtin_eh_return, the bottom three registers
16247 all contain information needed for the return. */
16248 if (current_function_calls_eh_return)
16249 size = 12;
16250 else
16251 {
16252       /* Deduce the registers used from the function's return value.
16253 	 This is more reliable than examining df_regs_ever_live_p ()
16254 	 because that will be set if the register is ever used in the
16255 	 function, not just if the register is used to hold a return
16256 	 value.  */
16257
16258 if (current_function_return_rtx != 0)
16259 mode = GET_MODE (current_function_return_rtx);
16260 else
16261 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16262
16263 size = GET_MODE_SIZE (mode);
16264
16265 if (size == 0)
16266 {
16267 /* In a void function we can use any argument register.
16268 In a function that returns a structure on the stack
16269 we can use the second and third argument registers. */
16270 if (mode == VOIDmode)
16271 regs_available_for_popping =
16272 (1 << ARG_REGISTER (1))
16273 | (1 << ARG_REGISTER (2))
16274 | (1 << ARG_REGISTER (3));
16275 else
16276 regs_available_for_popping =
16277 (1 << ARG_REGISTER (2))
16278 | (1 << ARG_REGISTER (3));
16279 }
16280 else if (size <= 4)
16281 regs_available_for_popping =
16282 (1 << ARG_REGISTER (2))
16283 | (1 << ARG_REGISTER (3));
16284 else if (size <= 8)
16285 regs_available_for_popping =
16286 (1 << ARG_REGISTER (3));
16287 }
16288
16289 /* Match registers to be popped with registers into which we pop them. */
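   /* Note that X & -X isolates the least significant set bit of X, so each
      iteration of the loop below retires the lowest remaining register from
      each mask.  */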
16290 for (available = regs_available_for_popping,
16291 required = regs_to_pop;
16292 required != 0 && available != 0;
16293 available &= ~(available & - available),
16294 required &= ~(required & - required))
16295 -- pops_needed;
16296
16297 /* If we have any popping registers left over, remove them. */
16298 if (available > 0)
16299 regs_available_for_popping &= ~available;
16300
16301 /* Otherwise if we need another popping register we can use
16302 the fourth argument register. */
16303 else if (pops_needed)
16304 {
16305 /* If we have not found any free argument registers and
16306 reg a4 contains the return address, we must move it. */
16307 if (regs_available_for_popping == 0
16308 && reg_containing_return_addr == LAST_ARG_REGNUM)
16309 {
16310 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16311 reg_containing_return_addr = LR_REGNUM;
16312 }
16313 else if (size > 12)
16314 {
16315 /* Register a4 is being used to hold part of the return value,
16316 but we have dire need of a free, low register. */
16317 restore_a4 = TRUE;
16318
16319 	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
16320 }
16321
16322 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16323 {
16324 /* The fourth argument register is available. */
16325 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16326
16327 --pops_needed;
16328 }
16329 }
16330
16331 /* Pop as many registers as we can. */
16332 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16333 regs_available_for_popping);
16334
16335 /* Process the registers we popped. */
16336 if (reg_containing_return_addr == -1)
16337 {
16338 /* The return address was popped into the lowest numbered register. */
16339 regs_to_pop &= ~(1 << LR_REGNUM);
16340
16341 reg_containing_return_addr =
16342 number_of_first_bit_set (regs_available_for_popping);
16343
16344       /* Remove this register from the mask of available registers, so that
16345 	 the return address will not be corrupted by further pops.  */
16346 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16347 }
16348
16349 /* If we popped other registers then handle them here. */
16350 if (regs_available_for_popping)
16351 {
16352 int frame_pointer;
16353
16354 /* Work out which register currently contains the frame pointer. */
16355 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16356
16357 /* Move it into the correct place. */
16358 asm_fprintf (f, "\tmov\t%r, %r\n",
16359 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16360
16361 /* (Temporarily) remove it from the mask of popped registers. */
16362 regs_available_for_popping &= ~(1 << frame_pointer);
16363 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16364
16365 if (regs_available_for_popping)
16366 {
16367 int stack_pointer;
16368
16369 	  /* We popped the stack pointer as well;
16370 	     find the register that contains it.  */
16371 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16372
16373 /* Move it into the stack register. */
16374 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16375
16376 /* At this point we have popped all necessary registers, so
16377 do not worry about restoring regs_available_for_popping
16378 to its correct value:
16379
16380 assert (pops_needed == 0)
16381 assert (regs_available_for_popping == (1 << frame_pointer))
16382 assert (regs_to_pop == (1 << STACK_POINTER)) */
16383 }
16384 else
16385 {
16386 	  /* Since we have just moved the popped value into the frame
16387 pointer, the popping register is available for reuse, and
16388 we know that we still have the stack pointer left to pop. */
16389 regs_available_for_popping |= (1 << frame_pointer);
16390 }
16391 }
16392
16393 /* If we still have registers left on the stack, but we no longer have
16394 any registers into which we can pop them, then we must move the return
16395 address into the link register and make available the register that
16396 contained it. */
16397 if (regs_available_for_popping == 0 && pops_needed > 0)
16398 {
16399 regs_available_for_popping |= 1 << reg_containing_return_addr;
16400
16401 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16402 reg_containing_return_addr);
16403
16404 reg_containing_return_addr = LR_REGNUM;
16405 }
16406
16407 /* If we have registers left on the stack then pop some more.
16408 We know that at most we will want to pop FP and SP. */
16409 if (pops_needed > 0)
16410 {
16411 int popped_into;
16412 int move_to;
16413
16414 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16415 regs_available_for_popping);
16416
16417 /* We have popped either FP or SP.
16418 Move whichever one it is into the correct register. */
16419 popped_into = number_of_first_bit_set (regs_available_for_popping);
16420 move_to = number_of_first_bit_set (regs_to_pop);
16421
16422 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16423
16424 regs_to_pop &= ~(1 << move_to);
16425
16426 --pops_needed;
16427 }
16428
16429 /* If we still have not popped everything then we must have only
16430 had one register available to us and we are now popping the SP. */
16431 if (pops_needed > 0)
16432 {
16433 int popped_into;
16434
16435 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16436 regs_available_for_popping);
16437
16438 popped_into = number_of_first_bit_set (regs_available_for_popping);
16439
16440 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16441 /*
16442 assert (regs_to_pop == (1 << STACK_POINTER))
16443 assert (pops_needed == 1)
16444 */
16445 }
16446
16447 /* If necessary restore the a4 register. */
16448 if (restore_a4)
16449 {
16450 if (reg_containing_return_addr != LR_REGNUM)
16451 {
16452 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16453 reg_containing_return_addr = LR_REGNUM;
16454 }
16455
16456 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16457 }
16458
16459 if (current_function_calls_eh_return)
16460 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16461
16462 /* Return to caller. */
16463 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16464 }
16465
16466 \f
16467 void
16468 thumb1_final_prescan_insn (rtx insn)
16469 {
16470 if (flag_print_asm_name)
16471 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16472 INSN_ADDRESSES (INSN_UID (insn)));
16473 }
16474
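 /* Return nonzero if VAL fits entirely within an 8-bit field shifted left by
    0 to 24 bits (e.g. 0x00ff0000), so it can be synthesized by loading an
    8-bit immediate and shifting it into place.  */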
16475 int
16476 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16477 {
16478 unsigned HOST_WIDE_INT mask = 0xff;
16479 int i;
16480
16481 if (val == 0) /* XXX */
16482 return 0;
16483
16484 for (i = 0; i < 25; i++)
16485 if ((val & (mask << i)) == val)
16486 return 1;
16487
16488 return 0;
16489 }
16490
16491 /* Returns nonzero if the current function contains,
16492 or might contain a far jump. */
16493 static int
16494 thumb_far_jump_used_p (void)
16495 {
16496 rtx insn;
16497
16498 /* This test is only important for leaf functions. */
16499 /* assert (!leaf_function_p ()); */
16500
16501 /* If we have already decided that far jumps may be used,
16502 do not bother checking again, and always return true even if
16503 it turns out that they are not being used. Once we have made
16504 the decision that far jumps are present (and that hence the link
16505 register will be pushed onto the stack) we cannot go back on it. */
16506 if (cfun->machine->far_jump_used)
16507 return 1;
16508
16509 /* If this function is not being called from the prologue/epilogue
16510 generation code then it must be being called from the
16511 INITIAL_ELIMINATION_OFFSET macro. */
16512 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16513 {
16514 /* In this case we know that we are being asked about the elimination
16515 of the arg pointer register. If that register is not being used,
16516 then there are no arguments on the stack, and we do not have to
16517 worry that a far jump might force the prologue to push the link
16518 register, changing the stack offsets. In this case we can just
16519 return false, since the presence of far jumps in the function will
16520 not affect stack offsets.
16521
16522 If the arg pointer is live (or if it was live, but has now been
16523 eliminated and so set to dead) then we do have to test to see if
16524 the function might contain a far jump. This test can lead to some
16525 	 false negatives, since before reload is completed, the length of
16526 branch instructions is not known, so gcc defaults to returning their
16527 longest length, which in turn sets the far jump attribute to true.
16528
16529 A false negative will not result in bad code being generated, but it
16530 will result in a needless push and pop of the link register. We
16531 hope that this does not occur too often.
16532
16533 If we need doubleword stack alignment this could affect the other
16534 elimination offsets so we can't risk getting it wrong. */
16535 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16536 cfun->machine->arg_pointer_live = 1;
16537 else if (!cfun->machine->arg_pointer_live)
16538 return 0;
16539 }
16540
16541 /* Check to see if the function contains a branch
16542 insn with the far jump attribute set. */
16543 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16544 {
16545 if (GET_CODE (insn) == JUMP_INSN
16546 /* Ignore tablejump patterns. */
16547 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16548 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16549 && get_attr_far_jump (insn) == FAR_JUMP_YES
16550 )
16551 {
16552 /* Record the fact that we have decided that
16553 the function does use far jumps. */
16554 cfun->machine->far_jump_used = 1;
16555 return 1;
16556 }
16557 }
16558
16559 return 0;
16560 }
16561
16562 /* Return nonzero if FUNC must be entered in ARM mode. */
16563 int
16564 is_called_in_ARM_mode (tree func)
16565 {
16566 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16567
16568 /* Ignore the problem about functions whose address is taken. */
16569 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16570 return TRUE;
16571
16572 #ifdef ARM_PE
16573 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16574 #else
16575 return FALSE;
16576 #endif
16577 }
16578
16579 /* The bits which aren't usefully expanded as rtl. */
16580 const char *
16581 thumb_unexpanded_epilogue (void)
16582 {
16583 arm_stack_offsets *offsets;
16584 int regno;
16585 unsigned long live_regs_mask = 0;
16586 int high_regs_pushed = 0;
16587 int had_to_push_lr;
16588 int size;
16589
16590 if (return_used_this_function)
16591 return "";
16592
16593 if (IS_NAKED (arm_current_func_type ()))
16594 return "";
16595
16596 offsets = arm_get_frame_offsets ();
16597 live_regs_mask = offsets->saved_regs_mask;
16598 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16599
16600   /* We can deduce the registers used from the function's return value.
16601      This is more reliable than examining df_regs_ever_live_p () because that
16602 will be set if the register is ever used in the function, not just if
16603 the register is used to hold a return value. */
16604 size = arm_size_return_regs ();
16605
16606 /* The prolog may have pushed some high registers to use as
16607 work registers. e.g. the testsuite file:
16608 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16609 compiles to produce:
16610 push {r4, r5, r6, r7, lr}
16611 mov r7, r9
16612 mov r6, r8
16613 push {r6, r7}
16614 as part of the prolog. We have to undo that pushing here. */
16615
16616 if (high_regs_pushed)
16617 {
16618 unsigned long mask = live_regs_mask & 0xff;
16619 int next_hi_reg;
16620
16621 /* The available low registers depend on the size of the value we are
16622 returning. */
16623 if (size <= 12)
16624 mask |= 1 << 3;
16625 if (size <= 8)
16626 mask |= 1 << 2;
16627
16628 if (mask == 0)
16629 /* Oh dear! We have no low registers into which we can pop
16630 high registers! */
16631 internal_error
16632 ("no low registers available for popping high registers");
16633
16634 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16635 if (live_regs_mask & (1 << next_hi_reg))
16636 break;
16637
16638 while (high_regs_pushed)
16639 {
16640 /* Find lo register(s) into which the high register(s) can
16641 be popped. */
16642 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16643 {
16644 if (mask & (1 << regno))
16645 high_regs_pushed--;
16646 if (high_regs_pushed == 0)
16647 break;
16648 }
16649
16650 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16651
16652 /* Pop the values into the low register(s). */
16653 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16654
16655 /* Move the value(s) into the high registers. */
16656 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16657 {
16658 if (mask & (1 << regno))
16659 {
16660 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16661 regno);
16662
16663 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16664 if (live_regs_mask & (1 << next_hi_reg))
16665 break;
16666 }
16667 }
16668 }
16669 live_regs_mask &= ~0x0f00;
16670 }
16671
16672 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16673 live_regs_mask &= 0xff;
16674
16675 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16676 {
16677 /* Pop the return address into the PC. */
16678 if (had_to_push_lr)
16679 live_regs_mask |= 1 << PC_REGNUM;
16680
16681 /* Either no argument registers were pushed or a backtrace
16682 structure was created which includes an adjusted stack
16683 pointer, so just pop everything. */
16684 if (live_regs_mask)
16685 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16686 live_regs_mask);
16687
16688 /* We have either just popped the return address into the
16689 	 PC or it was kept in LR for the entire function.  */
16690 if (!had_to_push_lr)
16691 thumb_exit (asm_out_file, LR_REGNUM);
16692 }
16693 else
16694 {
16695 /* Pop everything but the return address. */
16696 if (live_regs_mask)
16697 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16698 live_regs_mask);
16699
16700 if (had_to_push_lr)
16701 {
16702 if (size > 12)
16703 {
16704 /* We have no free low regs, so save one. */
16705 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16706 LAST_ARG_REGNUM);
16707 }
16708
16709 /* Get the return address into a temporary register. */
16710 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16711 1 << LAST_ARG_REGNUM);
16712
16713 if (size > 12)
16714 {
16715 /* Move the return address to lr. */
16716 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16717 LAST_ARG_REGNUM);
16718 /* Restore the low register. */
16719 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16720 IP_REGNUM);
16721 regno = LR_REGNUM;
16722 }
16723 else
16724 regno = LAST_ARG_REGNUM;
16725 }
16726 else
16727 regno = LR_REGNUM;
16728
16729 /* Remove the argument registers that were pushed onto the stack. */
16730 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16731 SP_REGNUM, SP_REGNUM,
16732 current_function_pretend_args_size);
16733
16734 thumb_exit (asm_out_file, regno);
16735 }
16736
16737 return "";
16738 }
16739
16740 /* Functions to save and restore machine-specific function data. */
16741 static struct machine_function *
16742 arm_init_machine_status (void)
16743 {
16744 struct machine_function *machine;
16745 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16746
16747 #if ARM_FT_UNKNOWN != 0
16748 machine->func_type = ARM_FT_UNKNOWN;
16749 #endif
16750 return machine;
16751 }
16752
16753 /* Return an RTX indicating where the return address to the
16754 calling function can be found. */
16755 rtx
16756 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16757 {
16758 if (count != 0)
16759 return NULL_RTX;
16760
16761 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16762 }
16763
16764 /* Do anything needed before RTL is emitted for each function. */
16765 void
16766 arm_init_expanders (void)
16767 {
16768 /* Arrange to initialize and mark the machine per-function status. */
16769 init_machine_status = arm_init_machine_status;
16770
16771 /* This is to stop the combine pass optimizing away the alignment
16772 adjustment of va_arg. */
16773 /* ??? It is claimed that this should not be necessary. */
16774 if (cfun)
16775 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16776 }
16777
16778
16779 /* Like arm_compute_initial_elimination_offset.  Simpler because there
16780 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16781 to point at the base of the local variables after static stack
16782 space for a function has been allocated. */
16783
16784 HOST_WIDE_INT
16785 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16786 {
16787 arm_stack_offsets *offsets;
16788
16789 offsets = arm_get_frame_offsets ();
16790
16791 switch (from)
16792 {
16793 case ARG_POINTER_REGNUM:
16794 switch (to)
16795 {
16796 case STACK_POINTER_REGNUM:
16797 return offsets->outgoing_args - offsets->saved_args;
16798
16799 case FRAME_POINTER_REGNUM:
16800 return offsets->soft_frame - offsets->saved_args;
16801
16802 case ARM_HARD_FRAME_POINTER_REGNUM:
16803 return offsets->saved_regs - offsets->saved_args;
16804
16805 case THUMB_HARD_FRAME_POINTER_REGNUM:
16806 return offsets->locals_base - offsets->saved_args;
16807
16808 default:
16809 gcc_unreachable ();
16810 }
16811 break;
16812
16813 case FRAME_POINTER_REGNUM:
16814 switch (to)
16815 {
16816 case STACK_POINTER_REGNUM:
16817 return offsets->outgoing_args - offsets->soft_frame;
16818
16819 case ARM_HARD_FRAME_POINTER_REGNUM:
16820 return offsets->saved_regs - offsets->soft_frame;
16821
16822 case THUMB_HARD_FRAME_POINTER_REGNUM:
16823 return offsets->locals_base - offsets->soft_frame;
16824
16825 default:
16826 gcc_unreachable ();
16827 }
16828 break;
16829
16830 default:
16831 gcc_unreachable ();
16832 }
16833 }
16834
16835 /* Generate the rest of a function's prologue. */
16836 void
16837 thumb1_expand_prologue (void)
16838 {
16839 rtx insn, dwarf;
16840
16841 HOST_WIDE_INT amount;
16842 arm_stack_offsets *offsets;
16843 unsigned long func_type;
16844 int regno;
16845 unsigned long live_regs_mask;
16846
16847 func_type = arm_current_func_type ();
16848
16849 /* Naked functions don't have prologues. */
16850 if (IS_NAKED (func_type))
16851 return;
16852
16853 if (IS_INTERRUPT (func_type))
16854 {
16855 error ("interrupt Service Routines cannot be coded in Thumb mode");
16856 return;
16857 }
16858
16859 offsets = arm_get_frame_offsets ();
16860 live_regs_mask = offsets->saved_regs_mask;
16861 /* Load the pic register before setting the frame pointer,
16862 so we can use r7 as a temporary work register. */
16863 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16864 arm_load_pic_register (live_regs_mask);
16865
16866 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16867 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16868 stack_pointer_rtx);
16869
16870 amount = offsets->outgoing_args - offsets->saved_regs;
16871 if (amount)
16872 {
16873 if (amount < 512)
16874 {
16875 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16876 GEN_INT (- amount)));
16877 RTX_FRAME_RELATED_P (insn) = 1;
16878 }
16879 else
16880 {
16881 rtx reg;
16882
16883 /* The stack decrement is too big for an immediate value in a single
16884 insn. In theory we could issue multiple subtracts, but after
16885 three of them it becomes more space efficient to place the full
16886 value in the constant pool and load into a register. (Also the
16887 ARM debugger really likes to see only one stack decrement per
16888 function). So instead we look for a scratch register into which
16889 we can load the decrement, and then we subtract this from the
16890 stack pointer. Unfortunately on the thumb the only available
16891 scratch registers are the argument registers, and we cannot use
16892 these as they may hold arguments to the function. Instead we
16893 attempt to locate a call preserved register which is used by this
16894 function. If we can find one, then we know that it will have
16895 been pushed at the start of the prologue and so we can corrupt
16896 it now. */
16897 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16898 if (live_regs_mask & (1 << regno)
16899 && !(frame_pointer_needed
16900 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16901 break;
16902
16903 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16904 {
16905 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16906
16907 /* Choose an arbitrary, non-argument low register. */
16908 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16909
16910 /* Save it by copying it into a high, scratch register. */
16911 emit_insn (gen_movsi (spare, reg));
16912 /* Add a USE to stop propagate_one_insn() from barfing. */
16913 emit_insn (gen_prologue_use (spare));
16914
16915 /* Decrement the stack. */
16916 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16917 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16918 stack_pointer_rtx, reg));
16919 RTX_FRAME_RELATED_P (insn) = 1;
16920 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16921 plus_constant (stack_pointer_rtx,
16922 -amount));
16923 RTX_FRAME_RELATED_P (dwarf) = 1;
16924 REG_NOTES (insn)
16925 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16926 REG_NOTES (insn));
16927
16928 /* Restore the low register's original value. */
16929 emit_insn (gen_movsi (reg, spare));
16930
16931 /* Emit a USE of the restored scratch register, so that flow
16932 analysis will not consider the restore redundant. The
16933 register won't be used again in this function and isn't
16934 restored by the epilogue. */
16935 emit_insn (gen_prologue_use (reg));
16936 }
16937 else
16938 {
16939 reg = gen_rtx_REG (SImode, regno);
16940
16941 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16942
16943 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16944 stack_pointer_rtx, reg));
16945 RTX_FRAME_RELATED_P (insn) = 1;
16946 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16947 plus_constant (stack_pointer_rtx,
16948 -amount));
16949 RTX_FRAME_RELATED_P (dwarf) = 1;
16950 REG_NOTES (insn)
16951 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16952 REG_NOTES (insn));
16953 }
16954 }
16955 }
16956
16957 if (frame_pointer_needed)
16958 thumb_set_frame_pointer (offsets);
16959
16960 /* If we are profiling, make sure no instructions are scheduled before
16961 the call to mcount. Similarly if the user has requested no
16962 scheduling in the prolog. Similarly if we want non-call exceptions
16963 using the EABI unwinder, to prevent faulting instructions from being
16964 swapped with a stack adjustment. */
16965 if (current_function_profile || !TARGET_SCHED_PROLOG
16966 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16967 emit_insn (gen_blockage ());
16968
16969 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16970 if (live_regs_mask & 0xff)
16971 cfun->machine->lr_save_eliminated = 0;
16972 }
16973
16974
16975 void
16976 thumb1_expand_epilogue (void)
16977 {
16978 HOST_WIDE_INT amount;
16979 arm_stack_offsets *offsets;
16980 int regno;
16981
16982   /* Naked functions don't have epilogues.  */
16983 if (IS_NAKED (arm_current_func_type ()))
16984 return;
16985
16986 offsets = arm_get_frame_offsets ();
16987 amount = offsets->outgoing_args - offsets->saved_regs;
16988
16989 if (frame_pointer_needed)
16990 {
16991 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16992 amount = offsets->locals_base - offsets->saved_regs;
16993 }
16994
16995 gcc_assert (amount >= 0);
16996 if (amount)
16997 {
16998 if (amount < 512)
16999 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17000 GEN_INT (amount)));
17001 else
17002 {
17003 /* r3 is always free in the epilogue. */
17004 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17005
17006 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17007 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17008 }
17009 }
17010
17011 /* Emit a USE (stack_pointer_rtx), so that
17012 the stack adjustment will not be deleted. */
17013 emit_insn (gen_prologue_use (stack_pointer_rtx));
17014
17015 if (current_function_profile || !TARGET_SCHED_PROLOG)
17016 emit_insn (gen_blockage ());
17017
17018 /* Emit a clobber for each insn that will be restored in the epilogue,
17019 so that flow2 will get register lifetimes correct. */
17020 for (regno = 0; regno < 13; regno++)
17021 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17022 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
17023
17024 if (! df_regs_ever_live_p (LR_REGNUM))
17025 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
17026 }
17027
17028 static void
17029 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17030 {
17031 arm_stack_offsets *offsets;
17032 unsigned long live_regs_mask = 0;
17033 unsigned long l_mask;
17034 unsigned high_regs_pushed = 0;
17035 int cfa_offset = 0;
17036 int regno;
17037
17038 if (IS_NAKED (arm_current_func_type ()))
17039 return;
17040
17041 if (is_called_in_ARM_mode (current_function_decl))
17042 {
17043 const char * name;
17044
17045 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17046 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17047 == SYMBOL_REF);
17048 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17049
17050 /* Generate code sequence to switch us into Thumb mode. */
17051 /* The .code 32 directive has already been emitted by
17052 ASM_DECLARE_FUNCTION_NAME. */
17053 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17054 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17055
17056 /* Generate a label, so that the debugger will notice the
17057 change in instruction sets. This label is also used by
17058 the assembler to bypass the ARM code when this function
17059 is called from a Thumb encoded function elsewhere in the
17060 same file. Hence the definition of STUB_NAME here must
17061 agree with the definition in gas/config/tc-arm.c. */
17062
17063 #define STUB_NAME ".real_start_of"
17064
17065 fprintf (f, "\t.code\t16\n");
17066 #ifdef ARM_PE
17067 if (arm_dllexport_name_p (name))
17068 name = arm_strip_name_encoding (name);
17069 #endif
17070 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17071 fprintf (f, "\t.thumb_func\n");
17072 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17073 }
17074
17075 if (current_function_pretend_args_size)
17076 {
17077 /* Output unwind directive for the stack adjustment. */
17078 if (ARM_EABI_UNWIND_TABLES)
17079 fprintf (f, "\t.pad #%d\n",
17080 current_function_pretend_args_size);
17081
17082 if (cfun->machine->uses_anonymous_args)
17083 {
17084 int num_pushes;
17085
17086 fprintf (f, "\tpush\t{");
17087
17088 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
17089
17090 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17091 regno <= LAST_ARG_REGNUM;
17092 regno++)
17093 asm_fprintf (f, "%r%s", regno,
17094 regno == LAST_ARG_REGNUM ? "" : ", ");
17095
17096 fprintf (f, "}\n");
17097 }
17098 else
17099 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17100 SP_REGNUM, SP_REGNUM,
17101 current_function_pretend_args_size);
17102
17103 /* We don't need to record the stores for unwinding (would it
17104 help the debugger any if we did?), but record the change in
17105 the stack pointer. */
17106 if (dwarf2out_do_frame ())
17107 {
17108 char *l = dwarf2out_cfi_label ();
17109
17110 cfa_offset = cfa_offset + current_function_pretend_args_size;
17111 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17112 }
17113 }
17114
17115 /* Get the registers we are going to push. */
17116 offsets = arm_get_frame_offsets ();
17117 live_regs_mask = offsets->saved_regs_mask;
17118 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17119 l_mask = live_regs_mask & 0x40ff;
17120 /* Then count how many other high registers will need to be pushed. */
17121 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17122
17123 if (TARGET_BACKTRACE)
17124 {
17125 unsigned offset;
17126 unsigned work_register;
17127
17128 /* We have been asked to create a stack backtrace structure.
17129 The code looks like this:
17130
17131 0 .align 2
17132 0 func:
17133 0 sub SP, #16 Reserve space for 4 registers.
17134 2 push {R7} Push low registers.
17135 4 add R7, SP, #20 Get the stack pointer before the push.
17136 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17137 8 mov R7, PC Get hold of the start of this code plus 12.
17138 10 str R7, [SP, #16] Store it.
17139 12 mov R7, FP Get hold of the current frame pointer.
17140 14 str R7, [SP, #4] Store it.
17141 16 mov R7, LR Get hold of the current return address.
17142 18 str R7, [SP, #12] Store it.
17143 20 add R7, SP, #16 Point at the start of the backtrace structure.
17144 22 mov FP, R7 Put this value into the frame pointer. */
17145
17146 work_register = thumb_find_work_register (live_regs_mask);
17147
17148 if (ARM_EABI_UNWIND_TABLES)
17149 asm_fprintf (f, "\t.pad #16\n");
17150
17151 asm_fprintf
17152 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17153 SP_REGNUM, SP_REGNUM);
17154
17155 if (dwarf2out_do_frame ())
17156 {
17157 char *l = dwarf2out_cfi_label ();
17158
17159 cfa_offset = cfa_offset + 16;
17160 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17161 }
17162
17163 if (l_mask)
17164 {
17165 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17166 offset = bit_count (l_mask) * UNITS_PER_WORD;
17167 }
17168 else
17169 offset = 0;
17170
17171 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17172 offset + 16 + current_function_pretend_args_size);
17173
17174 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17175 offset + 4);
17176
17177 /* Make sure that the instruction fetching the PC is in the right place
17178 to calculate "start of backtrace creation code + 12". */
17179 if (l_mask)
17180 {
17181 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17182 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17183 offset + 12);
17184 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17185 ARM_HARD_FRAME_POINTER_REGNUM);
17186 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17187 offset);
17188 }
17189 else
17190 {
17191 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17192 ARM_HARD_FRAME_POINTER_REGNUM);
17193 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17194 offset);
17195 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17196 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17197 offset + 12);
17198 }
17199
17200 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17201 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17202 offset + 8);
17203 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17204 offset + 12);
17205 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17206 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17207 }
17208 /* Optimization: If we are not pushing any low registers but we are going
17209 to push some high registers then delay our first push. This will just
17210 be a push of LR and we can combine it with the push of the first high
17211 register. */
17212 else if ((l_mask & 0xff) != 0
17213 || (high_regs_pushed == 0 && l_mask))
17214 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17215
17216 if (high_regs_pushed)
17217 {
17218 unsigned pushable_regs;
17219 unsigned next_hi_reg;
17220
17221 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17222 if (live_regs_mask & (1 << next_hi_reg))
17223 break;
17224
17225 pushable_regs = l_mask & 0xff;
17226
17227 if (pushable_regs == 0)
17228 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17229
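       /* High registers cannot appear in a Thumb-1 push, so copy each live
 	 high register into one of the pushable low registers and push those
 	 instead.  REAL_REGS_MASK records which high registers the pushed
 	 values actually belong to, for the DWARF frame information.  */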
17230 while (high_regs_pushed > 0)
17231 {
17232 unsigned long real_regs_mask = 0;
17233
17234 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17235 {
17236 if (pushable_regs & (1 << regno))
17237 {
17238 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17239
17240 high_regs_pushed --;
17241 real_regs_mask |= (1 << next_hi_reg);
17242
17243 if (high_regs_pushed)
17244 {
17245 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17246 next_hi_reg --)
17247 if (live_regs_mask & (1 << next_hi_reg))
17248 break;
17249 }
17250 else
17251 {
17252 pushable_regs &= ~((1 << regno) - 1);
17253 break;
17254 }
17255 }
17256 }
17257
17258 /* If we had to find a work register and we have not yet
17259 saved the LR then add it to the list of regs to push. */
17260 if (l_mask == (1 << LR_REGNUM))
17261 {
17262 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17263 1, &cfa_offset,
17264 real_regs_mask | (1 << LR_REGNUM));
17265 l_mask = 0;
17266 }
17267 else
17268 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17269 }
17270 }
17271 }
17272
17273 /* Handle the case of a double word load into a low register from
17274 a computed memory address. The computed address may involve a
17275 register which is overwritten by the load. */
17276 const char *
17277 thumb_load_double_from_address (rtx *operands)
17278 {
17279 rtx addr;
17280 rtx base;
17281 rtx offset;
17282 rtx arg1;
17283 rtx arg2;
17284
17285 gcc_assert (GET_CODE (operands[0]) == REG);
17286 gcc_assert (GET_CODE (operands[1]) == MEM);
17287
17288 /* Get the memory address. */
17289 addr = XEXP (operands[1], 0);
17290
17291 /* Work out how the memory address is computed. */
17292 switch (GET_CODE (addr))
17293 {
17294 case REG:
17295 operands[2] = adjust_address (operands[1], SImode, 4);
17296
17297 if (REGNO (operands[0]) == REGNO (addr))
17298 {
17299 output_asm_insn ("ldr\t%H0, %2", operands);
17300 output_asm_insn ("ldr\t%0, %1", operands);
17301 }
17302 else
17303 {
17304 output_asm_insn ("ldr\t%0, %1", operands);
17305 output_asm_insn ("ldr\t%H0, %2", operands);
17306 }
17307 break;
17308
17309 case CONST:
17310 /* Compute <address> + 4 for the high order load. */
17311 operands[2] = adjust_address (operands[1], SImode, 4);
17312
17313 output_asm_insn ("ldr\t%0, %1", operands);
17314 output_asm_insn ("ldr\t%H0, %2", operands);
17315 break;
17316
17317 case PLUS:
17318 arg1 = XEXP (addr, 0);
17319 arg2 = XEXP (addr, 1);
17320
17321 if (CONSTANT_P (arg1))
17322 base = arg2, offset = arg1;
17323 else
17324 base = arg1, offset = arg2;
17325
17326 gcc_assert (GET_CODE (base) == REG);
17327
17328 /* Catch the case of <address> = <reg> + <reg> */
17329 if (GET_CODE (offset) == REG)
17330 {
17331 int reg_offset = REGNO (offset);
17332 int reg_base = REGNO (base);
17333 int reg_dest = REGNO (operands[0]);
17334
17335 /* Add the base and offset registers together into the
17336 higher destination register. */
17337 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r\n",
17338 reg_dest + 1, reg_base, reg_offset);
17339
17340 /* Load the lower destination register from the address in
17341 the higher destination register. */
17342 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]\n",
17343 reg_dest, reg_dest + 1);
17344
17345 /* Load the higher destination register from its own address
17346 plus 4. */
17347 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]\n",
17348 reg_dest + 1, reg_dest + 1);
17349 }
17350 else
17351 {
17352 /* Compute <address> + 4 for the high order load. */
17353 operands[2] = adjust_address (operands[1], SImode, 4);
17354
17355 /* If the computed address is held in the low order register
17356 then load the high order register first, otherwise always
17357 load the low order register first. */
17358 if (REGNO (operands[0]) == REGNO (base))
17359 {
17360 output_asm_insn ("ldr\t%H0, %2", operands);
17361 output_asm_insn ("ldr\t%0, %1", operands);
17362 }
17363 else
17364 {
17365 output_asm_insn ("ldr\t%0, %1", operands);
17366 output_asm_insn ("ldr\t%H0, %2", operands);
17367 }
17368 }
17369 break;
17370
17371 case LABEL_REF:
17372 /* With no registers to worry about we can just load the value
17373 directly. */
17374 operands[2] = adjust_address (operands[1], SImode, 4);
17375
17376 output_asm_insn ("ldr\t%H0, %2", operands);
17377 output_asm_insn ("ldr\t%0, %1", operands);
17378 break;
17379
17380 default:
17381 gcc_unreachable ();
17382 }
17383
17384 return "";
17385 }
17386
17387 const char *
17388 thumb_output_move_mem_multiple (int n, rtx *operands)
17389 {
17390 rtx tmp;
17391
17392 switch (n)
17393 {
17394 case 2:
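       /* The register list of an ldmia/stmia must be in ascending order, so
 	 swap the two transfer registers if necessary before emitting.  */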
17395 if (REGNO (operands[4]) > REGNO (operands[5]))
17396 {
17397 tmp = operands[4];
17398 operands[4] = operands[5];
17399 operands[5] = tmp;
17400 }
17401 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17402 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17403 break;
17404
17405 case 3:
17406 if (REGNO (operands[4]) > REGNO (operands[5]))
17407 {
17408 tmp = operands[4];
17409 operands[4] = operands[5];
17410 operands[5] = tmp;
17411 }
17412 if (REGNO (operands[5]) > REGNO (operands[6]))
17413 {
17414 tmp = operands[5];
17415 operands[5] = operands[6];
17416 operands[6] = tmp;
17417 }
17418 if (REGNO (operands[4]) > REGNO (operands[5]))
17419 {
17420 tmp = operands[4];
17421 operands[4] = operands[5];
17422 operands[5] = tmp;
17423 }
17424
17425 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17426 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17427 break;
17428
17429 default:
17430 gcc_unreachable ();
17431 }
17432
17433 return "";
17434 }
17435
17436 /* Output a call-via instruction for thumb state. */
17437 const char *
17438 thumb_call_via_reg (rtx reg)
17439 {
17440 int regno = REGNO (reg);
17441 rtx *labelp;
17442
17443 gcc_assert (regno < LR_REGNUM);
17444
17445 /* If we are in the normal text section we can use a single instance
17446 per compilation unit. If we are doing function sections, then we need
17447 an entry per section, since we can't rely on reachability. */
17448 if (in_section == text_section)
17449 {
17450 thumb_call_reg_needed = 1;
17451
17452 if (thumb_call_via_label[regno] == NULL)
17453 thumb_call_via_label[regno] = gen_label_rtx ();
17454 labelp = thumb_call_via_label + regno;
17455 }
17456 else
17457 {
17458 if (cfun->machine->call_via[regno] == NULL)
17459 cfun->machine->call_via[regno] = gen_label_rtx ();
17460 labelp = cfun->machine->call_via + regno;
17461 }
17462
17463 output_asm_insn ("bl\t%a0", labelp);
17464 return "";
17465 }
17466
17467 /* Routines for generating rtl. */
17468 void
17469 thumb_expand_movmemqi (rtx *operands)
17470 {
17471 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17472 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17473 HOST_WIDE_INT len = INTVAL (operands[2]);
17474 HOST_WIDE_INT offset = 0;
17475
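   /* Copy 12- and 8-byte chunks with the ldmia/stmia-based movmem patterns,
      which advance both pointers, then handle any remaining word, halfword
      and byte at explicit offsets from the final pointer values.  */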
17476 while (len >= 12)
17477 {
17478 emit_insn (gen_movmem12b (out, in, out, in));
17479 len -= 12;
17480 }
17481
17482 if (len >= 8)
17483 {
17484 emit_insn (gen_movmem8b (out, in, out, in));
17485 len -= 8;
17486 }
17487
17488 if (len >= 4)
17489 {
17490 rtx reg = gen_reg_rtx (SImode);
17491 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17492 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17493 len -= 4;
17494 offset += 4;
17495 }
17496
17497 if (len >= 2)
17498 {
17499 rtx reg = gen_reg_rtx (HImode);
17500 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17501 plus_constant (in, offset))));
17502 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17503 reg));
17504 len -= 2;
17505 offset += 2;
17506 }
17507
17508 if (len)
17509 {
17510 rtx reg = gen_reg_rtx (QImode);
17511 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17512 plus_constant (in, offset))));
17513 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17514 reg));
17515 }
17516 }
17517
17518 void
17519 thumb_reload_out_hi (rtx *operands)
17520 {
17521 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17522 }
17523
17524 /* Handle reading a half-word from memory during reload. */
17525 void
17526 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17527 {
17528 gcc_unreachable ();
17529 }
17530
17531 /* Return the length of a function name prefix
17532 that starts with the character 'c'. */
17533 static int
17534 arm_get_strip_length (int c)
17535 {
17536 switch (c)
17537 {
17538 ARM_NAME_ENCODING_LENGTHS
17539 default: return 0;
17540 }
17541 }
17542
17543 /* Return a pointer to a function's name with any
17544 and all prefix encodings stripped from it. */
17545 const char *
17546 arm_strip_name_encoding (const char *name)
17547 {
17548 int skip;
17549
17550 while ((skip = arm_get_strip_length (* name)))
17551 name += skip;
17552
17553 return name;
17554 }
17555
17556 /* If there is a '*' anywhere in the name's prefix, then
17557 emit the stripped name verbatim, otherwise prepend an
17558 underscore if leading underscores are being used. */
17559 void
17560 arm_asm_output_labelref (FILE *stream, const char *name)
17561 {
17562 int skip;
17563 int verbatim = 0;
17564
17565 while ((skip = arm_get_strip_length (* name)))
17566 {
17567 verbatim |= (*name == '*');
17568 name += skip;
17569 }
17570
17571 if (verbatim)
17572 fputs (name, stream);
17573 else
17574 asm_fprintf (stream, "%U%s", name);
17575 }
17576
17577 static void
17578 arm_file_start (void)
17579 {
17580 int val;
17581
17582 if (TARGET_UNIFIED_ASM)
17583 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17584
17585 if (TARGET_BPABI)
17586 {
17587 const char *fpu_name;
17588 if (arm_select[0].string)
17589 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17590 else if (arm_select[1].string)
17591 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17592 else
17593 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17594 all_cores[arm_default_cpu].name);
17595
17596 if (TARGET_SOFT_FLOAT)
17597 {
17598 if (TARGET_VFP)
17599 fpu_name = "softvfp";
17600 else
17601 fpu_name = "softfpa";
17602 }
17603 else
17604 {
17605 int set_float_abi_attributes = 0;
17606 switch (arm_fpu_arch)
17607 {
17608 case FPUTYPE_FPA:
17609 fpu_name = "fpa";
17610 break;
17611 case FPUTYPE_FPA_EMU2:
17612 fpu_name = "fpe2";
17613 break;
17614 case FPUTYPE_FPA_EMU3:
17615 fpu_name = "fpe3";
17616 break;
17617 case FPUTYPE_MAVERICK:
17618 fpu_name = "maverick";
17619 break;
17620 case FPUTYPE_VFP:
17621 fpu_name = "vfp";
17622 set_float_abi_attributes = 1;
17623 break;
17624 case FPUTYPE_VFP3:
17625 fpu_name = "vfp3";
17626 set_float_abi_attributes = 1;
17627 break;
17628 case FPUTYPE_NEON:
17629 fpu_name = "neon";
17630 set_float_abi_attributes = 1;
17631 break;
17632 default:
17633 	      abort ();
17634 }
17635 if (set_float_abi_attributes)
17636 {
17637 if (TARGET_HARD_FLOAT)
17638 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17639 if (TARGET_HARD_FLOAT_ABI)
17640 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17641 }
17642 }
17643 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17644
17645 /* Some of these attributes only apply when the corresponding features
17646 are used. However we don't have any easy way of figuring this out.
17647 Conservatively record the setting that would have been used. */
17648
17649 /* Tag_ABI_FP_rounding. */
17650 if (flag_rounding_math)
17651 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17652 if (!flag_unsafe_math_optimizations)
17653 {
17654 	      /* Tag_ABI_FP_denormal.  */
17655 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17656 /* Tag_ABI_FP_exceptions. */
17657 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17658 }
17659 /* Tag_ABI_FP_user_exceptions. */
17660 if (flag_signaling_nans)
17661 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17662 /* Tag_ABI_FP_number_model. */
17663 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17664 flag_finite_math_only ? 1 : 3);
17665
17666 /* Tag_ABI_align8_needed. */
17667 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17668 /* Tag_ABI_align8_preserved. */
17669 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17670 /* Tag_ABI_enum_size. */
17671 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17672 flag_short_enums ? 1 : 2);
17673
17674 /* Tag_ABI_optimization_goals. */
17675 if (optimize_size)
17676 val = 4;
17677 else if (optimize >= 2)
17678 val = 2;
17679 else if (optimize)
17680 val = 1;
17681 else
17682 val = 6;
17683 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17684
17685 if (arm_lang_output_object_attributes_hook)
17686 	  arm_lang_output_object_attributes_hook ();
17687     }
17688   default_file_start ();
17689 }
17690
17691 static void
17692 arm_file_end (void)
17693 {
17694 int regno;
17695
17696 if (NEED_INDICATE_EXEC_STACK)
17697 /* Add .note.GNU-stack. */
17698 file_end_indicate_exec_stack ();
17699
17700 if (! thumb_call_reg_needed)
17701 return;
17702
17703 switch_to_section (text_section);
17704 asm_fprintf (asm_out_file, "\t.code 16\n");
17705 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17706
17707 for (regno = 0; regno < LR_REGNUM; regno++)
17708 {
17709 rtx label = thumb_call_via_label[regno];
17710
17711 if (label != 0)
17712 {
17713 targetm.asm_out.internal_label (asm_out_file, "L",
17714 CODE_LABEL_NUMBER (label));
17715 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17716 }
17717 }
17718 }
17719
17720 #ifndef ARM_PE
17721 /* Symbols in the text segment can be accessed without indirecting via the
17722 constant pool; it may take an extra binary operation, but this is still
17723 faster than indirecting via memory. Don't do this when not optimizing,
17724    since we won't be calculating all of the offsets necessary to do this
17725 simplification. */
17726
17727 static void
17728 arm_encode_section_info (tree decl, rtx rtl, int first)
17729 {
17730 if (optimize > 0 && TREE_CONSTANT (decl))
17731 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17732
17733 default_encode_section_info (decl, rtl, first);
17734 }
17735 #endif /* !ARM_PE */
17736
17737 static void
17738 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17739 {
17740 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17741 && !strcmp (prefix, "L"))
17742 {
17743 arm_ccfsm_state = 0;
17744 arm_target_insn = NULL;
17745 }
17746 default_internal_label (stream, prefix, labelno);
17747 }
17748
17749 /* Output code to add DELTA to the first argument, and then jump
17750 to FUNCTION. Used for C++ multiple inheritance. */
17751 static void
17752 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17753 HOST_WIDE_INT delta,
17754 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17755 tree function)
17756 {
17757 static int thunk_label = 0;
17758 char label[256];
17759 char labelpc[256];
17760 int mi_delta = delta;
17761 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17762 int shift = 0;
17763 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17764 ? 1 : 0);
17765 if (mi_delta < 0)
17766 mi_delta = - mi_delta;
17767
17768 if (TARGET_THUMB1)
17769 {
17770 int labelno = thunk_label++;
17771 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17772       /* Thunks are entered in ARM mode when available.  */
17773 if (TARGET_THUMB1_ONLY)
17774 {
17775 /* push r3 so we can use it as a temporary. */
17776 /* TODO: Omit this save if r3 is not used. */
17777 fputs ("\tpush {r3}\n", file);
17778 fputs ("\tldr\tr3, ", file);
17779 }
17780 else
17781 {
17782 fputs ("\tldr\tr12, ", file);
17783 }
17784 assemble_name (file, label);
17785 fputc ('\n', file);
17786 if (flag_pic)
17787 {
17788 /* If we are generating PIC, the ldr instruction below loads
17789 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17790 the address of the add + 8, so we have:
17791
17792 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17793 = target + 1.
17794
17795 Note that we have "+ 1" because some versions of GNU ld
17796 don't set the low bit of the result for R_ARM_REL32
17797 relocations against thumb function symbols.
17798 On ARMv6M this is +4, not +8. */
17799 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17800 assemble_name (file, labelpc);
17801 fputs (":\n", file);
17802 if (TARGET_THUMB1_ONLY)
17803 {
17804 /* This is 2 insns after the start of the thunk, so we know it
17805 is 4-byte aligned. */
17806 fputs ("\tadd\tr3, pc, r3\n", file);
17807 fputs ("\tmov r12, r3\n", file);
17808 }
17809 else
17810 fputs ("\tadd\tr12, pc, r12\n", file);
17811 }
17812 else if (TARGET_THUMB1_ONLY)
17813 fputs ("\tmov r12, r3\n", file);
17814 }
17815 if (TARGET_THUMB1_ONLY)
17816 {
17817 if (mi_delta > 255)
17818 {
17819 fputs ("\tldr\tr3, ", file);
17820 assemble_name (file, label);
17821 fputs ("+4\n", file);
17822 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
17823 mi_op, this_regno, this_regno);
17824 }
17825 else if (mi_delta != 0)
17826 {
17827 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17828 mi_op, this_regno, this_regno,
17829 mi_delta);
17830 }
17831 }
17832 else
17833 {
17834 /* TODO: Use movw/movt for large constants when available. */
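       /* Emit the delta as a series of add/sub instructions, peeling off one
 	 8-bit chunk at an even bit position each time, so that every
 	 immediate is a valid ARM rotated constant.  */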
17835 while (mi_delta != 0)
17836 {
17837 if ((mi_delta & (3 << shift)) == 0)
17838 shift += 2;
17839 else
17840 {
17841 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17842 mi_op, this_regno, this_regno,
17843 mi_delta & (0xff << shift));
17844 mi_delta &= ~(0xff << shift);
17845 shift += 8;
17846 }
17847 }
17848 }
17849 if (TARGET_THUMB1)
17850 {
17851 if (TARGET_THUMB1_ONLY)
17852 fputs ("\tpop\t{r3}\n", file);
17853
17854 fprintf (file, "\tbx\tr12\n");
17855 ASM_OUTPUT_ALIGN (file, 2);
17856 assemble_name (file, label);
17857 fputs (":\n", file);
17858 if (flag_pic)
17859 {
17860 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17861 rtx tem = XEXP (DECL_RTL (function), 0);
17862 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17863 tem = gen_rtx_MINUS (GET_MODE (tem),
17864 tem,
17865 gen_rtx_SYMBOL_REF (Pmode,
17866 ggc_strdup (labelpc)));
17867 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17868 }
17869 else
17870 /* Output ".word .LTHUNKn". */
17871 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17872
17873 if (TARGET_THUMB1_ONLY && mi_delta > 255)
17874 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
17875 }
17876 else
17877 {
17878 fputs ("\tb\t", file);
17879 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17880 if (NEED_PLT_RELOC)
17881 fputs ("(PLT)", file);
17882 fputc ('\n', file);
17883 }
17884 }
17885
17886 int
17887 arm_emit_vector_const (FILE *file, rtx x)
17888 {
17889 int i;
17890 const char * pattern;
17891
17892 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17893
17894 switch (GET_MODE (x))
17895 {
17896 case V2SImode: pattern = "%08x"; break;
17897 case V4HImode: pattern = "%04x"; break;
17898 case V8QImode: pattern = "%02x"; break;
17899 default: gcc_unreachable ();
17900 }
17901
17902 fprintf (file, "0x");
17903 for (i = CONST_VECTOR_NUNITS (x); i--;)
17904 {
17905 rtx element;
17906
17907 element = CONST_VECTOR_ELT (x, i);
17908 fprintf (file, pattern, INTVAL (element));
17909 }
17910
17911 return 1;
17912 }
17913
17914 const char *
17915 arm_output_load_gr (rtx *operands)
17916 {
17917 rtx reg;
17918 rtx offset;
17919 rtx wcgr;
17920 rtx sum;
17921
17922 if (GET_CODE (operands [1]) != MEM
17923 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17924 || GET_CODE (reg = XEXP (sum, 0)) != REG
17925 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17926 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17927 return "wldrw%?\t%0, %1";
17928
17929 /* Fix up an out-of-range load of a GR register. */
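   /* The offset is outside the +/-1023 range checked above, so spill a core
      register to the stack, load the value into it with a plain ldr, and
      then transfer it to the GR register with tmcr before restoring the
      spilled register.  */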
17930 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17931 wcgr = operands[0];
17932 operands[0] = reg;
17933 output_asm_insn ("ldr%?\t%0, %1", operands);
17934
17935 operands[0] = wcgr;
17936 operands[1] = reg;
17937 output_asm_insn ("tmcr%?\t%0, %1", operands);
17938 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
17939
17940 return "";
17941 }
17942
17943 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
17944
17945 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
17946 named arg and all anonymous args onto the stack.
17947 XXX I know the prologue shouldn't be pushing registers, but it is faster
17948 that way. */
17949
17950 static void
17951 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
17952 enum machine_mode mode,
17953 tree type,
17954 int *pretend_size,
17955 int second_time ATTRIBUTE_UNUSED)
17956 {
17957 int nregs = cum->nregs;
17958 if (nregs & 1
17959 && ARM_DOUBLEWORD_ALIGN
17960 && arm_needs_doubleword_align (mode, type))
17961 nregs++;
17962
17963 cfun->machine->uses_anonymous_args = 1;
17964 if (nregs < NUM_ARG_REGS)
17965 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
17966 }
17967
17968 /* Return nonzero if the CONSUMER instruction (a store) does not need
17969 PRODUCER's value to calculate the address. */
17970
17971 int
17972 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
17973 {
17974 rtx value = PATTERN (producer);
17975 rtx addr = PATTERN (consumer);
17976
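   /* Strip any COND_EXEC / PARALLEL wrappers, leaving the producer's
      destination (the register being computed) and the consumer's
      destination (the MEM being stored to) for the overlap check below.  */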
17977 if (GET_CODE (value) == COND_EXEC)
17978 value = COND_EXEC_CODE (value);
17979 if (GET_CODE (value) == PARALLEL)
17980 value = XVECEXP (value, 0, 0);
17981 value = XEXP (value, 0);
17982 if (GET_CODE (addr) == COND_EXEC)
17983 addr = COND_EXEC_CODE (addr);
17984 if (GET_CODE (addr) == PARALLEL)
17985 addr = XVECEXP (addr, 0, 0);
17986 addr = XEXP (addr, 0);
17987
17988 return !reg_overlap_mentioned_p (value, addr);
17989 }
17990
17991 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
17992 have an early register shift value or amount dependency on the
17993 result of PRODUCER. */
17994
17995 int
17996 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
17997 {
17998 rtx value = PATTERN (producer);
17999 rtx op = PATTERN (consumer);
18000 rtx early_op;
18001
18002 if (GET_CODE (value) == COND_EXEC)
18003 value = COND_EXEC_CODE (value);
18004 if (GET_CODE (value) == PARALLEL)
18005 value = XVECEXP (value, 0, 0);
18006 value = XEXP (value, 0);
18007 if (GET_CODE (op) == COND_EXEC)
18008 op = COND_EXEC_CODE (op);
18009 if (GET_CODE (op) == PARALLEL)
18010 op = XVECEXP (op, 0, 0);
18011 op = XEXP (op, 1);
18012
18013 early_op = XEXP (op, 0);
18014 /* This is either an actual independent shift, or a shift applied to
18015 the first operand of another operation. We want the whole shift
18016 operation. */
18017 if (GET_CODE (early_op) == REG)
18018 early_op = op;
18019
18020 return !reg_overlap_mentioned_p (value, early_op);
18021 }
18022
18023 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18024 have an early register shift value dependency on the result of
18025 PRODUCER. */
18026
18027 int
18028 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18029 {
18030 rtx value = PATTERN (producer);
18031 rtx op = PATTERN (consumer);
18032 rtx early_op;
18033
18034 if (GET_CODE (value) == COND_EXEC)
18035 value = COND_EXEC_CODE (value);
18036 if (GET_CODE (value) == PARALLEL)
18037 value = XVECEXP (value, 0, 0);
18038 value = XEXP (value, 0);
18039 if (GET_CODE (op) == COND_EXEC)
18040 op = COND_EXEC_CODE (op);
18041 if (GET_CODE (op) == PARALLEL)
18042 op = XVECEXP (op, 0, 0);
18043 op = XEXP (op, 1);
18044
18045 early_op = XEXP (op, 0);
18046
18047 /* This is either an actual independent shift, or a shift applied to
18048 the first operand of another operation. We want the value being
18049 shifted, in either case. */
18050 if (GET_CODE (early_op) != REG)
18051 early_op = XEXP (early_op, 0);
18052
18053 return !reg_overlap_mentioned_p (value, early_op);
18054 }
18055
18056 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18057 have an early register mult dependency on the result of
18058 PRODUCER. */
18059
18060 int
18061 arm_no_early_mul_dep (rtx producer, rtx consumer)
18062 {
18063 rtx value = PATTERN (producer);
18064 rtx op = PATTERN (consumer);
18065
18066 if (GET_CODE (value) == COND_EXEC)
18067 value = COND_EXEC_CODE (value);
18068 if (GET_CODE (value) == PARALLEL)
18069 value = XVECEXP (value, 0, 0);
18070 value = XEXP (value, 0);
18071 if (GET_CODE (op) == COND_EXEC)
18072 op = COND_EXEC_CODE (op);
18073 if (GET_CODE (op) == PARALLEL)
18074 op = XVECEXP (op, 0, 0);
18075 op = XEXP (op, 1);
18076
18077 return (GET_CODE (op) == PLUS
18078 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18079 }
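
/* For example, with the consumer "mla r4, r2, r3, r1", i.e.
   (set (reg:SI r4) (plus:SI (mult:SI (reg:SI r2) (reg:SI r3))
                             (reg:SI r1))),
   a producer that sets r1 only feeds the accumulator and the result
   is nonzero; a producer that sets r2 or r3 feeds the multiply
   operands and the result is zero.  */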
18080
18081 /* We can't rely on the caller doing the proper promotion when
18082 using APCS or ATPCS. */
18083
18084 static bool
18085 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18086 {
18087 return !TARGET_AAPCS_BASED;
18088 }
18089
18090
18091 /* AAPCS based ABIs use short enums by default. */
18092
18093 static bool
18094 arm_default_short_enums (void)
18095 {
18096 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18097 }
18098
18099
18100 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18101
18102 static bool
18103 arm_align_anon_bitfield (void)
18104 {
18105 return TARGET_AAPCS_BASED;
18106 }
18107
18108
18109 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18110
18111 static tree
18112 arm_cxx_guard_type (void)
18113 {
18114 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18115 }
18116
18117 /* Return nonzero if the consumer (a multiply-accumulate instruction)
18118 has an accumulator dependency on the result of the producer (a
18119 multiplication instruction) and no other dependency on that result. */
18120 int
18121 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18122 {
18123 rtx mul = PATTERN (producer);
18124 rtx mac = PATTERN (consumer);
18125 rtx mul_result;
18126 rtx mac_op0, mac_op1, mac_acc;
18127
18128 if (GET_CODE (mul) == COND_EXEC)
18129 mul = COND_EXEC_CODE (mul);
18130 if (GET_CODE (mac) == COND_EXEC)
18131 mac = COND_EXEC_CODE (mac);
18132
18133 /* Check that mul is of the form (set (...) (mult ...))
18134 and mla is of the form (set (...) (plus (mult ...) (...))). */
18135 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18136 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18137 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18138 return 0;
18139
18140 mul_result = XEXP (mul, 0);
18141 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18142 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18143 mac_acc = XEXP (XEXP (mac, 1), 1);
18144
18145 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18146 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18147 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18148 }
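
/* For example, for the producer "mul r0, r1, r2" and the consumer
   "mla r5, r3, r4, r0" the accumulator r0 is exactly the
   multiplication result and is not reused as a multiply operand, so
   the result is nonzero; "mla r5, r0, r4, r0" would give zero because
   r0 is also one of the multiply operands.  */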
18149
18150
18151 /* The EABI says test the least significant bit of a guard variable. */
18152
18153 static bool
18154 arm_cxx_guard_mask_bit (void)
18155 {
18156 return TARGET_AAPCS_BASED;
18157 }
18158
18159
18160 /* The EABI specifies that all array cookies are 8 bytes long. */
18161
18162 static tree
18163 arm_get_cookie_size (tree type)
18164 {
18165 tree size;
18166
18167 if (!TARGET_AAPCS_BASED)
18168 return default_cxx_get_cookie_size (type);
18169
18170 size = build_int_cst (sizetype, 8);
18171 return size;
18172 }
18173
18174
18175 /* The EABI says that array cookies should also contain the element size. */
18176
18177 static bool
18178 arm_cookie_has_size (void)
18179 {
18180 return TARGET_AAPCS_BASED;
18181 }
18182
18183
18184 /* The EABI says constructors and destructors should return a pointer to
18185 the object constructed/destroyed. */
18186
18187 static bool
18188 arm_cxx_cdtor_returns_this (void)
18189 {
18190 return TARGET_AAPCS_BASED;
18191 }
18192
18193 /* The EABI says that an inline function may never be the key
18194 method. */
18195
18196 static bool
18197 arm_cxx_key_method_may_be_inline (void)
18198 {
18199 return !TARGET_AAPCS_BASED;
18200 }
18201
18202 static void
18203 arm_cxx_determine_class_data_visibility (tree decl)
18204 {
18205 if (!TARGET_AAPCS_BASED)
18206 return;
18207
18208 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18209 is exported. However, on systems without dynamic vague linkage,
18210 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18211 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18212 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18213 else
18214 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18215 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18216 }
18217
18218 static bool
18219 arm_cxx_class_data_always_comdat (void)
18220 {
18221 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only has
18222 vague linkage if the class has no key function. */
18223 return !TARGET_AAPCS_BASED;
18224 }
18225
18226
18227 /* The EABI says __aeabi_atexit should be used to register static
18228 destructors. */
18229
18230 static bool
18231 arm_cxx_use_aeabi_atexit (void)
18232 {
18233 return TARGET_AAPCS_BASED;
18234 }
18235
18236
18237 void
18238 arm_set_return_address (rtx source, rtx scratch)
18239 {
18240 arm_stack_offsets *offsets;
18241 HOST_WIDE_INT delta;
18242 rtx addr;
18243 unsigned long saved_regs;
18244
18245 offsets = arm_get_frame_offsets ();
18246 saved_regs = offsets->saved_regs_mask;
18247
18248 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18249 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18250 else
18251 {
18252 if (frame_pointer_needed)
18253 addr = plus_constant (hard_frame_pointer_rtx, -4);
18254 else
18255 {
18256 /* LR will be the first saved register. */
18257 delta = offsets->outgoing_args - (offsets->frame + 4);
18258
18259
18260 if (delta >= 4096)
18261 {
18262 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18263 GEN_INT (delta & ~4095)));
18264 addr = scratch;
18265 delta &= 4095;
18266 }
18267 else
18268 addr = stack_pointer_rtx;
18269
18270 addr = plus_constant (addr, delta);
18271 }
18272 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18273 }
18274 }
18275
18276
18277 void
18278 thumb_set_return_address (rtx source, rtx scratch)
18279 {
18280 arm_stack_offsets *offsets;
18281 HOST_WIDE_INT delta;
18282 HOST_WIDE_INT limit;
18283 int reg;
18284 rtx addr;
18285 unsigned long mask;
18286
18287 emit_insn (gen_rtx_USE (VOIDmode, source));
18288
18289 offsets = arm_get_frame_offsets ();
18290 mask = offsets->saved_regs_mask;
18291 if (mask & (1 << LR_REGNUM))
18292 {
18293 limit = 1024;
18294 /* Find the saved regs. */
18295 if (frame_pointer_needed)
18296 {
18297 delta = offsets->soft_frame - offsets->saved_args;
18298 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18299 if (TARGET_THUMB1)
18300 limit = 128;
18301 }
18302 else
18303 {
18304 delta = offsets->outgoing_args - offsets->saved_args;
18305 reg = SP_REGNUM;
18306 }
18307 /* Allow for the stack frame. */
18308 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18309 delta -= 16;
18310 /* The link register is always the first saved register. */
18311 delta -= 4;
18312
18313 /* Construct the address. */
18314 addr = gen_rtx_REG (SImode, reg);
18315 if (delta > limit)
18316 {
18317 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18318 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18319 addr = scratch;
18320 }
18321 else
18322 addr = plus_constant (addr, delta);
18323
18324 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18325 }
18326 else
18327 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18328 }
18329
18330 /* Implements target hook vector_mode_supported_p. */
18331 bool
18332 arm_vector_mode_supported_p (enum machine_mode mode)
18333 {
18334 /* Neon also supports V2SImode, etc. listed in the clause below. */
18335 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18336 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18337 return true;
18338
18339 if ((mode == V2SImode)
18340 || (mode == V4HImode)
18341 || (mode == V8QImode))
18342 return true;
18343
18344 return false;
18345 }
18346
18347 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18348 ARM insns and therefore guarantee that the shift count is modulo 256.
18349 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18350 guarantee no particular behavior for out-of-range counts. */
18351
18352 static unsigned HOST_WIDE_INT
18353 arm_shift_truncation_mask (enum machine_mode mode)
18354 {
18355 return mode == SImode ? 255 : 0;
18356 }
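
/* For example, when expanding an SImode shift whose count is
   (and:SI (reg:SI n) (const_int 255)) the masking operation can be
   omitted, because the returned mask of 255 promises that the shift
   instruction itself only uses the low eight bits of the count; the
   zero returned for DImode forbids that simplification.  */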
18357
18358
18359 /* Map internal gcc register numbers to DWARF2 register numbers. */
18360
18361 unsigned int
18362 arm_dbx_register_number (unsigned int regno)
18363 {
18364 if (regno < 16)
18365 return regno;
18366
18367 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18368 compatibility. The EABI defines them as registers 96-103. */
18369 if (IS_FPA_REGNUM (regno))
18370 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18371
18372 /* FIXME: VFPv3 register numbering. */
18373 if (IS_VFP_REGNUM (regno))
18374 return 64 + regno - FIRST_VFP_REGNUM;
18375
18376 if (IS_IWMMXT_GR_REGNUM (regno))
18377 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18378
18379 if (IS_IWMMXT_REGNUM (regno))
18380 return 112 + regno - FIRST_IWMMXT_REGNUM;
18381
18382 gcc_unreachable ();
18383 }
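
/* For example, the core register r13 (sp) keeps DWARF number 13, the
   first VFP register maps to 64, and the first iWMMXt register maps
   to 112.  */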
18384
18385
18386 #ifdef TARGET_UNWIND_INFO
18387 /* Emit unwind directives for a store-multiple instruction or stack pointer
18388 push during alignment.
18389 These should only ever be generated by the function prologue code, so
18390 expect them to have a particular form. */
18391
18392 static void
18393 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18394 {
18395 int i;
18396 HOST_WIDE_INT offset;
18397 HOST_WIDE_INT nregs;
18398 int reg_size;
18399 unsigned reg;
18400 unsigned lastreg;
18401 rtx e;
18402
18403 e = XVECEXP (p, 0, 0);
18404 if (GET_CODE (e) != SET)
18405 abort ();
18406
18407 /* First insn will adjust the stack pointer. */
18408 if (GET_CODE (e) != SET
18409 || GET_CODE (XEXP (e, 0)) != REG
18410 || REGNO (XEXP (e, 0)) != SP_REGNUM
18411 || GET_CODE (XEXP (e, 1)) != PLUS)
18412 abort ();
18413
18414 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18415 nregs = XVECLEN (p, 0) - 1;
18416
18417 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18418 if (reg < 16)
18419 {
18420 /* The function prologue may also push pc, but does not annotate it, as it
18421 is never restored.  We turn this into a stack pointer adjustment.  */
18422 if (nregs * 4 == offset - 4)
18423 {
18424 fprintf (asm_out_file, "\t.pad #4\n");
18425 offset -= 4;
18426 }
18427 reg_size = 4;
18428 fprintf (asm_out_file, "\t.save {");
18429 }
18430 else if (IS_VFP_REGNUM (reg))
18431 {
18432 reg_size = 8;
18433 fprintf (asm_out_file, "\t.vsave {");
18434 }
18435 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18436 {
18437 /* FPA registers are done differently. */
18438 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18439 return;
18440 }
18441 else
18442 /* Unknown register type. */
18443 abort ();
18444
18445 /* If the stack increment doesn't match the size of the saved registers,
18446 something has gone horribly wrong. */
18447 if (offset != nregs * reg_size)
18448 abort ();
18449
18450 offset = 0;
18451 lastreg = 0;
18452 /* The remaining insns will describe the stores. */
18453 for (i = 1; i <= nregs; i++)
18454 {
18455 /* Expect (set (mem <addr>) (reg)).
18456 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18457 e = XVECEXP (p, 0, i);
18458 if (GET_CODE (e) != SET
18459 || GET_CODE (XEXP (e, 0)) != MEM
18460 || GET_CODE (XEXP (e, 1)) != REG)
18461 abort ();
18462
18463 reg = REGNO (XEXP (e, 1));
18464 if (reg < lastreg)
18465 abort ();
18466
18467 if (i != 1)
18468 fprintf (asm_out_file, ", ");
18469 /* We can't use %r for vfp because we need to use the
18470 double precision register names. */
18471 if (IS_VFP_REGNUM (reg))
18472 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18473 else
18474 asm_fprintf (asm_out_file, "%r", reg);
18475
18476 #ifdef ENABLE_CHECKING
18477 /* Check that the addresses are consecutive. */
18478 e = XEXP (XEXP (e, 0), 0);
18479 if (GET_CODE (e) == PLUS)
18480 {
18481 offset += reg_size;
18482 if (GET_CODE (XEXP (e, 0)) != REG
18483 || REGNO (XEXP (e, 0)) != SP_REGNUM
18484 || GET_CODE (XEXP (e, 1)) != CONST_INT
18485 || offset != INTVAL (XEXP (e, 1)))
18486 abort ();
18487 }
18488 else if (i != 1
18489 || GET_CODE (e) != REG
18490 || REGNO (e) != SP_REGNUM)
18491 abort ();
18492 #endif
18493 }
18494 fprintf (asm_out_file, "}\n");
18495 }
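
/* For example, a prologue store-multiple pushing {r4, r5, lr} becomes
   "\t.save {r4, r5, lr}\n"; when the prologue also pushes pc (whose
   store is not annotated, since pc is never restored), the extra word
   is reported with a preceding "\t.pad #4\n".  */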
18496
18497 /* Emit unwind directives for a SET. */
18498
18499 static void
18500 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18501 {
18502 rtx e0;
18503 rtx e1;
18504 unsigned reg;
18505
18506 e0 = XEXP (p, 0);
18507 e1 = XEXP (p, 1);
18508 switch (GET_CODE (e0))
18509 {
18510 case MEM:
18511 /* Pushing a single register. */
18512 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18513 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18514 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18515 abort ();
18516
18517 asm_fprintf (asm_out_file, "\t.save ");
18518 if (IS_VFP_REGNUM (REGNO (e1)))
18519 asm_fprintf (asm_out_file, "{d%d}\n",
18520 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18521 else
18522 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
18523 break;
18524
18525 case REG:
18526 if (REGNO (e0) == SP_REGNUM)
18527 {
18528 /* A stack increment. */
18529 if (GET_CODE (e1) != PLUS
18530 || GET_CODE (XEXP (e1, 0)) != REG
18531 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18532 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18533 abort ();
18534
18535 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18536 -INTVAL (XEXP (e1, 1)));
18537 }
18538 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18539 {
18540 HOST_WIDE_INT offset;
18541
18542 if (GET_CODE (e1) == PLUS)
18543 {
18544 if (GET_CODE (XEXP (e1, 0)) != REG
18545 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18546 abort ();
18547 reg = REGNO (XEXP (e1, 0));
18548 offset = INTVAL (XEXP (e1, 1));
18549 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18550 HARD_FRAME_POINTER_REGNUM, reg,
18551 offset);
18552 }
18553 else if (GET_CODE (e1) == REG)
18554 {
18555 reg = REGNO (e1);
18556 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18557 HARD_FRAME_POINTER_REGNUM, reg);
18558 }
18559 else
18560 abort ();
18561 }
18562 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18563 {
18564 /* Move from sp to reg. */
18565 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18566 }
18567 else if (GET_CODE (e1) == PLUS
18568 && GET_CODE (XEXP (e1, 0)) == REG
18569 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18570 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18571 {
18572 /* Set reg to offset from sp. */
18573 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18574 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
18575 }
18576 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18577 {
18578 /* Stack pointer save before alignment. */
18579 reg = REGNO (e0);
18580 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18581 reg + 0x90, reg);
18582 }
18583 else
18584 abort ();
18585 break;
18586
18587 default:
18588 abort ();
18589 }
18590 }
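
/* For example, a single-register push
   (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI r4)) emits
   "\t.save {r4}\n", a stack adjustment
   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -8))) emits
   "\t.pad #8\n", and copying or offsetting sp into the hard frame
   pointer emits a "\t.setfp" directive.  */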
18591
18592
18593 /* Emit unwind directives for the given insn. */
18594
18595 static void
18596 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18597 {
18598 rtx pat;
18599
18600 if (!ARM_EABI_UNWIND_TABLES)
18601 return;
18602
18603 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18604 return;
18605
18606 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18607 if (pat)
18608 pat = XEXP (pat, 0);
18609 else
18610 pat = PATTERN (insn);
18611
18612 switch (GET_CODE (pat))
18613 {
18614 case SET:
18615 arm_unwind_emit_set (asm_out_file, pat);
18616 break;
18617
18618 case SEQUENCE:
18619 /* Store multiple. */
18620 arm_unwind_emit_sequence (asm_out_file, pat);
18621 break;
18622
18623 default:
18624 abort ();
18625 }
18626 }
18627
18628
18629 /* Output a reference from a function exception table to the type_info
18630 object X. The EABI specifies that the symbol should be relocated by
18631 an R_ARM_TARGET2 relocation. */
18632
18633 static bool
18634 arm_output_ttype (rtx x)
18635 {
18636 fputs ("\t.word\t", asm_out_file);
18637 output_addr_const (asm_out_file, x);
18638 /* Use special relocations for symbol references. */
18639 if (GET_CODE (x) != CONST_INT)
18640 fputs ("(TARGET2)", asm_out_file);
18641 fputc ('\n', asm_out_file);
18642
18643 return TRUE;
18644 }
18645 #endif /* TARGET_UNWIND_INFO */
18646
18647
18648 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18649 stack alignment. */
18650
18651 static void
18652 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18653 {
18654 rtx unspec = SET_SRC (pattern);
18655 gcc_assert (GET_CODE (unspec) == UNSPEC);
18656
18657 switch (index)
18658 {
18659 case UNSPEC_STACK_ALIGN:
18660 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18661 put anything on the stack, so hopefully it won't matter.
18662 CFA = SP will be correct after alignment. */
18663 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18664 SET_DEST (pattern));
18665 break;
18666 default:
18667 gcc_unreachable ();
18668 }
18669 }
18670
18671
18672 /* Output unwind directives for the start/end of a function. */
18673
18674 void
18675 arm_output_fn_unwind (FILE * f, bool prologue)
18676 {
18677 if (!ARM_EABI_UNWIND_TABLES)
18678 return;
18679
18680 if (prologue)
18681 fputs ("\t.fnstart\n", f);
18682 else
18683 fputs ("\t.fnend\n", f);
18684 }
18685
18686 static bool
18687 arm_emit_tls_decoration (FILE *fp, rtx x)
18688 {
18689 enum tls_reloc reloc;
18690 rtx val;
18691
18692 val = XVECEXP (x, 0, 0);
18693 reloc = INTVAL (XVECEXP (x, 0, 1));
18694
18695 output_addr_const (fp, val);
18696
18697 switch (reloc)
18698 {
18699 case TLS_GD32:
18700 fputs ("(tlsgd)", fp);
18701 break;
18702 case TLS_LDM32:
18703 fputs ("(tlsldm)", fp);
18704 break;
18705 case TLS_LDO32:
18706 fputs ("(tlsldo)", fp);
18707 break;
18708 case TLS_IE32:
18709 fputs ("(gottpoff)", fp);
18710 break;
18711 case TLS_LE32:
18712 fputs ("(tpoff)", fp);
18713 break;
18714 default:
18715 gcc_unreachable ();
18716 }
18717
18718 switch (reloc)
18719 {
18720 case TLS_GD32:
18721 case TLS_LDM32:
18722 case TLS_IE32:
18723 fputs (" + (. - ", fp);
18724 output_addr_const (fp, XVECEXP (x, 0, 2));
18725 fputs (" - ", fp);
18726 output_addr_const (fp, XVECEXP (x, 0, 3));
18727 fputc (')', fp);
18728 break;
18729 default:
18730 break;
18731 }
18732
18733 return TRUE;
18734 }
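
/* For example, a TLS_GD32 reference to a symbol "x" is printed as
   "x(tlsgd)" followed by " + (. - <label> - <offset>)", where the
   last two operands of the UNSPEC supply the label and offset; a
   TLS_LE32 reference is printed simply as "x(tpoff)".  */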
18735
18736 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18737
18738 static void
18739 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18740 {
18741 gcc_assert (size == 4);
18742 fputs ("\t.word\t", file);
18743 output_addr_const (file, x);
18744 fputs ("(tlsldo)", file);
18745 }
18746
18747 bool
18748 arm_output_addr_const_extra (FILE *fp, rtx x)
18749 {
18750 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18751 return arm_emit_tls_decoration (fp, x);
18752 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18753 {
18754 char label[256];
18755 int labelno = INTVAL (XVECEXP (x, 0, 0));
18756
18757 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18758 assemble_name_raw (fp, label);
18759
18760 return TRUE;
18761 }
18762 else if (GET_CODE (x) == CONST_VECTOR)
18763 return arm_emit_vector_const (fp, x);
18764
18765 return FALSE;
18766 }
18767
18768 /* Output assembly for a shift instruction.
18769 SET_FLAGS determines how the instruction modifies the condition codes.
18770 0 - Do not set condition codes.
18771 1 - Set condition codes.
18772 2 - Use smallest instruction. */
18773 const char *
18774 arm_output_shift (rtx *operands, int set_flags)
18775 {
18776 char pattern[100];
18777 static const char flag_chars[3] = {'?', '.', '!'};
18778 const char *shift;
18779 HOST_WIDE_INT val;
18780 char c;
18781
18782 c = flag_chars[set_flags];
18783 if (TARGET_UNIFIED_ASM)
18784 {
18785 shift = shift_op (operands[3], &val);
18786 if (shift)
18787 {
18788 if (val != -1)
18789 operands[2] = GEN_INT (val);
18790 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18791 }
18792 else
18793 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18794 }
18795 else
18796 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18797 output_asm_insn (pattern, operands);
18798 return "";
18799 }
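
/* For example, under unified assembly syntax a shift recognized by
   shift_op produces the template "<op>%<c>\t%0, %1, %2", where <c> is
   '?', '.' or '!' according to SET_FLAGS and operand 2 is replaced by
   the constant shift amount when there is one; without unified syntax
   the single template "mov%<c>\t%0, %1%S3" covers all the cases.  */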
18800
18801 /* Output a Thumb-2 casesi instruction. */
18802 const char *
18803 thumb2_output_casesi (rtx *operands)
18804 {
18805 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18806
18807 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18808
18809 output_asm_insn ("cmp\t%0, %1", operands);
18810 output_asm_insn ("bhi\t%l3", operands);
18811 switch (GET_MODE (diff_vec))
18812 {
18813 case QImode:
18814 return "tbb\t[%|pc, %0]";
18815 case HImode:
18816 return "tbh\t[%|pc, %0, lsl #1]";
18817 case SImode:
18818 if (flag_pic)
18819 {
18820 output_asm_insn ("adr\t%4, %l2", operands);
18821 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18822 output_asm_insn ("add\t%4, %4, %5", operands);
18823 return "bx\t%4";
18824 }
18825 else
18826 {
18827 output_asm_insn ("adr\t%4, %l2", operands);
18828 return "ldr\t%|pc, [%4, %0, lsl #2]";
18829 }
18830 default:
18831 gcc_unreachable ();
18832 }
18833 }
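
/* For example, a QImode dispatch table is handled with
   "cmp\t%0, %1", "bhi\t%l3" to the default label and
   "tbb\t[%|pc, %0]"; an SImode table compiled with -fpic instead
   loads the table entry, adds it to the table address and branches
   with "bx".  */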
18834
18835 /* Most ARM cores are single issue, but some newer ones can dual issue.
18836 The scheduler descriptions rely on this being correct. */
18837 static int
18838 arm_issue_rate (void)
18839 {
18840 switch (arm_tune)
18841 {
18842 case cortexr4:
18843 case cortexa8:
18844 return 2;
18845
18846 default:
18847 return 1;
18848 }
18849 }
18850
18851 /* A table and a function to perform ARM-specific name mangling for
18852 NEON vector types in order to conform to the AAPCS (see "Procedure
18853 Call Standard for the ARM Architecture", Appendix A). To qualify
18854 for emission with the mangled names defined in that document, a
18855 vector type must not only be of the correct mode but also be
18856 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18857 typedef struct
18858 {
18859 enum machine_mode mode;
18860 const char *element_type_name;
18861 const char *aapcs_name;
18862 } arm_mangle_map_entry;
18863
18864 static arm_mangle_map_entry arm_mangle_map[] = {
18865 /* 64-bit containerized types. */
18866 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18867 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18868 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18869 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18870 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18871 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18872 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18873 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18874 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18875 /* 128-bit containerized types. */
18876 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18877 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18878 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18879 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18880 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18881 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18882 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18883 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18884 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18885 { VOIDmode, NULL, NULL }
18886 };
18887
18888 const char *
18889 arm_mangle_type (const_tree type)
18890 {
18891 arm_mangle_map_entry *pos = arm_mangle_map;
18892
18893 if (TREE_CODE (type) != VECTOR_TYPE)
18894 return NULL;
18895
18896 /* Check the mode of the vector type, and the name of the vector
18897 element type, against the table. */
18898 while (pos->mode != VOIDmode)
18899 {
18900 tree elt_type = TREE_TYPE (type);
18901
18902 if (pos->mode == TYPE_MODE (type)
18903 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18904 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18905 pos->element_type_name))
18906 return pos->aapcs_name;
18907
18908 pos++;
18909 }
18910
18911 /* Use the default mangling for unrecognized (possibly user-defined)
18912 vector types. */
18913 return NULL;
18914 }
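
/* For example, a V8QImode vector whose elements have the
   __builtin_neon_qi type is mangled as "15__simd64_int8_t" (the AAPCS
   name with its length prefixed), while vectors built from ordinary
   scalar types fall back to the default mangling.  */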
18915
18916 #include "gt-arm.h"