gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "real.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "recog.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "c-pragma.h"
48 #include "integrate.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "debug.h"
53 #include "langhooks.h"
54 #include "df.h"
55
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
59
60 const struct attribute_spec arm_attribute_table[];
61
62 /* Forward function declarations. */
63 static arm_stack_offsets *arm_get_frame_offsets (void);
64 static void arm_add_gc_roots (void);
65 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
66 HOST_WIDE_INT, rtx, rtx, int, int);
67 static unsigned bit_count (unsigned long);
68 static int arm_address_register_rtx_p (rtx, int);
69 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
70 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
71 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
72 inline static int thumb1_index_register_rtx_p (rtx, int);
73 static int thumb_far_jump_used_p (void);
74 static bool thumb_force_lr_save (void);
75 static unsigned long thumb1_compute_save_reg_mask (void);
76 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
77 static rtx emit_sfm (int, int);
78 static int arm_size_return_regs (void);
79 #ifndef AOF_ASSEMBLER
80 static bool arm_assemble_integer (rtx, unsigned int, int);
81 #endif
82 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
83 static arm_cc get_arm_condition_code (rtx);
84 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
85 static rtx is_jump_table (rtx);
86 static const char *output_multi_immediate (rtx *, const char *, const char *,
87 int, HOST_WIDE_INT);
88 static const char *shift_op (rtx, HOST_WIDE_INT *);
89 static struct machine_function *arm_init_machine_status (void);
90 static void thumb_exit (FILE *, int);
91 static rtx is_jump_table (rtx);
92 static HOST_WIDE_INT get_jump_table_size (rtx);
93 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
94 static Mnode *add_minipool_forward_ref (Mfix *);
95 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
96 static Mnode *add_minipool_backward_ref (Mfix *);
97 static void assign_minipool_offsets (Mfix *);
98 static void arm_print_value (FILE *, rtx);
99 static void dump_minipool (rtx);
100 static int arm_barrier_cost (rtx);
101 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
102 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
103 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
104 rtx);
105 static void arm_reorg (void);
106 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
107 static unsigned long arm_compute_save_reg0_reg12_mask (void);
108 static unsigned long arm_compute_save_reg_mask (void);
109 static unsigned long arm_isr_value (tree);
110 static unsigned long arm_compute_func_type (void);
111 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
112 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
113 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
114 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
115 #endif
116 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
117 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
119 static int arm_comp_type_attributes (tree, tree);
120 static void arm_set_default_type_attributes (tree);
121 static int arm_adjust_cost (rtx, rtx, rtx, int);
122 static int count_insns_for_constant (HOST_WIDE_INT, int);
123 static int arm_get_strip_length (int);
124 static bool arm_function_ok_for_sibcall (tree, tree);
125 static void arm_internal_label (FILE *, const char *, unsigned long);
126 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
127 tree);
128 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
129 static bool arm_size_rtx_costs (rtx, int, int, int *);
130 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
132 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
133 static bool arm_9e_rtx_costs (rtx, int, int, int *);
134 static int arm_address_cost (rtx);
135 static bool arm_memory_load_p (rtx);
136 static bool arm_cirrus_insn_p (rtx);
137 static void cirrus_reorg (rtx);
138 static void arm_init_builtins (void);
139 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
140 static void arm_init_iwmmxt_builtins (void);
141 static rtx safe_vector_operand (rtx, enum machine_mode);
142 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
143 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
144 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
145 static void emit_constant_insn (rtx cond, rtx pattern);
146 static rtx emit_set_insn (rtx, rtx);
147 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
148 tree, bool);
149
150 #ifdef OBJECT_FORMAT_ELF
151 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
152 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
153 #endif
154 #ifndef ARM_PE
155 static void arm_encode_section_info (tree, rtx, int);
156 #endif
157
158 static void arm_file_end (void);
159 static void arm_file_start (void);
160
161 #ifdef AOF_ASSEMBLER
162 static void aof_globalize_label (FILE *, const char *);
163 static void aof_dump_imports (FILE *);
164 static void aof_dump_pic_table (FILE *);
165 static void aof_file_start (void);
166 static void aof_file_end (void);
167 static void aof_asm_init_sections (void);
168 #endif
169 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
170 tree, int *, int);
171 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
172 enum machine_mode, tree, bool);
173 static bool arm_promote_prototypes (tree);
174 static bool arm_default_short_enums (void);
175 static bool arm_align_anon_bitfield (void);
176 static bool arm_return_in_msb (tree);
177 static bool arm_must_pass_in_stack (enum machine_mode, tree);
178 #ifdef TARGET_UNWIND_INFO
179 static void arm_unwind_emit (FILE *, rtx);
180 static bool arm_output_ttype (rtx);
181 #endif
182 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
183
184 static tree arm_cxx_guard_type (void);
185 static bool arm_cxx_guard_mask_bit (void);
186 static tree arm_get_cookie_size (tree);
187 static bool arm_cookie_has_size (void);
188 static bool arm_cxx_cdtor_returns_this (void);
189 static bool arm_cxx_key_method_may_be_inline (void);
190 static void arm_cxx_determine_class_data_visibility (tree);
191 static bool arm_cxx_class_data_always_comdat (void);
192 static bool arm_cxx_use_aeabi_atexit (void);
193 static void arm_init_libfuncs (void);
194 static bool arm_handle_option (size_t, const char *, int);
195 static void arm_target_help (void);
196 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
197 static bool arm_cannot_copy_insn_p (rtx);
198 static bool arm_tls_symbol_p (rtx x);
199 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
200
201 \f
202 /* Initialize the GCC target structure. */
203 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
204 #undef TARGET_MERGE_DECL_ATTRIBUTES
205 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
206 #endif
207
208 #undef TARGET_ATTRIBUTE_TABLE
209 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
210
211 #undef TARGET_ASM_FILE_START
212 #define TARGET_ASM_FILE_START arm_file_start
213 #undef TARGET_ASM_FILE_END
214 #define TARGET_ASM_FILE_END arm_file_end
215
216 #ifdef AOF_ASSEMBLER
217 #undef TARGET_ASM_BYTE_OP
218 #define TARGET_ASM_BYTE_OP "\tDCB\t"
219 #undef TARGET_ASM_ALIGNED_HI_OP
220 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
221 #undef TARGET_ASM_ALIGNED_SI_OP
222 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
223 #undef TARGET_ASM_GLOBALIZE_LABEL
224 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
225 #undef TARGET_ASM_FILE_START
226 #define TARGET_ASM_FILE_START aof_file_start
227 #undef TARGET_ASM_FILE_END
228 #define TARGET_ASM_FILE_END aof_file_end
229 #else
230 #undef TARGET_ASM_ALIGNED_SI_OP
231 #define TARGET_ASM_ALIGNED_SI_OP NULL
232 #undef TARGET_ASM_INTEGER
233 #define TARGET_ASM_INTEGER arm_assemble_integer
234 #endif
235
236 #undef TARGET_ASM_FUNCTION_PROLOGUE
237 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
238
239 #undef TARGET_ASM_FUNCTION_EPILOGUE
240 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
241
242 #undef TARGET_DEFAULT_TARGET_FLAGS
243 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
244 #undef TARGET_HANDLE_OPTION
245 #define TARGET_HANDLE_OPTION arm_handle_option
246 #undef TARGET_HELP
247 #define TARGET_HELP arm_target_help
248
249 #undef TARGET_COMP_TYPE_ATTRIBUTES
250 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
251
252 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
253 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
254
255 #undef TARGET_SCHED_ADJUST_COST
256 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
257
258 #undef TARGET_ENCODE_SECTION_INFO
259 #ifdef ARM_PE
260 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
261 #else
262 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
263 #endif
264
265 #undef TARGET_STRIP_NAME_ENCODING
266 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
267
268 #undef TARGET_ASM_INTERNAL_LABEL
269 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
270
271 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
272 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
273
274 #undef TARGET_ASM_OUTPUT_MI_THUNK
275 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
276 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
277 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
278
279 /* This will be overridden in arm_override_options. */
280 #undef TARGET_RTX_COSTS
281 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
282 #undef TARGET_ADDRESS_COST
283 #define TARGET_ADDRESS_COST arm_address_cost
284
285 #undef TARGET_SHIFT_TRUNCATION_MASK
286 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
287 #undef TARGET_VECTOR_MODE_SUPPORTED_P
288 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
289
290 #undef TARGET_MACHINE_DEPENDENT_REORG
291 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
292
293 #undef TARGET_INIT_BUILTINS
294 #define TARGET_INIT_BUILTINS arm_init_builtins
295 #undef TARGET_EXPAND_BUILTIN
296 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
297
298 #undef TARGET_INIT_LIBFUNCS
299 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
300
301 #undef TARGET_PROMOTE_FUNCTION_ARGS
302 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
303 #undef TARGET_PROMOTE_FUNCTION_RETURN
304 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
305 #undef TARGET_PROMOTE_PROTOTYPES
306 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
307 #undef TARGET_PASS_BY_REFERENCE
308 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
309 #undef TARGET_ARG_PARTIAL_BYTES
310 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
311
312 #undef TARGET_SETUP_INCOMING_VARARGS
313 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
314
315 #undef TARGET_DEFAULT_SHORT_ENUMS
316 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
317
318 #undef TARGET_ALIGN_ANON_BITFIELD
319 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
320
321 #undef TARGET_NARROW_VOLATILE_BITFIELD
322 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
323
324 #undef TARGET_CXX_GUARD_TYPE
325 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
326
327 #undef TARGET_CXX_GUARD_MASK_BIT
328 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
329
330 #undef TARGET_CXX_GET_COOKIE_SIZE
331 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
332
333 #undef TARGET_CXX_COOKIE_HAS_SIZE
334 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
335
336 #undef TARGET_CXX_CDTOR_RETURNS_THIS
337 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
338
339 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
340 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
341
342 #undef TARGET_CXX_USE_AEABI_ATEXIT
343 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
344
345 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
346 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
347 arm_cxx_determine_class_data_visibility
348
349 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
350 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
351
352 #undef TARGET_RETURN_IN_MSB
353 #define TARGET_RETURN_IN_MSB arm_return_in_msb
354
355 #undef TARGET_MUST_PASS_IN_STACK
356 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
357
358 #ifdef TARGET_UNWIND_INFO
359 #undef TARGET_UNWIND_EMIT
360 #define TARGET_UNWIND_EMIT arm_unwind_emit
361
362 /* EABI unwinding tables use a different format for the typeinfo tables. */
363 #undef TARGET_ASM_TTYPE
364 #define TARGET_ASM_TTYPE arm_output_ttype
365
366 #undef TARGET_ARM_EABI_UNWINDER
367 #define TARGET_ARM_EABI_UNWINDER true
368 #endif /* TARGET_UNWIND_INFO */
369
370 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
371 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
372
373 #undef TARGET_CANNOT_COPY_INSN_P
374 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
375
376 #ifdef HAVE_AS_TLS
377 #undef TARGET_HAVE_TLS
378 #define TARGET_HAVE_TLS true
379 #endif
380
381 #undef TARGET_CANNOT_FORCE_CONST_MEM
382 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
383
384 #undef TARGET_MANGLE_TYPE
385 #define TARGET_MANGLE_TYPE arm_mangle_type
386
387 #ifdef HAVE_AS_TLS
388 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
389 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
390 #endif
391
392 struct gcc_target targetm = TARGET_INITIALIZER;
393 \f
394 /* Obstack for minipool constant handling. */
395 static struct obstack minipool_obstack;
396 static char * minipool_startobj;
397
398 /* The maximum number of insns skipped which
399 will be conditionalised if possible. */
400 static int max_insns_skipped = 5;
401
402 extern FILE * asm_out_file;
403
404 /* True if we are currently building a constant table. */
405 int making_const_table;
406
407 /* Define the information needed to generate branch insns. This is
408 stored from the compare operation. */
409 rtx arm_compare_op0, arm_compare_op1;
410
411 /* The processor for which instructions should be scheduled. */
412 enum processor_type arm_tune = arm_none;
413
414 /* The default processor used if not overridden by commandline. */
415 static enum processor_type arm_default_cpu = arm_none;
416
417 /* Which floating point model to use. */
418 enum arm_fp_model arm_fp_model;
419
420 /* Which floating point hardware is available. */
421 enum fputype arm_fpu_arch;
422
423 /* Which floating point hardware to schedule for. */
424 enum fputype arm_fpu_tune;
425
426 /* Whether to use floating point hardware. */
427 enum float_abi_type arm_float_abi;
428
429 /* Which ABI to use. */
430 enum arm_abi_type arm_abi;
431
432 /* Which thread pointer model to use. */
433 enum arm_tp_type target_thread_pointer = TP_AUTO;
434
435 /* Used to parse -mstructure_size_boundary command line option. */
436 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
437
438 /* Used for Thumb call_via trampolines. */
439 rtx thumb_call_via_label[14];
440 static int thumb_call_reg_needed;
441
442 /* Bit values used to identify processor capabilities. */
443 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
444 #define FL_ARCH3M (1 << 1) /* Extended multiply */
445 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
446 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
447 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
448 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
449 #define FL_THUMB (1 << 6) /* Thumb aware */
450 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
451 #define FL_STRONG (1 << 8) /* StrongARM */
452 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
453 #define FL_XSCALE (1 << 10) /* XScale */
454 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
455 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
456 media instructions. */
457 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
458 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
459 Note: ARM6 & 7 derivatives only. */
460 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
461 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
462 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
463 profile. */
464 #define FL_DIV (1 << 18) /* Hardware divide. */
465 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
466 #define FL_NEON (1 << 20) /* Neon instructions. */
467
468 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
469
470 #define FL_FOR_ARCH2 FL_NOTM
471 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
472 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
473 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
474 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
475 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
476 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
477 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
478 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
479 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
480 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
481 #define FL_FOR_ARCH6J FL_FOR_ARCH6
482 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
483 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
484 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
485 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
486 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
487 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
488 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
489 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
490
491 /* The bits in this mask specify which
492 instructions we are allowed to generate. */
493 static unsigned long insn_flags = 0;
494
495 /* The bits in this mask specify which instruction scheduling options should
496 be used. */
497 static unsigned long tune_flags = 0;
498
499 /* The following are used in the arm.md file as equivalents to bits
500 in the above two flag variables. */
501
502 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
503 int arm_arch3m = 0;
504
505 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
506 int arm_arch4 = 0;
507
508 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
509 int arm_arch4t = 0;
510
511 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
512 int arm_arch5 = 0;
513
514 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
515 int arm_arch5e = 0;
516
517 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
518 int arm_arch6 = 0;
519
520 /* Nonzero if this chip supports the ARM 6K extensions. */
521 int arm_arch6k = 0;
522
523 /* Nonzero if instructions not present in the 'M' profile can be used. */
524 int arm_arch_notm = 0;
525
526 /* Nonzero if this chip can benefit from load scheduling. */
527 int arm_ld_sched = 0;
528
529 /* Nonzero if this chip is a StrongARM. */
530 int arm_tune_strongarm = 0;
531
532 /* Nonzero if this chip is a Cirrus variant. */
533 int arm_arch_cirrus = 0;
534
535 /* Nonzero if this chip supports Intel Wireless MMX technology. */
536 int arm_arch_iwmmxt = 0;
537
538 /* Nonzero if this chip is an XScale. */
539 int arm_arch_xscale = 0;
540
541 /* Nonzero if tuning for XScale */
542 int arm_tune_xscale = 0;
543
544 /* Nonzero if we want to tune for stores that access the write-buffer.
545 This typically means an ARM6 or ARM7 with MMU or MPU. */
546 int arm_tune_wbuf = 0;
547
548 /* Nonzero if generating Thumb instructions. */
549 int thumb_code = 0;
550
551 /* Nonzero if we should define __THUMB_INTERWORK__ in the
552 preprocessor.
553 XXX This is a bit of a hack; it's intended to help work around
554 problems in GLD, which doesn't understand that armv5t code is
555 interworking clean. */
556 int arm_cpp_interwork = 0;
557
558 /* Nonzero if chip supports Thumb 2. */
559 int arm_arch_thumb2;
560
561 /* Nonzero if chip supports integer division instruction. */
562 int arm_arch_hwdiv;
563
564 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
565 must report the mode of the memory reference from PRINT_OPERAND to
566 PRINT_OPERAND_ADDRESS. */
567 enum machine_mode output_memory_reference_mode;
568
569 /* The register number to be used for the PIC offset register. */
570 unsigned arm_pic_register = INVALID_REGNUM;
571
572 /* Set to 1 when a return insn is output; this means that the epilogue
573 is not needed. */
574 int return_used_this_function;
575
576 /* Set to 1 after arm_reorg has started. Reset at the start of
577 the next function. */
578 static int after_arm_reorg = 0;
579
580 /* The maximum number of insns to be used when loading a constant. */
581 static int arm_constant_limit = 3;
582
583 /* For an explanation of these variables, see final_prescan_insn below. */
584 int arm_ccfsm_state;
585 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
586 enum arm_cond_code arm_current_cc;
587 rtx arm_target_insn;
588 int arm_target_label;
589 /* The number of conditionally executed insns, including the current insn. */
590 int arm_condexec_count = 0;
591 /* A bitmask specifying the patterns for the IT block.
592 Zero means do not output an IT block before this insn. */
593 int arm_condexec_mask = 0;
594 /* The number of bits used in arm_condexec_mask. */
595 int arm_condexec_masklen = 0;
596
597 /* The condition codes of the ARM, and the inverse function. */
598 static const char * const arm_condition_codes[] =
599 {
600 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
601 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
602 };
603
604 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
605 #define streq(string1, string2) (strcmp (string1, string2) == 0)
606
607 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
608 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
609 | (1 << PIC_OFFSET_TABLE_REGNUM)))
610 \f
611 /* Initialization code. */
612
613 struct processors
614 {
615 const char *const name;
616 enum processor_type core;
617 const char *arch;
618 const unsigned long flags;
619 bool (* rtx_costs) (rtx, int, int, int *);
620 };
621
622 /* Not all of these give usefully different compilation alternatives,
623 but there is no simple way of generalizing them. */
624 static const struct processors all_cores[] =
625 {
626 /* ARM Cores */
627 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
628 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
629 #include "arm-cores.def"
630 #undef ARM_CORE
631 {NULL, arm_none, NULL, 0, NULL}
632 };
633
634 static const struct processors all_architectures[] =
635 {
636 /* ARM Architectures */
637 /* We don't specify rtx_costs here as it will be figured out
638 from the core. */
639
640 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
641 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
642 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
643 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
644 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
645 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
646 implementations that support it, so we will leave it out for now. */
647 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
648 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
649 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
650 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
651 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
652 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
653 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
654 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
655 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
656 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
657 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
658 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
659 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
660 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
661 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
662 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
663 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
664 {NULL, arm_none, NULL, 0 , NULL}
665 };
666
667 struct arm_cpu_select
668 {
669 const char * string;
670 const char * name;
671 const struct processors * processors;
672 };
673
674 /* This is a magic structure. The 'string' field is magically filled in
675 with a pointer to the value specified by the user on the command line
676 assuming that the user has specified such a value. */
677
678 static struct arm_cpu_select arm_select[] =
679 {
680 /* string name processors */
681 { NULL, "-mcpu=", all_cores },
682 { NULL, "-march=", all_architectures },
683 { NULL, "-mtune=", all_cores }
684 };
685
686 /* Defines representing the indexes into the above table. */
687 #define ARM_OPT_SET_CPU 0
688 #define ARM_OPT_SET_ARCH 1
689 #define ARM_OPT_SET_TUNE 2
690
691 /* The name of the preprocessor macro to define for this architecture. */
692
693 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
694
695 struct fpu_desc
696 {
697 const char * name;
698 enum fputype fpu;
699 };
700
701
702 /* Available values for -mfpu=. */
703
704 static const struct fpu_desc all_fpus[] =
705 {
706 {"fpa", FPUTYPE_FPA},
707 {"fpe2", FPUTYPE_FPA_EMU2},
708 {"fpe3", FPUTYPE_FPA_EMU2},
709 {"maverick", FPUTYPE_MAVERICK},
710 {"vfp", FPUTYPE_VFP},
711 {"vfp3", FPUTYPE_VFP3},
712 {"neon", FPUTYPE_NEON}
713 };
714
715
716 /* Floating point models used by the different hardware.
717 See fputype in arm.h. */
718
719 static const enum fputype fp_model_for_fpu[] =
720 {
721 /* No FP hardware. */
722 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
723 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
724 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
725 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
726 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
727 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
728 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
729 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
730 };
731
732
733 struct float_abi
734 {
735 const char * name;
736 enum float_abi_type abi_type;
737 };
738
739
740 /* Available values for -mfloat-abi=. */
741
742 static const struct float_abi all_float_abis[] =
743 {
744 {"soft", ARM_FLOAT_ABI_SOFT},
745 {"softfp", ARM_FLOAT_ABI_SOFTFP},
746 {"hard", ARM_FLOAT_ABI_HARD}
747 };
748
749
750 struct abi_name
751 {
752 const char *name;
753 enum arm_abi_type abi_type;
754 };
755
756
757 /* Available values for -mabi=. */
758
759 static const struct abi_name arm_all_abis[] =
760 {
761 {"apcs-gnu", ARM_ABI_APCS},
762 {"atpcs", ARM_ABI_ATPCS},
763 {"aapcs", ARM_ABI_AAPCS},
764 {"iwmmxt", ARM_ABI_IWMMXT},
765 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
766 };
767
768 /* Supported TLS relocations. */
769
770 enum tls_reloc {
771 TLS_GD32,
772 TLS_LDM32,
773 TLS_LDO32,
774 TLS_IE32,
775 TLS_LE32
776 };
777
778 /* Emit an insn that's a simple single-set. Both the operands must be known
779 to be valid. */
780 inline static rtx
781 emit_set_insn (rtx x, rtx y)
782 {
783 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
784 }
785
786 /* Return the number of bits set in VALUE. */
787 static unsigned
788 bit_count (unsigned long value)
789 {
790 unsigned long count = 0;
791
792 while (value)
793 {
794 count++;
795 value &= value - 1; /* Clear the least-significant set bit. */
796 }
797
798 return count;
799 }
800
801 /* Set up library functions unique to ARM. */
802
803 static void
804 arm_init_libfuncs (void)
805 {
806 /* There are no special library functions unless we are using the
807 ARM BPABI. */
808 if (!TARGET_BPABI)
809 return;
810
811 /* The functions below are described in Section 4 of the "Run-Time
812 ABI for the ARM architecture", Version 1.0. */
813
814 /* Double-precision floating-point arithmetic. Table 2. */
815 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
816 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
817 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
818 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
819 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
820
821 /* Double-precision comparisons. Table 3. */
822 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
823 set_optab_libfunc (ne_optab, DFmode, NULL);
824 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
825 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
826 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
827 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
828 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
829
830 /* Single-precision floating-point arithmetic. Table 4. */
831 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
832 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
833 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
834 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
835 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
836
837 /* Single-precision comparisons. Table 5. */
838 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
839 set_optab_libfunc (ne_optab, SFmode, NULL);
840 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
841 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
842 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
843 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
844 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
845
846 /* Floating-point to integer conversions. Table 6. */
847 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
848 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
849 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
850 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
851 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
852 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
853 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
854 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
855
856 /* Conversions between floating types. Table 7. */
857 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
858 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
859
860 /* Integer to floating-point conversions. Table 8. */
861 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
862 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
863 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
864 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
865 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
866 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
867 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
868 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
869
870 /* Long long. Table 9. */
871 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
872 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
873 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
874 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
875 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
876 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
877 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
878 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
879
880 /* Integer (32/32->32) division. \S 4.3.1. */
881 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
882 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
883
884 /* The divmod functions are designed so that they can be used for
885 plain division, even though they return both the quotient and the
886 remainder. The quotient is returned in the usual location (i.e.,
887 r0 for SImode, {r0, r1} for DImode), just as would be expected
888 for an ordinary division routine. Because the AAPCS calling
889 conventions specify that all of { r0, r1, r2, r3 } are
890 call-clobbered registers, there is no need to tell the compiler
891 explicitly that those registers are clobbered by these
892 routines. */
893 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
894 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
895
896 /* For SImode division the ABI provides div-without-mod routines,
897 which are faster. */
898 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
899 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
900
901 /* We don't have mod libcalls. Fortunately gcc knows how to use the
902 divmod libcalls instead. */
903 set_optab_libfunc (smod_optab, DImode, NULL);
904 set_optab_libfunc (umod_optab, DImode, NULL);
905 set_optab_libfunc (smod_optab, SImode, NULL);
906 set_optab_libfunc (umod_optab, SImode, NULL);
907 }
908
909 /* Implement TARGET_HANDLE_OPTION. */
910
911 static bool
912 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
913 {
914 switch (code)
915 {
916 case OPT_march_:
917 arm_select[1].string = arg;
918 return true;
919
920 case OPT_mcpu_:
921 arm_select[0].string = arg;
922 return true;
923
924 case OPT_mhard_float:
925 target_float_abi_name = "hard";
926 return true;
927
928 case OPT_msoft_float:
929 target_float_abi_name = "soft";
930 return true;
931
932 case OPT_mtune_:
933 arm_select[2].string = arg;
934 return true;
935
936 default:
937 return true;
938 }
939 }
940
941 static void
942 arm_target_help (void)
943 {
944 int i;
945 static int columns = 0;
946 int remaining;
947
948 /* If we have not done so already, obtain the desired maximum width of
949 the output. Note - this is a duplication of the code at the start of
950 gcc/opts.c:print_specific_help() - the two copies should probably be
951 replaced by a single function. */
952 if (columns == 0)
953 {
954 const char *p;
955
956 GET_ENVIRONMENT (p, "COLUMNS");
957 if (p != NULL)
958 {
959 int value = atoi (p);
960
961 if (value > 0)
962 columns = value;
963 }
964
965 if (columns == 0)
966 /* Use a reasonable default. */
967 columns = 80;
968 }
969
970 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
971
972 /* The - 2 is because we know that the last entry in the array is NULL. */
973 i = ARRAY_SIZE (all_cores) - 2;
974 gcc_assert (i > 0);
975 printf (" %s", all_cores[i].name);
976 remaining = columns - (strlen (all_cores[i].name) + 4);
977 gcc_assert (remaining >= 0);
978
979 while (i--)
980 {
981 int len = strlen (all_cores[i].name);
982
983 if (remaining > len + 2)
984 {
985 printf (", %s", all_cores[i].name);
986 remaining -= len + 2;
987 }
988 else
989 {
990 if (remaining > 0)
991 printf (",");
992 printf ("\n %s", all_cores[i].name);
993 remaining = columns - (len + 4);
994 }
995 }
996
997 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
998
999 i = ARRAY_SIZE (all_architectures) - 2;
1000 gcc_assert (i > 0);
1001
1002 printf (" %s", all_architectures[i].name);
1003 remaining = columns - (strlen (all_architectures[i].name) + 4);
1004 gcc_assert (remaining >= 0);
1005
1006 while (i--)
1007 {
1008 int len = strlen (all_architectures[i].name);
1009
1010 if (remaining > len + 2)
1011 {
1012 printf (", %s", all_architectures[i].name);
1013 remaining -= len + 2;
1014 }
1015 else
1016 {
1017 if (remaining > 0)
1018 printf (",");
1019 printf ("\n %s", all_architectures[i].name);
1020 remaining = columns - (len + 4);
1021 }
1022 }
1023 printf ("\n");
1024
1025 }
1026
1027 /* Fix up any incompatible options that the user has specified.
1028 This has now turned into a maze. */
1029 void
1030 arm_override_options (void)
1031 {
1032 unsigned i;
1033 enum processor_type target_arch_cpu = arm_none;
1034
1035 /* Set up the flags based on the cpu/architecture selected by the user. */
1036 for (i = ARRAY_SIZE (arm_select); i--;)
1037 {
1038 struct arm_cpu_select * ptr = arm_select + i;
1039
1040 if (ptr->string != NULL && ptr->string[0] != '\0')
1041 {
1042 const struct processors * sel;
1043
1044 for (sel = ptr->processors; sel->name != NULL; sel++)
1045 if (streq (ptr->string, sel->name))
1046 {
1047 /* Set the architecture define. */
1048 if (i != ARM_OPT_SET_TUNE)
1049 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1050
1051 /* Determine the processor core for which we should
1052 tune code-generation. */
1053 if (/* -mcpu= is a sensible default. */
1054 i == ARM_OPT_SET_CPU
1055 /* -mtune= overrides -mcpu= and -march=. */
1056 || i == ARM_OPT_SET_TUNE)
1057 arm_tune = (enum processor_type) (sel - ptr->processors);
1058
1059 /* Remember the CPU associated with this architecture.
1060 If no other option is used to set the CPU type,
1061 we'll use this to guess the most suitable tuning
1062 options. */
1063 if (i == ARM_OPT_SET_ARCH)
1064 target_arch_cpu = sel->core;
1065
1066 if (i != ARM_OPT_SET_TUNE)
1067 {
1068 /* If we have been given an architecture and a processor
1069 make sure that they are compatible. We only generate
1070 a warning though, and we prefer the CPU over the
1071 architecture. */
1072 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1073 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1074 ptr->string);
1075
1076 insn_flags = sel->flags;
1077 }
1078
1079 break;
1080 }
1081
1082 if (sel->name == NULL)
1083 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1084 }
1085 }
1086
1087 /* Guess the tuning options from the architecture if necessary. */
1088 if (arm_tune == arm_none)
1089 arm_tune = target_arch_cpu;
1090
1091 /* If the user did not specify a processor, choose one for them. */
1092 if (insn_flags == 0)
1093 {
1094 const struct processors * sel;
1095 unsigned int sought;
1096 enum processor_type cpu;
1097
1098 cpu = TARGET_CPU_DEFAULT;
1099 if (cpu == arm_none)
1100 {
1101 #ifdef SUBTARGET_CPU_DEFAULT
1102 /* Use the subtarget default CPU if none was specified by
1103 configure. */
1104 cpu = SUBTARGET_CPU_DEFAULT;
1105 #endif
1106 /* Default to ARM6. */
1107 if (cpu == arm_none)
1108 cpu = arm6;
1109 }
1110 sel = &all_cores[cpu];
1111
1112 insn_flags = sel->flags;
1113
1114 /* Now check to see if the user has specified some command line
1115 switches that require certain abilities from the cpu. */
1116 sought = 0;
1117
1118 if (TARGET_INTERWORK || TARGET_THUMB)
1119 {
1120 sought |= (FL_THUMB | FL_MODE32);
1121
1122 /* There are no ARM processors that support both APCS-26 and
1123 interworking. Therefore we force FL_MODE26 to be removed
1124 from insn_flags here (if it was set), so that the search
1125 below will always be able to find a compatible processor. */
1126 insn_flags &= ~FL_MODE26;
1127 }
1128
1129 if (sought != 0 && ((sought & insn_flags) != sought))
1130 {
1131 /* Try to locate a CPU type that supports all of the abilities
1132 of the default CPU, plus the extra abilities requested by
1133 the user. */
1134 for (sel = all_cores; sel->name != NULL; sel++)
1135 if ((sel->flags & sought) == (sought | insn_flags))
1136 break;
1137
1138 if (sel->name == NULL)
1139 {
1140 unsigned current_bit_count = 0;
1141 const struct processors * best_fit = NULL;
1142
1143 /* Ideally we would like to issue an error message here
1144 saying that it was not possible to find a CPU compatible
1145 with the default CPU, but which also supports the command
1146 line options specified by the programmer, and so they
1147 ought to use the -mcpu=<name> command line option to
1148 override the default CPU type.
1149
1150 If we cannot find a cpu that has both the
1151 characteristics of the default cpu and the given
1152 command line options we scan the array again looking
1153 for a best match. */
1154 for (sel = all_cores; sel->name != NULL; sel++)
1155 if ((sel->flags & sought) == sought)
1156 {
1157 unsigned count;
1158
1159 count = bit_count (sel->flags & insn_flags);
1160
1161 if (count >= current_bit_count)
1162 {
1163 best_fit = sel;
1164 current_bit_count = count;
1165 }
1166 }
1167
1168 gcc_assert (best_fit);
1169 sel = best_fit;
1170 }
1171
1172 insn_flags = sel->flags;
1173 }
1174 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1175 arm_default_cpu = (enum processor_type) (sel - all_cores);
1176 if (arm_tune == arm_none)
1177 arm_tune = arm_default_cpu;
1178 }
1179
1180 /* The processor for which we should tune should now have been
1181 chosen. */
1182 gcc_assert (arm_tune != arm_none);
1183
1184 tune_flags = all_cores[(int)arm_tune].flags;
1185 if (optimize_size)
1186 targetm.rtx_costs = arm_size_rtx_costs;
1187 else
1188 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1189
1190 /* Make sure that the processor choice does not conflict with any of the
1191 other command line choices. */
1192 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1193 error ("target CPU does not support ARM mode");
1194
1195 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1196 {
1197 warning (0, "target CPU does not support interworking" );
1198 target_flags &= ~MASK_INTERWORK;
1199 }
1200
1201 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1202 {
1203 warning (0, "target CPU does not support THUMB instructions");
1204 target_flags &= ~MASK_THUMB;
1205 }
1206
1207 if (TARGET_APCS_FRAME && TARGET_THUMB)
1208 {
1209 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1210 target_flags &= ~MASK_APCS_FRAME;
1211 }
1212
1213 /* Callee super interworking implies thumb interworking. Adding
1214 this to the flags here simplifies the logic elsewhere. */
1215 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1216 target_flags |= MASK_INTERWORK;
1217
1218 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1219 from here where no function is being compiled currently. */
1220 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1221 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1222
1223 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1224 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1225
1226 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1227 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1228
1229 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1230 {
1231 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1232 target_flags |= MASK_APCS_FRAME;
1233 }
1234
1235 if (TARGET_POKE_FUNCTION_NAME)
1236 target_flags |= MASK_APCS_FRAME;
1237
1238 if (TARGET_APCS_REENT && flag_pic)
1239 error ("-fpic and -mapcs-reent are incompatible");
1240
1241 if (TARGET_APCS_REENT)
1242 warning (0, "APCS reentrant code not supported. Ignored");
1243
1244 /* If this target is normally configured to use APCS frames, warn if they
1245 are turned off and debugging is turned on. */
1246 if (TARGET_ARM
1247 && write_symbols != NO_DEBUG
1248 && !TARGET_APCS_FRAME
1249 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1250 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1251
1252 if (TARGET_APCS_FLOAT)
1253 warning (0, "passing floating point arguments in fp regs not yet supported");
1254
1255 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1256 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1257 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1258 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1259 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1260 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1261 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1262 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1263 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1264 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1265 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1266 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1267
1268 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1269 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1270 thumb_code = (TARGET_ARM == 0);
1271 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1272 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1273 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1274 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1275
1276 /* V5 code we generate is completely interworking capable, so we turn off
1277 TARGET_INTERWORK here to avoid many tests later on. */
1278
1279 /* XXX However, we must pass the right pre-processor defines to CPP
1280 or GLD can get confused. This is a hack. */
1281 if (TARGET_INTERWORK)
1282 arm_cpp_interwork = 1;
1283
1284 if (arm_arch5)
1285 target_flags &= ~MASK_INTERWORK;
1286
1287 if (target_abi_name)
1288 {
1289 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1290 {
1291 if (streq (arm_all_abis[i].name, target_abi_name))
1292 {
1293 arm_abi = arm_all_abis[i].abi_type;
1294 break;
1295 }
1296 }
1297 if (i == ARRAY_SIZE (arm_all_abis))
1298 error ("invalid ABI option: -mabi=%s", target_abi_name);
1299 }
1300 else
1301 arm_abi = ARM_DEFAULT_ABI;
1302
1303 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1304 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1305
1306 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1307 error ("iwmmxt abi requires an iwmmxt capable cpu");
1308
1309 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1310 if (target_fpu_name == NULL && target_fpe_name != NULL)
1311 {
1312 if (streq (target_fpe_name, "2"))
1313 target_fpu_name = "fpe2";
1314 else if (streq (target_fpe_name, "3"))
1315 target_fpu_name = "fpe3";
1316 else
1317 error ("invalid floating point emulation option: -mfpe=%s",
1318 target_fpe_name);
1319 }
1320 if (target_fpu_name != NULL)
1321 {
1322 /* The user specified a FPU. */
1323 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1324 {
1325 if (streq (all_fpus[i].name, target_fpu_name))
1326 {
1327 arm_fpu_arch = all_fpus[i].fpu;
1328 arm_fpu_tune = arm_fpu_arch;
1329 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1330 break;
1331 }
1332 }
1333 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1334 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1335 }
1336 else
1337 {
1338 #ifdef FPUTYPE_DEFAULT
1339 /* Use the default if it is specified for this platform. */
1340 arm_fpu_arch = FPUTYPE_DEFAULT;
1341 arm_fpu_tune = FPUTYPE_DEFAULT;
1342 #else
1343 /* Pick one based on CPU type. */
1344 /* ??? Some targets assume FPA is the default.
1345 if ((insn_flags & FL_VFP) != 0)
1346 arm_fpu_arch = FPUTYPE_VFP;
1347 else
1348 */
1349 if (arm_arch_cirrus)
1350 arm_fpu_arch = FPUTYPE_MAVERICK;
1351 else
1352 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1353 #endif
1354 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1355 arm_fpu_tune = FPUTYPE_FPA;
1356 else
1357 arm_fpu_tune = arm_fpu_arch;
1358 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1359 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1360 }
1361
1362 if (target_float_abi_name != NULL)
1363 {
1364 /* The user specified a FP ABI. */
1365 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1366 {
1367 if (streq (all_float_abis[i].name, target_float_abi_name))
1368 {
1369 arm_float_abi = all_float_abis[i].abi_type;
1370 break;
1371 }
1372 }
1373 if (i == ARRAY_SIZE (all_float_abis))
1374 error ("invalid floating point abi: -mfloat-abi=%s",
1375 target_float_abi_name);
1376 }
1377 else
1378 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1379
1380 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1381 sorry ("-mfloat-abi=hard and VFP");
1382
1383 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1384 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1385 will ever exist. GCC makes no attempt to support this combination. */
1386 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1387 sorry ("iWMMXt and hardware floating point");
1388
1389 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1390 if (TARGET_THUMB2 && TARGET_IWMMXT)
1391 sorry ("Thumb-2 iWMMXt");
1392
1393 /* If soft-float is specified then don't use FPU. */
1394 if (TARGET_SOFT_FLOAT)
1395 arm_fpu_arch = FPUTYPE_NONE;
1396
1397 /* For arm2/3 there is no need to do any scheduling if there is only
1398 a floating point emulator, or we are doing software floating-point. */
1399 if ((TARGET_SOFT_FLOAT
1400 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1401 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1402 && (tune_flags & FL_MODE32) == 0)
1403 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1404
1405 if (target_thread_switch)
1406 {
1407 if (strcmp (target_thread_switch, "soft") == 0)
1408 target_thread_pointer = TP_SOFT;
1409 else if (strcmp (target_thread_switch, "auto") == 0)
1410 target_thread_pointer = TP_AUTO;
1411 else if (strcmp (target_thread_switch, "cp15") == 0)
1412 target_thread_pointer = TP_CP15;
1413 else
1414 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1415 }
1416
1417 /* Use the cp15 method if it is available. */
1418 if (target_thread_pointer == TP_AUTO)
1419 {
1420 if (arm_arch6k && !TARGET_THUMB)
1421 target_thread_pointer = TP_CP15;
1422 else
1423 target_thread_pointer = TP_SOFT;
1424 }
1425
1426 if (TARGET_HARD_TP && TARGET_THUMB1)
1427 error ("can not use -mtp=cp15 with 16-bit Thumb");
1428
1429 /* Override the default structure alignment for AAPCS ABI. */
1430 if (TARGET_AAPCS_BASED)
1431 arm_structure_size_boundary = 8;
1432
1433 if (structure_size_string != NULL)
1434 {
1435 int size = strtol (structure_size_string, NULL, 0);
1436
1437 if (size == 8 || size == 32
1438 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1439 arm_structure_size_boundary = size;
1440 else
1441 warning (0, "structure size boundary can only be set to %s",
1442 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1443 }
1444
1445 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1446 {
1447 error ("RTP PIC is incompatible with Thumb");
1448 flag_pic = 0;
1449 }
1450
1451 /* If stack checking is disabled, we can use r10 as the PIC register,
1452 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1453 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1454 {
1455 if (TARGET_VXWORKS_RTP)
1456 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1457 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1458 }
1459
1460 if (flag_pic && TARGET_VXWORKS_RTP)
1461 arm_pic_register = 9;
1462
1463 if (arm_pic_register_string != NULL)
1464 {
1465 int pic_register = decode_reg_name (arm_pic_register_string);
1466
1467 if (!flag_pic)
1468 warning (0, "-mpic-register= is useless without -fpic");
1469
1470 /* Prevent the user from choosing an obviously stupid PIC register. */
1471 else if (pic_register < 0 || call_used_regs[pic_register]
1472 || pic_register == HARD_FRAME_POINTER_REGNUM
1473 || pic_register == STACK_POINTER_REGNUM
1474 || pic_register >= PC_REGNUM
1475 || (TARGET_VXWORKS_RTP
1476 && (unsigned int) pic_register != arm_pic_register))
1477 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1478 else
1479 arm_pic_register = pic_register;
1480 }
1481
1482 /* ??? We might want scheduling for thumb2. */
1483 if (TARGET_THUMB && flag_schedule_insns)
1484 {
1485 /* Don't warn since it's on by default in -O2. */
1486 flag_schedule_insns = 0;
1487 }
1488
1489 if (optimize_size)
1490 {
1491 arm_constant_limit = 1;
1492
1493 /* If optimizing for size, bump the number of instructions that we
1494 are prepared to conditionally execute (even on a StrongARM). */
1495 max_insns_skipped = 6;
1496 }
1497 else
1498 {
1499 /* For processors with load scheduling, it never costs more than
1500 2 cycles to load a constant, and the load scheduler may well
1501 reduce that to 1. */
1502 if (arm_ld_sched)
1503 arm_constant_limit = 1;
1504
1505 /* On XScale the longer latency of a load makes it more difficult
1506 to achieve a good schedule, so it's faster to synthesize
1507 constants that can be done in two insns. */
1508 if (arm_tune_xscale)
1509 arm_constant_limit = 2;
1510
1511 /* StrongARM has early execution of branches, so a sequence
1512 that is worth skipping is shorter. */
1513 if (arm_tune_strongarm)
1514 max_insns_skipped = 3;
1515 }
1516
1517 /* Register global variables with the garbage collector. */
1518 arm_add_gc_roots ();
1519 }
1520
1521 static void
1522 arm_add_gc_roots (void)
1523 {
1524 gcc_obstack_init(&minipool_obstack);
1525 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1526 }
1527 \f
1528 /* A table of known ARM exception types.
1529 For use with the interrupt function attribute. */
1530
1531 typedef struct
1532 {
1533 const char *const arg;
1534 const unsigned long return_value;
1535 }
1536 isr_attribute_arg;
1537
1538 static const isr_attribute_arg isr_attribute_args [] =
1539 {
1540 { "IRQ", ARM_FT_ISR },
1541 { "irq", ARM_FT_ISR },
1542 { "FIQ", ARM_FT_FIQ },
1543 { "fiq", ARM_FT_FIQ },
1544 { "ABORT", ARM_FT_ISR },
1545 { "abort", ARM_FT_ISR },
1546 { "ABORT", ARM_FT_ISR },
1547 { "abort", ARM_FT_ISR },
1548 { "UNDEF", ARM_FT_EXCEPTION },
1549 { "undef", ARM_FT_EXCEPTION },
1550 { "SWI", ARM_FT_EXCEPTION },
1551 { "swi", ARM_FT_EXCEPTION },
1552 { NULL, ARM_FT_NORMAL }
1553 };
1554
1555 /* Returns the (interrupt) function type of the current
1556 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1557
1558 static unsigned long
1559 arm_isr_value (tree argument)
1560 {
1561 const isr_attribute_arg * ptr;
1562 const char * arg;
1563
1564 if (!arm_arch_notm)
1565 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1566
1567 /* No argument - default to IRQ. */
1568 if (argument == NULL_TREE)
1569 return ARM_FT_ISR;
1570
1571 /* Get the value of the argument. */
1572 if (TREE_VALUE (argument) == NULL_TREE
1573 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1574 return ARM_FT_UNKNOWN;
1575
1576 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1577
1578 /* Check it against the list of known arguments. */
1579 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1580 if (streq (arg, ptr->arg))
1581 return ptr->return_value;
1582
1583 /* An unrecognized interrupt type. */
1584 return ARM_FT_UNKNOWN;
1585 }
1586
1587 /* Computes the type of the current function. */
1588
1589 static unsigned long
1590 arm_compute_func_type (void)
1591 {
1592 unsigned long type = ARM_FT_UNKNOWN;
1593 tree a;
1594 tree attr;
1595
1596 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1597
1598 /* Decide if the current function is volatile. Such functions
1599 never return, and many memory cycles can be saved by not storing
1600 register values that will never be needed again. This optimization
1601 was added to speed up context switching in a kernel application. */
1602 if (optimize > 0
1603 && (TREE_NOTHROW (current_function_decl)
1604 || !(flag_unwind_tables
1605 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1606 && TREE_THIS_VOLATILE (current_function_decl))
1607 type |= ARM_FT_VOLATILE;
1608
1609 if (cfun->static_chain_decl != NULL)
1610 type |= ARM_FT_NESTED;
1611
1612 attr = DECL_ATTRIBUTES (current_function_decl);
1613
1614 a = lookup_attribute ("naked", attr);
1615 if (a != NULL_TREE)
1616 type |= ARM_FT_NAKED;
1617
1618 a = lookup_attribute ("isr", attr);
1619 if (a == NULL_TREE)
1620 a = lookup_attribute ("interrupt", attr);
1621
1622 if (a == NULL_TREE)
1623 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1624 else
1625 type |= arm_isr_value (TREE_VALUE (a));
1626
1627 return type;
1628 }
1629
1630 /* Returns the type of the current function. */
1631
1632 unsigned long
1633 arm_current_func_type (void)
1634 {
1635 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1636 cfun->machine->func_type = arm_compute_func_type ();
1637
1638 return cfun->machine->func_type;
1639 }
1640 \f
1641 /* Return 1 if it is possible to return using a single instruction.
1642 If SIBLING is non-null, this is a test for a return before a sibling
1643 call. SIBLING is the call insn, so we can examine its register usage. */
1644
1645 int
1646 use_return_insn (int iscond, rtx sibling)
1647 {
1648 int regno;
1649 unsigned int func_type;
1650 unsigned long saved_int_regs;
1651 unsigned HOST_WIDE_INT stack_adjust;
1652 arm_stack_offsets *offsets;
1653
1654 /* Never use a return instruction before reload has run. */
1655 if (!reload_completed)
1656 return 0;
1657
1658 func_type = arm_current_func_type ();
1659
1660 /* Naked, volatile and stack alignment functions need special
1661 consideration. */
1662 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1663 return 0;
1664
1665 /* So do interrupt functions that use the frame pointer and Thumb
1666 interrupt functions. */
1667 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1668 return 0;
1669
1670 offsets = arm_get_frame_offsets ();
1671 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1672
1673 /* As do variadic functions. */
1674 if (current_function_pretend_args_size
1675 || cfun->machine->uses_anonymous_args
1676 /* Or if the function calls __builtin_eh_return () */
1677 || current_function_calls_eh_return
1678 /* Or if the function calls alloca */
1679 || current_function_calls_alloca
1680 /* Or if there is a stack adjustment. However, if the stack pointer
1681 is saved on the stack, we can use a pre-incrementing stack load. */
1682 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1683 return 0;
1684
1685 saved_int_regs = arm_compute_save_reg_mask ();
1686
1687 /* Unfortunately, the insn
1688
1689 ldmib sp, {..., sp, ...}
1690
1691 triggers a bug on most SA-110 based devices, such that the stack
1692 pointer won't be correctly restored if the instruction takes a
1693 page fault. We work around this problem by popping r3 along with
1694 the other registers, since that is never slower than executing
1695 another instruction.
1696
1697 We test for !arm_arch5 here, because code for any architecture
1698 less than this could potentially be run on one of the buggy
1699 chips. */
1700 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1701 {
1702 /* Validate that r3 is a call-clobbered register (always true in
1703 the default ABI) ... */
1704 if (!call_used_regs[3])
1705 return 0;
1706
1707 /* ... that it isn't being used for a return value ... */
1708 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1709 return 0;
1710
1711 /* ... or for a tail-call argument ... */
1712 if (sibling)
1713 {
1714 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1715
1716 if (find_regno_fusage (sibling, USE, 3))
1717 return 0;
1718 }
1719
1720 /* ... and that there are no call-saved registers in r0-r2
1721 (always true in the default ABI). */
1722 if (saved_int_regs & 0x7)
1723 return 0;
1724 }
1725
1726 /* Can't be done if interworking with Thumb, and any registers have been
1727 stacked. */
1728 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
1729 return 0;
1730
1731 /* On StrongARM, conditional returns are expensive if they aren't
1732 taken and multiple registers have been stacked. */
1733 if (iscond && arm_tune_strongarm)
1734 {
1735 /* Conditional return when just the LR is stored is a simple
1736 conditional-load instruction, that's not expensive. */
1737 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1738 return 0;
1739
1740 if (flag_pic
1741 && arm_pic_register != INVALID_REGNUM
1742 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1743 return 0;
1744 }
1745
1746 /* If there are saved registers but the LR isn't saved, then we need
1747 two instructions for the return. */
1748 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1749 return 0;
1750
1751 /* Can't be done if any of the FPA regs are pushed,
1752 since this also requires an insn. */
1753 if (TARGET_HARD_FLOAT && TARGET_FPA)
1754 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1755 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1756 return 0;
1757
1758 /* Likewise VFP regs. */
1759 if (TARGET_HARD_FLOAT && TARGET_VFP)
1760 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1761 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1762 return 0;
1763
1764 if (TARGET_REALLY_IWMMXT)
1765 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1766 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1767 return 0;
1768
1769 return 1;
1770 }
1771
1772 /* Return TRUE if int I is a valid immediate ARM constant. */
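/* A rough sketch of what this accepts (not exhaustive): in ARM state an
   immediate is an 8-bit value rotated right by an even amount, so 0xff,
   0xff00, 0x3fc and 0xf000000f are all acceptable, while 0x101 (set bits
   spread over nine positions) and 0x1fe (an 8-bit pattern at an odd
   rotation) are not.  In Thumb-2 state the repeated-byte forms handled
   below, such as 0x00ab00ab and 0xabababab, are also accepted. */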
1773
1774 int
1775 const_ok_for_arm (HOST_WIDE_INT i)
1776 {
1777 int lowbit;
1778
1779 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1780 be all zero, or all one. */
1781 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1782 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1783 != ((~(unsigned HOST_WIDE_INT) 0)
1784 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1785 return FALSE;
1786
1787 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1788
1789 /* Fast return for 0 and small values. We must do this for zero, since
1790 the code below can't handle that one case. */
1791 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1792 return TRUE;
1793
1794 /* Get the number of trailing zeros. */
1795 lowbit = ffs ((int) i) - 1;
1796
1797 /* Only even shifts are allowed in ARM mode so round down to the
1798 nearest even number. */
1799 if (TARGET_ARM)
1800 lowbit &= ~1;
1801
1802 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1803 return TRUE;
1804
1805 if (TARGET_ARM)
1806 {
1807 /* Allow rotated constants in ARM mode. */
1808 if (lowbit <= 4
1809 && ((i & ~0xc000003f) == 0
1810 || (i & ~0xf000000f) == 0
1811 || (i & ~0xfc000003) == 0))
1812 return TRUE;
1813 }
1814 else
1815 {
1816 HOST_WIDE_INT v;
1817
1818 /* Allow repeated pattern. */
1819 v = i & 0xff;
1820 v |= v << 16;
1821 if (i == v || i == (v | (v << 8)))
1822 return TRUE;
1823 }
1824
1825 return FALSE;
1826 }
1827
1828 /* Return true if I is a valid constant for the operation CODE. */
1829 static int
1830 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1831 {
1832 if (const_ok_for_arm (i))
1833 return 1;
1834
1835 switch (code)
1836 {
1837 case PLUS:
1838 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1839
1840 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1841 case XOR:
1842 case IOR:
1843 return 0;
1844
1845 case AND:
1846 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1847
1848 default:
1849 gcc_unreachable ();
1850 }
1851 }
1852
1853 /* Emit a sequence of insns to handle a large constant.
1854 CODE is the code of the operation required, it can be any of SET, PLUS,
1855 IOR, AND, XOR, MINUS;
1856 MODE is the mode in which the operation is being performed;
1857 VAL is the integer to operate on;
1858 SOURCE is the other operand (a register, or a null-pointer for SET);
1859 SUBTARGETS means it is safe to create scratch registers if that will
1860 either produce a simpler sequence, or we will want to cse the values.
1861 Return value is the number of insns emitted. */
1862
1863 /* ??? Tweak this for thumb2. */
1864 int
1865 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1866 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1867 {
1868 rtx cond;
1869
1870 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1871 cond = COND_EXEC_TEST (PATTERN (insn));
1872 else
1873 cond = NULL_RTX;
1874
1875 if (subtargets || code == SET
1876 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1877 && REGNO (target) != REGNO (source)))
1878 {
1879 /* After arm_reorg has been called, we can't fix up expensive
1880 constants by pushing them into memory so we must synthesize
1881 them in-line, regardless of the cost. This is only likely to
1882 be more costly on chips that have load delay slots and we are
1883 compiling without running the scheduler (so no splitting
1884 occurred before the final instruction emission).
1885
1886 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1887 */
1888 if (!after_arm_reorg
1889 && !cond
1890 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1891 1, 0)
1892 > arm_constant_limit + (code != SET)))
1893 {
1894 if (code == SET)
1895 {
1896 /* Currently SET is the only monadic value for CODE, all
1897 the rest are dyadic. */
1898 emit_set_insn (target, GEN_INT (val));
1899 return 1;
1900 }
1901 else
1902 {
1903 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1904
1905 emit_set_insn (temp, GEN_INT (val));
1906 /* For MINUS, the value is subtracted from, since we never
1907 have subtraction of a constant. */
1908 if (code == MINUS)
1909 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1910 else
1911 emit_set_insn (target,
1912 gen_rtx_fmt_ee (code, mode, source, temp));
1913 return 2;
1914 }
1915 }
1916 }
1917
1918 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1919 1);
1920 }
1921
1922 /* Return the number of ARM instructions required to synthesize the given
1923 constant. */
1924 static int
1925 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1926 {
1927 HOST_WIDE_INT temp1;
1928 int num_insns = 0;
1929 do
1930 {
1931 int end;
1932
1933 if (i <= 0)
1934 i += 32;
1935 if (remainder & (3 << (i - 2)))
1936 {
1937 end = i - 8;
1938 if (end < 0)
1939 end += 32;
1940 temp1 = remainder & ((0x0ff << end)
1941 | ((i < end) ? (0xff >> (32 - end)) : 0));
1942 remainder &= ~temp1;
1943 num_insns++;
1944 i -= 6;
1945 }
1946 i -= 2;
1947 } while (remainder);
1948 return num_insns;
1949 }
1950
1951 /* Emit an instruction with the indicated PATTERN. If COND is
1952 non-NULL, conditionalize the execution of the instruction on COND
1953 being true. */
1954
1955 static void
1956 emit_constant_insn (rtx cond, rtx pattern)
1957 {
1958 if (cond)
1959 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1960 emit_insn (pattern);
1961 }
1962
1963 /* As above, but extra parameter GENERATE which, if clear, suppresses
1964 RTL generation. */
1965 /* ??? This needs more work for thumb2. */
1966
1967 static int
1968 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1969 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1970 int generate)
1971 {
1972 int can_invert = 0;
1973 int can_negate = 0;
1974 int can_negate_initial = 0;
1975 int can_shift = 0;
1976 int i;
1977 int num_bits_set = 0;
1978 int set_sign_bit_copies = 0;
1979 int clear_sign_bit_copies = 0;
1980 int clear_zero_bit_copies = 0;
1981 int set_zero_bit_copies = 0;
1982 int insns = 0;
1983 unsigned HOST_WIDE_INT temp1, temp2;
1984 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1985
1986 /* Find out which operations are safe for a given CODE. Also do a quick
1987 check for degenerate cases; these can occur when DImode operations
1988 are split. */
1989 switch (code)
1990 {
1991 case SET:
1992 can_invert = 1;
1993 can_shift = 1;
1994 can_negate = 1;
1995 break;
1996
1997 case PLUS:
1998 can_negate = 1;
1999 can_negate_initial = 1;
2000 break;
2001
2002 case IOR:
2003 if (remainder == 0xffffffff)
2004 {
2005 if (generate)
2006 emit_constant_insn (cond,
2007 gen_rtx_SET (VOIDmode, target,
2008 GEN_INT (ARM_SIGN_EXTEND (val))));
2009 return 1;
2010 }
2011 if (remainder == 0)
2012 {
2013 if (reload_completed && rtx_equal_p (target, source))
2014 return 0;
2015 if (generate)
2016 emit_constant_insn (cond,
2017 gen_rtx_SET (VOIDmode, target, source));
2018 return 1;
2019 }
2020 break;
2021
2022 case AND:
2023 if (remainder == 0)
2024 {
2025 if (generate)
2026 emit_constant_insn (cond,
2027 gen_rtx_SET (VOIDmode, target, const0_rtx));
2028 return 1;
2029 }
2030 if (remainder == 0xffffffff)
2031 {
2032 if (reload_completed && rtx_equal_p (target, source))
2033 return 0;
2034 if (generate)
2035 emit_constant_insn (cond,
2036 gen_rtx_SET (VOIDmode, target, source));
2037 return 1;
2038 }
2039 can_invert = 1;
2040 break;
2041
2042 case XOR:
2043 if (remainder == 0)
2044 {
2045 if (reload_completed && rtx_equal_p (target, source))
2046 return 0;
2047 if (generate)
2048 emit_constant_insn (cond,
2049 gen_rtx_SET (VOIDmode, target, source));
2050 return 1;
2051 }
2052
2053 /* We don't know how to handle other cases yet. */
2054 gcc_assert (remainder == 0xffffffff);
2055
2056 if (generate)
2057 emit_constant_insn (cond,
2058 gen_rtx_SET (VOIDmode, target,
2059 gen_rtx_NOT (mode, source)));
2060 return 1;
2061
2062 case MINUS:
2063 /* We treat MINUS as (val - source), since (source - val) is always
2064 passed as (source + (-val)). */
2065 if (remainder == 0)
2066 {
2067 if (generate)
2068 emit_constant_insn (cond,
2069 gen_rtx_SET (VOIDmode, target,
2070 gen_rtx_NEG (mode, source)));
2071 return 1;
2072 }
2073 if (const_ok_for_arm (val))
2074 {
2075 if (generate)
2076 emit_constant_insn (cond,
2077 gen_rtx_SET (VOIDmode, target,
2078 gen_rtx_MINUS (mode, GEN_INT (val),
2079 source)));
2080 return 1;
2081 }
2082 can_negate = 1;
2083
2084 break;
2085
2086 default:
2087 gcc_unreachable ();
2088 }
2089
2090 /* If we can do it in one insn get out quickly. */
2091 if (const_ok_for_arm (val)
2092 || (can_negate_initial && const_ok_for_arm (-val))
2093 || (can_invert && const_ok_for_arm (~val)))
2094 {
2095 if (generate)
2096 emit_constant_insn (cond,
2097 gen_rtx_SET (VOIDmode, target,
2098 (source
2099 ? gen_rtx_fmt_ee (code, mode, source,
2100 GEN_INT (val))
2101 : GEN_INT (val))));
2102 return 1;
2103 }
2104
2105 /* Calculate a few attributes that may be useful for specific
2106 optimizations. */
2107 for (i = 31; i >= 0; i--)
2108 {
2109 if ((remainder & (1 << i)) == 0)
2110 clear_sign_bit_copies++;
2111 else
2112 break;
2113 }
2114
2115 for (i = 31; i >= 0; i--)
2116 {
2117 if ((remainder & (1 << i)) != 0)
2118 set_sign_bit_copies++;
2119 else
2120 break;
2121 }
2122
2123 for (i = 0; i <= 31; i++)
2124 {
2125 if ((remainder & (1 << i)) == 0)
2126 clear_zero_bit_copies++;
2127 else
2128 break;
2129 }
2130
2131 for (i = 0; i <= 31; i++)
2132 {
2133 if ((remainder & (1 << i)) != 0)
2134 set_zero_bit_copies++;
2135 else
2136 break;
2137 }
2138
2139 switch (code)
2140 {
2141 case SET:
2142 /* See if we can use movw. */
2143 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2144 {
2145 if (generate)
2146 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2147 GEN_INT (val)));
2148 return 1;
2149 }
2150
2151 /* See if we can do this by sign_extending a constant that is known
2152 to be negative. This is a good way of doing it, since the shift
2153 may well merge into a subsequent insn. */
2154 if (set_sign_bit_copies > 1)
2155 {
2156 if (const_ok_for_arm
2157 (temp1 = ARM_SIGN_EXTEND (remainder
2158 << (set_sign_bit_copies - 1))))
2159 {
2160 if (generate)
2161 {
2162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2163 emit_constant_insn (cond,
2164 gen_rtx_SET (VOIDmode, new_src,
2165 GEN_INT (temp1)));
2166 emit_constant_insn (cond,
2167 gen_ashrsi3 (target, new_src,
2168 GEN_INT (set_sign_bit_copies - 1)));
2169 }
2170 return 2;
2171 }
2172 /* For an inverted constant, we will need to set the low bits,
2173 these will be shifted out of harm's way. */
2174 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2175 if (const_ok_for_arm (~temp1))
2176 {
2177 if (generate)
2178 {
2179 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2180 emit_constant_insn (cond,
2181 gen_rtx_SET (VOIDmode, new_src,
2182 GEN_INT (temp1)));
2183 emit_constant_insn (cond,
2184 gen_ashrsi3 (target, new_src,
2185 GEN_INT (set_sign_bit_copies - 1)));
2186 }
2187 return 2;
2188 }
2189 }
2190
2191 /* See if we can calculate the value as the difference between two
2192 valid immediates. */
2193 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2194 {
2195 int topshift = clear_sign_bit_copies & ~1;
2196
2197 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2198 & (0xff000000 >> topshift));
2199
2200 /* If temp1 is zero, then that means the 9 most significant
2201 bits of remainder were 1 and we've caused it to overflow.
2202 When topshift is 0 we don't need to do anything since we
2203 can borrow from 'bit 32'. */
2204 if (temp1 == 0 && topshift != 0)
2205 temp1 = 0x80000000 >> (topshift - 1);
2206
2207 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2208
2209 if (const_ok_for_arm (temp2))
2210 {
2211 if (generate)
2212 {
2213 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2214 emit_constant_insn (cond,
2215 gen_rtx_SET (VOIDmode, new_src,
2216 GEN_INT (temp1)));
2217 emit_constant_insn (cond,
2218 gen_addsi3 (target, new_src,
2219 GEN_INT (-temp2)));
2220 }
2221
2222 return 2;
2223 }
2224 }
2225
2226 /* See if we can generate this by setting the bottom (or the top)
2227 16 bits, and then shifting these into the other half of the
2228 word. We only look for the simplest cases; to do more would cost
2229 too much. Be careful, however, not to generate this when the
2230 alternative would take fewer insns. */
2231 if (val & 0xffff0000)
2232 {
2233 temp1 = remainder & 0xffff0000;
2234 temp2 = remainder & 0x0000ffff;
2235
2236 /* Overlaps outside this range are best done using other methods. */
2237 for (i = 9; i < 24; i++)
2238 {
2239 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2240 && !const_ok_for_arm (temp2))
2241 {
2242 rtx new_src = (subtargets
2243 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2244 : target);
2245 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2246 source, subtargets, generate);
2247 source = new_src;
2248 if (generate)
2249 emit_constant_insn
2250 (cond,
2251 gen_rtx_SET
2252 (VOIDmode, target,
2253 gen_rtx_IOR (mode,
2254 gen_rtx_ASHIFT (mode, source,
2255 GEN_INT (i)),
2256 source)));
2257 return insns + 1;
2258 }
2259 }
2260
2261 /* Don't duplicate cases already considered. */
2262 for (i = 17; i < 24; i++)
2263 {
2264 if (((temp1 | (temp1 >> i)) == remainder)
2265 && !const_ok_for_arm (temp1))
2266 {
2267 rtx new_src = (subtargets
2268 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2269 : target);
2270 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2271 source, subtargets, generate);
2272 source = new_src;
2273 if (generate)
2274 emit_constant_insn
2275 (cond,
2276 gen_rtx_SET (VOIDmode, target,
2277 gen_rtx_IOR
2278 (mode,
2279 gen_rtx_LSHIFTRT (mode, source,
2280 GEN_INT (i)),
2281 source)));
2282 return insns + 1;
2283 }
2284 }
2285 }
2286 break;
2287
2288 case IOR:
2289 case XOR:
2290 /* If we have IOR or XOR, and the constant can be loaded in a
2291 single instruction, and we can find a temporary to put it in,
2292 then this can be done in two instructions instead of 3-4. */
2293 if (subtargets
2294 /* TARGET can't be NULL if SUBTARGETS is 0. */
2295 || (reload_completed && !reg_mentioned_p (target, source)))
2296 {
2297 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2298 {
2299 if (generate)
2300 {
2301 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2302
2303 emit_constant_insn (cond,
2304 gen_rtx_SET (VOIDmode, sub,
2305 GEN_INT (val)));
2306 emit_constant_insn (cond,
2307 gen_rtx_SET (VOIDmode, target,
2308 gen_rtx_fmt_ee (code, mode,
2309 source, sub)));
2310 }
2311 return 2;
2312 }
2313 }
2314
2315 if (code == XOR)
2316 break;
2317
2318 if (set_sign_bit_copies > 8
2319 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2320 {
2321 if (generate)
2322 {
2323 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2324 rtx shift = GEN_INT (set_sign_bit_copies);
2325
2326 emit_constant_insn
2327 (cond,
2328 gen_rtx_SET (VOIDmode, sub,
2329 gen_rtx_NOT (mode,
2330 gen_rtx_ASHIFT (mode,
2331 source,
2332 shift))));
2333 emit_constant_insn
2334 (cond,
2335 gen_rtx_SET (VOIDmode, target,
2336 gen_rtx_NOT (mode,
2337 gen_rtx_LSHIFTRT (mode, sub,
2338 shift))));
2339 }
2340 return 2;
2341 }
2342
2343 if (set_zero_bit_copies > 8
2344 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2345 {
2346 if (generate)
2347 {
2348 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2349 rtx shift = GEN_INT (set_zero_bit_copies);
2350
2351 emit_constant_insn
2352 (cond,
2353 gen_rtx_SET (VOIDmode, sub,
2354 gen_rtx_NOT (mode,
2355 gen_rtx_LSHIFTRT (mode,
2356 source,
2357 shift))));
2358 emit_constant_insn
2359 (cond,
2360 gen_rtx_SET (VOIDmode, target,
2361 gen_rtx_NOT (mode,
2362 gen_rtx_ASHIFT (mode, sub,
2363 shift))));
2364 }
2365 return 2;
2366 }
2367
2368 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2369 {
2370 if (generate)
2371 {
2372 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2373 emit_constant_insn (cond,
2374 gen_rtx_SET (VOIDmode, sub,
2375 gen_rtx_NOT (mode, source)));
2376 source = sub;
2377 if (subtargets)
2378 sub = gen_reg_rtx (mode);
2379 emit_constant_insn (cond,
2380 gen_rtx_SET (VOIDmode, sub,
2381 gen_rtx_AND (mode, source,
2382 GEN_INT (temp1))));
2383 emit_constant_insn (cond,
2384 gen_rtx_SET (VOIDmode, target,
2385 gen_rtx_NOT (mode, sub)));
2386 }
2387 return 3;
2388 }
2389 break;
2390
2391 case AND:
2392 /* See if two shifts will do 2 or more insns' worth of work. */
2393 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2394 {
2395 HOST_WIDE_INT shift_mask = ((0xffffffff
2396 << (32 - clear_sign_bit_copies))
2397 & 0xffffffff);
2398
2399 if ((remainder | shift_mask) != 0xffffffff)
2400 {
2401 if (generate)
2402 {
2403 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2404 insns = arm_gen_constant (AND, mode, cond,
2405 remainder | shift_mask,
2406 new_src, source, subtargets, 1);
2407 source = new_src;
2408 }
2409 else
2410 {
2411 rtx targ = subtargets ? NULL_RTX : target;
2412 insns = arm_gen_constant (AND, mode, cond,
2413 remainder | shift_mask,
2414 targ, source, subtargets, 0);
2415 }
2416 }
2417
2418 if (generate)
2419 {
2420 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2421 rtx shift = GEN_INT (clear_sign_bit_copies);
2422
2423 emit_insn (gen_ashlsi3 (new_src, source, shift));
2424 emit_insn (gen_lshrsi3 (target, new_src, shift));
2425 }
2426
2427 return insns + 2;
2428 }
2429
2430 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2431 {
2432 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2433
2434 if ((remainder | shift_mask) != 0xffffffff)
2435 {
2436 if (generate)
2437 {
2438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2439
2440 insns = arm_gen_constant (AND, mode, cond,
2441 remainder | shift_mask,
2442 new_src, source, subtargets, 1);
2443 source = new_src;
2444 }
2445 else
2446 {
2447 rtx targ = subtargets ? NULL_RTX : target;
2448
2449 insns = arm_gen_constant (AND, mode, cond,
2450 remainder | shift_mask,
2451 targ, source, subtargets, 0);
2452 }
2453 }
2454
2455 if (generate)
2456 {
2457 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2458 rtx shift = GEN_INT (clear_zero_bit_copies);
2459
2460 emit_insn (gen_lshrsi3 (new_src, source, shift));
2461 emit_insn (gen_ashlsi3 (target, new_src, shift));
2462 }
2463
2464 return insns + 2;
2465 }
2466
2467 break;
2468
2469 default:
2470 break;
2471 }
2472
2473 for (i = 0; i < 32; i++)
2474 if (remainder & (1 << i))
2475 num_bits_set++;
2476
2477 if (code == AND || (can_invert && num_bits_set > 16))
2478 remainder = (~remainder) & 0xffffffff;
2479 else if (code == PLUS && num_bits_set > 16)
2480 remainder = (-remainder) & 0xffffffff;
2481 else
2482 {
2483 can_invert = 0;
2484 can_negate = 0;
2485 }
2486
2487 /* Now try and find a way of doing the job in either two or three
2488 instructions.
2489 We start by looking for the largest block of zeros that are aligned on
2490 a 2-bit boundary, we then fill up the temps, wrapping around to the
2491 top of the word when we drop off the bottom.
2492 In the worst case this code should produce no more than four insns.
2493 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2494 best place to start. */
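/* As a worked example (one possible outcome, for illustration only):
   a SET of 0x00f000ff has no single-immediate encoding, but splits into
   two valid 8-bit chunks, giving a two-instruction sequence such as

       mov     rD, #0x000000ff
       orr     rD, rD, #0x00f00000

   whereas a value like 0x12345678 really does need the worst-case four
   instructions. */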
2495
2496 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2497 the same. */
2498 {
2499 int best_start = 0;
2500 if (!TARGET_THUMB2)
2501 {
2502 int best_consecutive_zeros = 0;
2503
2504 for (i = 0; i < 32; i += 2)
2505 {
2506 int consecutive_zeros = 0;
2507
2508 if (!(remainder & (3 << i)))
2509 {
2510 while ((i < 32) && !(remainder & (3 << i)))
2511 {
2512 consecutive_zeros += 2;
2513 i += 2;
2514 }
2515 if (consecutive_zeros > best_consecutive_zeros)
2516 {
2517 best_consecutive_zeros = consecutive_zeros;
2518 best_start = i - consecutive_zeros;
2519 }
2520 i -= 2;
2521 }
2522 }
2523
2524 /* So long as it won't require any more insns to do so, it's
2525 desirable to emit a small constant (in bits 0...9) in the last
2526 insn. This way there is more chance that it can be combined with
2527 a later addressing insn to form a pre-indexed load or store
2528 operation. Consider:
2529
2530 *((volatile int *)0xe0000100) = 1;
2531 *((volatile int *)0xe0000110) = 2;
2532
2533 We want this to wind up as:
2534
2535 mov rA, #0xe0000000
2536 mov rB, #1
2537 str rB, [rA, #0x100]
2538 mov rB, #2
2539 str rB, [rA, #0x110]
2540
2541 rather than having to synthesize both large constants from scratch.
2542
2543 Therefore, we calculate how many insns would be required to emit
2544 the constant starting from `best_start', and also starting from
2545 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2546 yield a shorter sequence, we may as well use zero. */
2547 if (best_start != 0
2548 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2549 && (count_insns_for_constant (remainder, 0) <=
2550 count_insns_for_constant (remainder, best_start)))
2551 best_start = 0;
2552 }
2553
2554 /* Now start emitting the insns. */
2555 i = best_start;
2556 do
2557 {
2558 int end;
2559
2560 if (i <= 0)
2561 i += 32;
2562 if (remainder & (3 << (i - 2)))
2563 {
2564 end = i - 8;
2565 if (end < 0)
2566 end += 32;
2567 temp1 = remainder & ((0x0ff << end)
2568 | ((i < end) ? (0xff >> (32 - end)) : 0));
2569 remainder &= ~temp1;
2570
2571 if (generate)
2572 {
2573 rtx new_src, temp1_rtx;
2574
2575 if (code == SET || code == MINUS)
2576 {
2577 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2578 if (can_invert && code != MINUS)
2579 temp1 = ~temp1;
2580 }
2581 else
2582 {
2583 if (remainder && subtargets)
2584 new_src = gen_reg_rtx (mode);
2585 else
2586 new_src = target;
2587 if (can_invert)
2588 temp1 = ~temp1;
2589 else if (can_negate)
2590 temp1 = -temp1;
2591 }
2592
2593 temp1 = trunc_int_for_mode (temp1, mode);
2594 temp1_rtx = GEN_INT (temp1);
2595
2596 if (code == SET)
2597 ;
2598 else if (code == MINUS)
2599 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2600 else
2601 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2602
2603 emit_constant_insn (cond,
2604 gen_rtx_SET (VOIDmode, new_src,
2605 temp1_rtx));
2606 source = new_src;
2607 }
2608
2609 if (code == SET)
2610 {
2611 can_invert = 0;
2612 code = PLUS;
2613 }
2614 else if (code == MINUS)
2615 code = PLUS;
2616
2617 insns++;
2618 if (TARGET_ARM)
2619 i -= 6;
2620 else
2621 i -= 7;
2622 }
2623 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2624 shifts. */
2625 if (TARGET_ARM)
2626 i -= 2;
2627 else
2628 i--;
2629 }
2630 while (remainder);
2631 }
2632
2633 return insns;
2634 }
2635
2636 /* Canonicalize a comparison so that we are more likely to recognize it.
2637 This can be done for a few constant compares, where we can make the
2638 immediate value easier to load. */
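/* For instance (a sketch): the SImode test (x > 0x3ff) would need the
   invalid immediate 0x3ff, but 0x400 is a valid ARM immediate, so GT with
   0x3ff is rewritten as GE with 0x400.  The boundary checks below (maxval,
   ~maxval, 0 and so on) stop the rewrite from wrapping around at the
   extreme values. */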
2639
2640 enum rtx_code
2641 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2642 rtx * op1)
2643 {
2644 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2645 unsigned HOST_WIDE_INT maxval;
2646 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;
2647
2648 switch (code)
2649 {
2650 case EQ:
2651 case NE:
2652 return code;
2653
2654 case GT:
2655 case LE:
2656 if (i != maxval
2657 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2658 {
2659 *op1 = GEN_INT (i + 1);
2660 return code == GT ? GE : LT;
2661 }
2662 break;
2663
2664 case GE:
2665 case LT:
2666 if (i != ~maxval
2667 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2668 {
2669 *op1 = GEN_INT (i - 1);
2670 return code == GE ? GT : LE;
2671 }
2672 break;
2673
2674 case GTU:
2675 case LEU:
2676 if (i != ~((unsigned HOST_WIDE_INT) 0)
2677 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2678 {
2679 *op1 = GEN_INT (i + 1);
2680 return code == GTU ? GEU : LTU;
2681 }
2682 break;
2683
2684 case GEU:
2685 case LTU:
2686 if (i != 0
2687 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2688 {
2689 *op1 = GEN_INT (i - 1);
2690 return code == GEU ? GTU : LEU;
2691 }
2692 break;
2693
2694 default:
2695 gcc_unreachable ();
2696 }
2697
2698 return code;
2699 }
2700
2701
2702 /* Define how to find the value returned by a function. */
2703
2704 rtx
2705 arm_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2706 {
2707 enum machine_mode mode;
2708 int unsignedp ATTRIBUTE_UNUSED;
2709 rtx r ATTRIBUTE_UNUSED;
2710
2711 mode = TYPE_MODE (type);
2712 /* Promote integer types. */
2713 if (INTEGRAL_TYPE_P (type))
2714 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2715
2716 /* Promote small structs returned in a register to full-word size
2717 for big-endian AAPCS. */
2718 if (arm_return_in_msb (type))
2719 {
2720 HOST_WIDE_INT size = int_size_in_bytes (type);
2721 if (size % UNITS_PER_WORD != 0)
2722 {
2723 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2724 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2725 }
2726 }
2727
2728 return LIBCALL_VALUE (mode);
2729 }
2730
2731 /* Determine the amount of memory needed to store the possible return
2732 registers of an untyped call. */
2733 int
2734 arm_apply_result_size (void)
2735 {
2736 int size = 16;
2737
2738 if (TARGET_ARM)
2739 {
2740 if (TARGET_HARD_FLOAT_ABI)
2741 {
2742 if (TARGET_FPA)
2743 size += 12;
2744 if (TARGET_MAVERICK)
2745 size += 8;
2746 }
2747 if (TARGET_IWMMXT_ABI)
2748 size += 8;
2749 }
2750
2751 return size;
2752 }
2753
2754 /* Decide whether a type should be returned in memory (true)
2755 or in a register (false). This is called by the macro
2756 RETURN_IN_MEMORY. */
2757 int
2758 arm_return_in_memory (tree type)
2759 {
2760 HOST_WIDE_INT size;
2761
2762 size = int_size_in_bytes (type);
2763
2764 /* Vector values should be returned using ARM registers, not memory (unless
2765 they're over 16 bytes, which will break since we only have four
2766 call-clobbered registers to play with). */
2767 if (TREE_CODE (type) == VECTOR_TYPE)
2768 return (size < 0 || size > (4 * UNITS_PER_WORD));
2769
2770 if (!AGGREGATE_TYPE_P (type)
2771 && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2772 /* All simple types are returned in registers.
2773 For AAPCS, complex types are treated the same as aggregates. */
2774 return 0;
2775
2776 if (arm_abi != ARM_ABI_APCS)
2777 {
2778 /* ATPCS and later return aggregate types in memory only if they are
2779 larger than a word (or are variable size). */
2780 return (size < 0 || size > UNITS_PER_WORD);
2781 }
2782
2783 /* For the arm-wince targets we choose to be compatible with Microsoft's
2784 ARM and Thumb compilers, which always return aggregates in memory. */
2785 #ifndef ARM_WINCE
2786 /* All structures/unions bigger than one word are returned in memory.
2787 Also catch the case where int_size_in_bytes returns -1. In this case
2788 the aggregate is either huge or of variable size, and in either case
2789 we will want to return it via memory and not in a register. */
2790 if (size < 0 || size > UNITS_PER_WORD)
2791 return 1;
2792
2793 if (TREE_CODE (type) == RECORD_TYPE)
2794 {
2795 tree field;
2796
2797 /* For a struct the APCS says that we only return in a register
2798 if the type is 'integer like' and every addressable element
2799 has an offset of zero. For practical purposes this means
2800 that the structure can have at most one non bit-field element
2801 and that this element must be the first one in the structure. */
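/* For example, under these APCS rules struct { int i; } and a struct made
   up only of bit-fields such as struct { int hi : 24; int lo : 8; } come
   back in a register, while struct { short a; short b; } (a second
   addressable field) and struct { float f; } (a floating-point member)
   are returned in memory. */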
2802
2803 /* Find the first field, ignoring non FIELD_DECL things which will
2804 have been created by C++. */
2805 for (field = TYPE_FIELDS (type);
2806 field && TREE_CODE (field) != FIELD_DECL;
2807 field = TREE_CHAIN (field))
2808 continue;
2809
2810 if (field == NULL)
2811 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2812
2813 /* Check that the first field is valid for returning in a register. */
2814
2815 /* ... Floats are not allowed */
2816 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2817 return 1;
2818
2819 /* ... Aggregates that are not themselves valid for returning in
2820 a register are not allowed. */
2821 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2822 return 1;
2823
2824 /* Now check the remaining fields, if any. Only bitfields are allowed,
2825 since they are not addressable. */
2826 for (field = TREE_CHAIN (field);
2827 field;
2828 field = TREE_CHAIN (field))
2829 {
2830 if (TREE_CODE (field) != FIELD_DECL)
2831 continue;
2832
2833 if (!DECL_BIT_FIELD_TYPE (field))
2834 return 1;
2835 }
2836
2837 return 0;
2838 }
2839
2840 if (TREE_CODE (type) == UNION_TYPE)
2841 {
2842 tree field;
2843
2844 /* Unions can be returned in registers if every element is
2845 integral, or can be returned in an integer register. */
2846 for (field = TYPE_FIELDS (type);
2847 field;
2848 field = TREE_CHAIN (field))
2849 {
2850 if (TREE_CODE (field) != FIELD_DECL)
2851 continue;
2852
2853 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2854 return 1;
2855
2856 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2857 return 1;
2858 }
2859
2860 return 0;
2861 }
2862 #endif /* not ARM_WINCE */
2863
2864 /* Return all other types in memory. */
2865 return 1;
2866 }
2867
2868 /* Indicate whether or not words of a double are in big-endian order. */
2869
2870 int
2871 arm_float_words_big_endian (void)
2872 {
2873 if (TARGET_MAVERICK)
2874 return 0;
2875
2876 /* For FPA, float words are always big-endian. For VFP, float words
2877 follow the memory system mode. */
2878
2879 if (TARGET_FPA)
2880 {
2881 return 1;
2882 }
2883
2884 if (TARGET_VFP)
2885 return (TARGET_BIG_END ? 1 : 0);
2886
2887 return 1;
2888 }
2889
2890 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2891 for a call to a function whose data type is FNTYPE.
2892 For a library call, FNTYPE is NULL. */
2893 void
2894 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2895 rtx libname ATTRIBUTE_UNUSED,
2896 tree fndecl ATTRIBUTE_UNUSED)
2897 {
2898 /* On the ARM, the offset starts at 0. */
2899 pcum->nregs = 0;
2900 pcum->iwmmxt_nregs = 0;
2901 pcum->can_split = true;
2902
2903 /* Varargs vectors are treated the same as long long.
2904 named_count avoids having to change the way ARM handles 'named'. */
2905 pcum->named_count = 0;
2906 pcum->nargs = 0;
2907
2908 if (TARGET_REALLY_IWMMXT && fntype)
2909 {
2910 tree fn_arg;
2911
2912 for (fn_arg = TYPE_ARG_TYPES (fntype);
2913 fn_arg;
2914 fn_arg = TREE_CHAIN (fn_arg))
2915 pcum->named_count += 1;
2916
2917 if (! pcum->named_count)
2918 pcum->named_count = INT_MAX;
2919 }
2920 }
2921
2922
2923 /* Return true if mode/type need doubleword alignment. */
2924 bool
2925 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2926 {
2927 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2928 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2929 }
2930
2931
2932 /* Determine where to put an argument to a function.
2933 Value is zero to push the argument on the stack,
2934 or a hard register in which to store the argument.
2935
2936 MODE is the argument's machine mode.
2937 TYPE is the data type of the argument (as a tree).
2938 This is null for libcalls where that information may
2939 not be available.
2940 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2941 the preceding args and about the function being called.
2942 NAMED is nonzero if this argument is a named parameter
2943 (otherwise it is an extra parameter matching an ellipsis). */
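/* As an illustration (assuming ARM_DOUBLEWORD_ALIGN is in effect): for a
   call such as f (int a, long long b), the int is passed in r0, r1 is
   skipped so that the long long can occupy the even/odd pair r2/r3, and a
   further integer argument would then have to go on the stack. */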
2944
2945 rtx
2946 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2947 tree type, int named)
2948 {
2949 int nregs;
2950
2951 /* Varargs vectors are treated the same as long long.
2952 named_count avoids having to change the way ARM handles 'named'. */
2953 if (TARGET_IWMMXT_ABI
2954 && arm_vector_mode_supported_p (mode)
2955 && pcum->named_count > pcum->nargs + 1)
2956 {
2957 if (pcum->iwmmxt_nregs <= 9)
2958 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2959 else
2960 {
2961 pcum->can_split = false;
2962 return NULL_RTX;
2963 }
2964 }
2965
2966 /* Put doubleword aligned quantities in even register pairs. */
2967 if (pcum->nregs & 1
2968 && ARM_DOUBLEWORD_ALIGN
2969 && arm_needs_doubleword_align (mode, type))
2970 pcum->nregs++;
2971
2972 if (mode == VOIDmode)
2973 /* Pick an arbitrary value for operand 2 of the call insn. */
2974 return const0_rtx;
2975
2976 /* Only allow splitting an arg between regs and memory if all preceding
2977 args were allocated to regs. For args passed by reference we only count
2978 the reference pointer. */
2979 if (pcum->can_split)
2980 nregs = 1;
2981 else
2982 nregs = ARM_NUM_REGS2 (mode, type);
2983
2984 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2985 return NULL_RTX;
2986
2987 return gen_rtx_REG (mode, pcum->nregs);
2988 }
2989
2990 static int
2991 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2992 tree type, bool named ATTRIBUTE_UNUSED)
2993 {
2994 int nregs = pcum->nregs;
2995
2996 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2997 return 0;
2998
2999 if (NUM_ARG_REGS > nregs
3000 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3001 && pcum->can_split)
3002 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3003
3004 return 0;
3005 }
3006
3007 /* Variable sized types are passed by reference. This is a GCC
3008 extension to the ARM ABI. */
3009
3010 static bool
3011 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3012 enum machine_mode mode ATTRIBUTE_UNUSED,
3013 tree type, bool named ATTRIBUTE_UNUSED)
3014 {
3015 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3016 }
3017 \f
3018 /* Encode the current state of the #pragma [no_]long_calls. */
3019 typedef enum
3020 {
3021 OFF, /* No #pragma [no_]long_calls is in effect. */
3022 LONG, /* #pragma long_calls is in effect. */
3023 SHORT /* #pragma no_long_calls is in effect. */
3024 } arm_pragma_enum;
3025
3026 static arm_pragma_enum arm_pragma_long_calls = OFF;
3027
3028 void
3029 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3030 {
3031 arm_pragma_long_calls = LONG;
3032 }
3033
3034 void
3035 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3036 {
3037 arm_pragma_long_calls = SHORT;
3038 }
3039
3040 void
3041 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3042 {
3043 arm_pragma_long_calls = OFF;
3044 }
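/* For example (the declaration names are illustrative only), a header can
   bracket a group of declarations with

       #pragma long_calls
       void remote_init (void);
       void remote_poll (void);
       #pragma long_calls_off

   and arm_set_default_type_attributes below will then tag those function
   types with the long_call attribute while the pragma is in effect. */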
3045 \f
3046 /* Table of machine attributes. */
3047 const struct attribute_spec arm_attribute_table[] =
3048 {
3049 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3050 /* Function calls made to this symbol must be done indirectly, because
3051 it may lie outside the 26-bit addressing range of a normal function
3052 call. */
3053 { "long_call", 0, 0, false, true, true, NULL },
3054 /* Whereas these functions are always known to reside within the 26-bit
3055 addressing range. */
3056 { "short_call", 0, 0, false, true, true, NULL },
3057 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3058 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3059 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3060 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3061 #ifdef ARM_PE
3062 /* ARM/PE has three new attributes:
3063 interfacearm - ?
3064 dllexport - for exporting a function/variable that will live in a dll
3065 dllimport - for importing a function/variable from a dll
3066
3067 Microsoft allows multiple declspecs in one __declspec, separating
3068 them with spaces. We do NOT support this. Instead, use __declspec
3069 multiple times.
3070 */
3071 { "dllimport", 0, 0, true, false, false, NULL },
3072 { "dllexport", 0, 0, true, false, false, NULL },
3073 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3074 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3075 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3076 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3077 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3078 #endif
3079 { NULL, 0, 0, false, false, false, NULL }
3080 };
3081
3082 /* Handle an attribute requiring a FUNCTION_DECL;
3083 arguments as in struct attribute_spec.handler. */
3084 static tree
3085 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3086 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3087 {
3088 if (TREE_CODE (*node) != FUNCTION_DECL)
3089 {
3090 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3091 IDENTIFIER_POINTER (name));
3092 *no_add_attrs = true;
3093 }
3094
3095 return NULL_TREE;
3096 }
3097
3098 /* Handle an "interrupt" or "isr" attribute;
3099 arguments as in struct attribute_spec.handler. */
3100 static tree
3101 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3102 bool *no_add_attrs)
3103 {
3104 if (DECL_P (*node))
3105 {
3106 if (TREE_CODE (*node) != FUNCTION_DECL)
3107 {
3108 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3109 IDENTIFIER_POINTER (name));
3110 *no_add_attrs = true;
3111 }
3112 /* FIXME: the argument, if any, is checked for type attributes;
3113 should it be checked for decl ones? */
3114 }
3115 else
3116 {
3117 if (TREE_CODE (*node) == FUNCTION_TYPE
3118 || TREE_CODE (*node) == METHOD_TYPE)
3119 {
3120 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3121 {
3122 warning (OPT_Wattributes, "%qs attribute ignored",
3123 IDENTIFIER_POINTER (name));
3124 *no_add_attrs = true;
3125 }
3126 }
3127 else if (TREE_CODE (*node) == POINTER_TYPE
3128 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3129 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3130 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3131 {
3132 *node = build_variant_type_copy (*node);
3133 TREE_TYPE (*node) = build_type_attribute_variant
3134 (TREE_TYPE (*node),
3135 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3136 *no_add_attrs = true;
3137 }
3138 else
3139 {
3140 /* Possibly pass this attribute on from the type to a decl. */
3141 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3142 | (int) ATTR_FLAG_FUNCTION_NEXT
3143 | (int) ATTR_FLAG_ARRAY_NEXT))
3144 {
3145 *no_add_attrs = true;
3146 return tree_cons (name, args, NULL_TREE);
3147 }
3148 else
3149 {
3150 warning (OPT_Wattributes, "%qs attribute ignored",
3151 IDENTIFIER_POINTER (name));
3152 }
3153 }
3154 }
3155
3156 return NULL_TREE;
3157 }
3158
3159 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3160 /* Handle the "notshared" attribute. This attribute is another way of
3161 requesting hidden visibility. ARM's compiler supports
3162 "__declspec(notshared)"; we support the same thing via an
3163 attribute. */
3164
3165 static tree
3166 arm_handle_notshared_attribute (tree *node,
3167 tree name ATTRIBUTE_UNUSED,
3168 tree args ATTRIBUTE_UNUSED,
3169 int flags ATTRIBUTE_UNUSED,
3170 bool *no_add_attrs)
3171 {
3172 tree decl = TYPE_NAME (*node);
3173
3174 if (decl)
3175 {
3176 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3177 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3178 *no_add_attrs = false;
3179 }
3180 return NULL_TREE;
3181 }
3182 #endif
3183
3184 /* Return 0 if the attributes for two types are incompatible, 1 if they
3185 are compatible, and 2 if they are nearly compatible (which causes a
3186 warning to be generated). */
3187 static int
3188 arm_comp_type_attributes (tree type1, tree type2)
3189 {
3190 int l1, l2, s1, s2;
3191
3192 /* Check for mismatch of non-default calling convention. */
3193 if (TREE_CODE (type1) != FUNCTION_TYPE)
3194 return 1;
3195
3196 /* Check for mismatched call attributes. */
3197 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3198 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3199 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3200 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3201
3202 /* Only bother to check if an attribute is defined. */
3203 if (l1 | l2 | s1 | s2)
3204 {
3205 /* If one type has an attribute, the other must have the same attribute. */
3206 if ((l1 != l2) || (s1 != s2))
3207 return 0;
3208
3209 /* Disallow mixed attributes. */
3210 if ((l1 & s2) || (l2 & s1))
3211 return 0;
3212 }
3213
3214 /* Check for mismatched ISR attribute. */
3215 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3216 if (! l1)
3217 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3218 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3219 if (! l2)
3220 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3221 if (l1 != l2)
3222 return 0;
3223
3224 return 1;
3225 }
3226
3227 /* Assign default attributes to a newly defined type. This is used to
3228 set short_call/long_call attributes for function types of
3229 functions defined inside corresponding #pragma scopes. */
3230 static void
3231 arm_set_default_type_attributes (tree type)
3232 {
3233 /* Add __attribute__ ((long_call)) to all functions when inside
3234 #pragma long_calls, or __attribute__ ((short_call)) when inside
3235 #pragma no_long_calls. */
3236 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3237 {
3238 tree type_attr_list, attr_name;
3239 type_attr_list = TYPE_ATTRIBUTES (type);
3240
3241 if (arm_pragma_long_calls == LONG)
3242 attr_name = get_identifier ("long_call");
3243 else if (arm_pragma_long_calls == SHORT)
3244 attr_name = get_identifier ("short_call");
3245 else
3246 return;
3247
3248 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3249 TYPE_ATTRIBUTES (type) = type_attr_list;
3250 }
3251 }
3252 \f
3253 /* Return true if DECL is known to be linked into section SECTION. */
3254
3255 static bool
3256 arm_function_in_section_p (tree decl, section *section)
3257 {
3258 /* We can only be certain about functions defined in the same
3259 compilation unit. */
3260 if (!TREE_STATIC (decl))
3261 return false;
3262
3263 /* Make sure that SYMBOL always binds to the definition in this
3264 compilation unit. */
3265 if (!targetm.binds_local_p (decl))
3266 return false;
3267
3268 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3269 if (!DECL_SECTION_NAME (decl))
3270 {
3271 /* Only cater for unit-at-a-time mode, where we know that the user
3272 cannot later specify a section for DECL. */
3273 if (!flag_unit_at_a_time)
3274 return false;
3275
3276 /* Make sure that we will not create a unique section for DECL. */
3277 if (flag_function_sections || DECL_ONE_ONLY (decl))
3278 return false;
3279 }
3280
3281 return function_section (decl) == section;
3282 }
3283
3284 /* Return nonzero if a 32-bit "long_call" should be generated for
3285 a call from the current function to DECL. We generate a long_call
3286 if the function:
3287
3288 a. has an __attribute__ ((long_call))
3289 or b. is within the scope of a #pragma long_calls
3290 or c. the -mlong-calls command line switch has been specified
3291
3292 However we do not generate a long call if the function:
3293
3294 d. has an __attribute__ ((short_call))
3295 or e. is inside the scope of a #pragma no_long_calls
3296 or f. is defined in the same section as the current function. */
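/* So, for instance, a declaration such as

       void far_away (void) __attribute__ ((long_call));

   makes calls to far_away use the long-call sequence unless rule (f) above
   can show that far_away lives in the same section as the caller, while
   -mlong-calls makes long calls the default for every call that is neither
   marked short_call nor provably section-local. */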
3297
3298 bool
3299 arm_is_long_call_p (tree decl)
3300 {
3301 tree attrs;
3302
3303 if (!decl)
3304 return TARGET_LONG_CALLS;
3305
3306 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3307 if (lookup_attribute ("short_call", attrs))
3308 return false;
3309
3310 /* For "f", be conservative, and only cater for cases in which the
3311 whole of the current function is placed in the same section. */
3312 if (!flag_reorder_blocks_and_partition
3313 && arm_function_in_section_p (decl, current_function_section ()))
3314 return false;
3315
3316 if (lookup_attribute ("long_call", attrs))
3317 return true;
3318
3319 return TARGET_LONG_CALLS;
3320 }
3321
3322 /* Return nonzero if it is ok to make a tail-call to DECL. */
3323 static bool
3324 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3325 {
3326 unsigned long func_type;
3327
3328 if (cfun->machine->sibcall_blocked)
3329 return false;
3330
3331 /* Never tailcall something for which we have no decl, or if we
3332 are in Thumb mode. */
3333 if (decl == NULL || TARGET_THUMB)
3334 return false;
3335
3336 /* The PIC register is live on entry to VxWorks PLT entries, so we
3337 must make the call before restoring the PIC register. */
3338 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3339 return false;
3340
3341 /* Cannot tail-call to long calls, since these are out of range of
3342 a branch instruction. */
3343 if (arm_is_long_call_p (decl))
3344 return false;
3345
3346 /* If we are interworking and the function is not declared static
3347 then we can't tail-call it unless we know that it exists in this
3348 compilation unit (since it might be a Thumb routine). */
3349 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3350 return false;
3351
3352 func_type = arm_current_func_type ();
3353 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3354 if (IS_INTERRUPT (func_type))
3355 return false;
3356
3357 /* Never tailcall if function may be called with a misaligned SP. */
3358 if (IS_STACKALIGN (func_type))
3359 return false;
3360
3361 /* Everything else is ok. */
3362 return true;
3363 }
3364
3365 \f
3366 /* Addressing mode support functions. */
3367
3368 /* Return nonzero if X is a legitimate immediate operand when compiling
3369 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3370 int
3371 legitimate_pic_operand_p (rtx x)
3372 {
3373 if (GET_CODE (x) == SYMBOL_REF
3374 || (GET_CODE (x) == CONST
3375 && GET_CODE (XEXP (x, 0)) == PLUS
3376 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3377 return 0;
3378
3379 return 1;
3380 }
3381
3382 /* Record that the current function needs a PIC register. Initialize
3383 cfun->machine->pic_reg if we have not already done so. */
3384
3385 static void
3386 require_pic_register (void)
3387 {
3388 /* A lot of the logic here is made obscure by the fact that this
3389 routine gets called as part of the rtx cost estimation process.
3390 We don't want those calls to affect any assumptions about the real
3391 function; and further, we can't call entry_of_function() until we
3392 start the real expansion process. */
3393 if (!current_function_uses_pic_offset_table)
3394 {
3395 gcc_assert (can_create_pseudo_p ());
3396 if (arm_pic_register != INVALID_REGNUM)
3397 {
3398 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3399
3400 /* Play games to avoid marking the function as needing pic
3401 if we are being called as part of the cost-estimation
3402 process. */
3403 if (current_ir_type () != IR_GIMPLE)
3404 current_function_uses_pic_offset_table = 1;
3405 }
3406 else
3407 {
3408 rtx seq;
3409
3410 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3411
3412 /* Play games to avoid marking the function as needing pic
3413 if we are being called as part of the cost-estimation
3414 process. */
3415 if (current_ir_type () != IR_GIMPLE)
3416 {
3417 current_function_uses_pic_offset_table = 1;
3418 start_sequence ();
3419
3420 arm_load_pic_register (0UL);
3421
3422 seq = get_insns ();
3423 end_sequence ();
3424 emit_insn_after (seq, entry_of_function ());
3425 }
3426 }
3427 }
3428 }
3429
3430 rtx
3431 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3432 {
3433 if (GET_CODE (orig) == SYMBOL_REF
3434 || GET_CODE (orig) == LABEL_REF)
3435 {
3436 #ifndef AOF_ASSEMBLER
3437 rtx pic_ref, address;
3438 #endif
3439 rtx insn;
3440 int subregs = 0;
3441
3442 /* If this function doesn't have a pic register, create one now. */
3443 require_pic_register ();
3444
3445 if (reg == 0)
3446 {
3447 gcc_assert (can_create_pseudo_p ());
3448 reg = gen_reg_rtx (Pmode);
3449
3450 subregs = 1;
3451 }
3452
3453 #ifdef AOF_ASSEMBLER
3454 /* The AOF assembler can generate relocations for these directly, and
3455 understands that the PIC register has to be added into the offset. */
3456 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3457 #else
3458 if (subregs)
3459 address = gen_reg_rtx (Pmode);
3460 else
3461 address = reg;
3462
3463 if (TARGET_ARM)
3464 emit_insn (gen_pic_load_addr_arm (address, orig));
3465 else if (TARGET_THUMB2)
3466 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3467 else /* TARGET_THUMB1 */
3468 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3469
3470 /* VxWorks does not impose a fixed gap between segments; the run-time
3471 gap can be different from the object-file gap. We therefore can't
3472 use GOTOFF unless we are absolutely sure that the symbol is in the
3473 same segment as the GOT. Unfortunately, the flexibility of linker
3474 scripts means that we can't be sure of that in general, so assume
3475 that GOTOFF is never valid on VxWorks. */
3476 if ((GET_CODE (orig) == LABEL_REF
3477 || (GET_CODE (orig) == SYMBOL_REF
3478 && SYMBOL_REF_LOCAL_P (orig)))
3479 && NEED_GOT_RELOC
3480 && !TARGET_VXWORKS_RTP)
3481 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3482 else
3483 {
3484 pic_ref = gen_const_mem (Pmode,
3485 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3486 address));
3487 }
3488
3489 insn = emit_move_insn (reg, pic_ref);
3490 #endif
3491 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3492 by loop. */
3493 set_unique_reg_note (insn, REG_EQUAL, orig);
3494
3495 return reg;
3496 }
3497 else if (GET_CODE (orig) == CONST)
3498 {
3499 rtx base, offset;
3500
3501 if (GET_CODE (XEXP (orig, 0)) == PLUS
3502 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3503 return orig;
3504
3505 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3506 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3507 return orig;
3508
3509 if (reg == 0)
3510 {
3511 gcc_assert (can_create_pseudo_p ());
3512 reg = gen_reg_rtx (Pmode);
3513 }
3514
3515 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3516
3517 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3518 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3519 base == reg ? 0 : reg);
3520
3521 if (GET_CODE (offset) == CONST_INT)
3522 {
3523 /* The base register doesn't really matter, we only want to
3524 test the index for the appropriate mode. */
3525 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3526 {
3527 gcc_assert (can_create_pseudo_p ());
3528 offset = force_reg (Pmode, offset);
3529 }
3530
3531 if (GET_CODE (offset) == CONST_INT)
3532 return plus_constant (base, INTVAL (offset));
3533 }
3534
3535 if (GET_MODE_SIZE (mode) > 4
3536 && (GET_MODE_CLASS (mode) == MODE_INT
3537 || TARGET_SOFT_FLOAT))
3538 {
3539 emit_insn (gen_addsi3 (reg, base, offset));
3540 return reg;
3541 }
3542
3543 return gen_rtx_PLUS (Pmode, base, offset);
3544 }
3545
3546 return orig;
3547 }
3548
3549
3550 /* Find a spare register to use during the prologue of a function. */
3551
3552 static int
3553 thumb_find_work_register (unsigned long pushed_regs_mask)
3554 {
3555 int reg;
3556
3557 /* Check the argument registers first as these are call-used. The
3558 register allocation order means that sometimes r3 might be used
3559 but earlier argument registers might not, so check them all. */
3560 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3561 if (!df_regs_ever_live_p (reg))
3562 return reg;
3563
3564 /* Before going on to check the call-saved registers we can try a couple
3565 more ways of deducing that r3 is available. The first is when we are
3566 pushing anonymous arguments onto the stack and we have fewer than 4
3567 registers' worth of fixed arguments (*). In this case r3 will be part of
3568 the variable argument list and so we can be sure that it will be
3569 pushed right at the start of the function. Hence it will be available
3570 for the rest of the prologue.
3571 (*): i.e. current_function_pretend_args_size is greater than 0. */
3572 if (cfun->machine->uses_anonymous_args
3573 && current_function_pretend_args_size > 0)
3574 return LAST_ARG_REGNUM;
3575
3576 /* The other case is when we have fixed arguments but fewer than 4 registers'
3577 worth. In this case r3 might be used in the body of the function, but
3578 it is not being used to convey an argument into the function. In theory
3579 we could just check current_function_args_size to see how many bytes are
3580 being passed in argument registers, but this seems to be unreliable:
3581 sometimes it will have the value 0 when in fact arguments are being
3582 passed. (See testcase execute/20021111-1.c for an example.) So we also
3583 check the args_info.nregs field. The problem with this field is
3584 that it makes no allowance for arguments that are passed to the
3585 function but are not used. Hence we could miss an opportunity
3586 when a function has an unused argument in r3. But it is better to be
3587 safe than sorry. */
3588 if (! cfun->machine->uses_anonymous_args
3589 && current_function_args_size >= 0
3590 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3591 && cfun->args_info.nregs < 4)
3592 return LAST_ARG_REGNUM;
3593
3594 /* Otherwise look for a call-saved register that is going to be pushed. */
3595 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3596 if (pushed_regs_mask & (1 << reg))
3597 return reg;
3598
3599 if (TARGET_THUMB2)
3600 {
3601 /* Thumb-2 can use high regs. */
3602 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3603 if (pushed_regs_mask & (1 << reg))
3604 return reg;
3605 }
3606 /* Something went wrong - thumb_compute_save_reg_mask()
3607 should have arranged for a suitable register to be pushed. */
3608 gcc_unreachable ();
3609 }
3610
3611 static GTY(()) int pic_labelno;
3612
3613 /* Generate code to load the PIC register. SAVED_REGS is the mask of
3614 registers pushed in the prologue; in Thumb mode it is used to find a spare low register. */
3615
3616 void
3617 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3618 {
3619 #ifndef AOF_ASSEMBLER
3620 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3621 rtx global_offset_table;
3622
3623 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3624 return;
3625
3626 gcc_assert (flag_pic);
3627
3628 pic_reg = cfun->machine->pic_reg;
3629 if (TARGET_VXWORKS_RTP)
3630 {
3631 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3632 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3633 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3634
3635 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3636
3637 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3638 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3639 }
3640 else
3641 {
3642 /* We use an UNSPEC rather than a LABEL_REF because this label
3643 never appears in the code stream. */
3644
3645 labelno = GEN_INT (pic_labelno++);
3646 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3647 l1 = gen_rtx_CONST (VOIDmode, l1);
3648
3649 global_offset_table
3650 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3651 /* On the ARM the PC register contains 'dot + 8' at the time of the
3652 addition; on the Thumb it is 'dot + 4'. */
3653 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3654 if (GOT_PCREL)
3655 {
3656 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3657 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3658 }
3659 else
3660 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3661
3662 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3663 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3664
3665 if (TARGET_ARM)
3666 {
3667 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3668 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3669 }
3670 else if (TARGET_THUMB2)
3671 {
3672 /* Thumb-2 only allows very limited access to the PC. Calculate the
3673 address in a temporary register. */
3674 if (arm_pic_register != INVALID_REGNUM)
3675 {
3676 pic_tmp = gen_rtx_REG (SImode,
3677 thumb_find_work_register (saved_regs));
3678 }
3679 else
3680 {
3681 gcc_assert (can_create_pseudo_p ());
3682 pic_tmp = gen_reg_rtx (Pmode);
3683 }
3684
3685 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3686 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3687 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3688 }
3689 else /* TARGET_THUMB1 */
3690 {
3691 if (arm_pic_register != INVALID_REGNUM
3692 && REGNO (pic_reg) > LAST_LO_REGNUM)
3693 {
3694 /* We will have pushed the pic register, so we should always be
3695 able to find a work register. */
3696 pic_tmp = gen_rtx_REG (SImode,
3697 thumb_find_work_register (saved_regs));
3698 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3699 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3700 }
3701 else
3702 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3703 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3704 }
3705 }
3706
3707 /* Need to emit this whether or not we obey regdecls,
3708 since setjmp/longjmp can corrupt the liveness information. */
3709 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3710 #endif /* AOF_ASSEMBLER */
3711 }
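
/* A minimal illustrative sketch of the PIC-base arithmetic generated above
   (non-VxWorks, non-GOT_PCREL form).  The helper name and the made-up
   addresses are hypothetical; this is not a GCC interface.  The constant
   placed in the literal pool is _GLOBAL_OFFSET_TABLE_ - (label + bias),
   and the pic_add_dot_plus_{four,eight} pattern adds the PC as read at the
   label, where the bias is 8 in ARM state and 4 in Thumb state.  */

static unsigned long
example_pic_base (unsigned long got_addr, unsigned long label_addr,
                  int is_thumb)
{
  unsigned long bias = is_thumb ? 4 : 8;
  unsigned long pool_const = got_addr - (label_addr + bias); /* literal pool constant */
  unsigned long pc_value = label_addr + bias;                /* PC seen by the add */

  /* pic_reg = pool_const + pc; always equals got_addr, wherever the
     code was loaded.  */
  return pool_const + pc_value;
}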
3712
3713
3714 /* Return nonzero if X is valid as an ARM state addressing register. */
3715 static int
3716 arm_address_register_rtx_p (rtx x, int strict_p)
3717 {
3718 int regno;
3719
3720 if (GET_CODE (x) != REG)
3721 return 0;
3722
3723 regno = REGNO (x);
3724
3725 if (strict_p)
3726 return ARM_REGNO_OK_FOR_BASE_P (regno);
3727
3728 return (regno <= LAST_ARM_REGNUM
3729 || regno >= FIRST_PSEUDO_REGISTER
3730 || regno == FRAME_POINTER_REGNUM
3731 || regno == ARG_POINTER_REGNUM);
3732 }
3733
3734 /* Return TRUE if this rtx is the difference of a symbol and a label,
3735 and will reduce to a PC-relative relocation in the object file.
3736 Expressions like this can be left alone when generating PIC, rather
3737 than forced through the GOT. */
3738 static int
3739 pcrel_constant_p (rtx x)
3740 {
3741 if (GET_CODE (x) == MINUS)
3742 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3743
3744 return FALSE;
3745 }
3746
3747 /* Return nonzero if X is a valid ARM state address operand. */
3748 int
3749 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3750 int strict_p)
3751 {
3752 bool use_ldrd;
3753 enum rtx_code code = GET_CODE (x);
3754
3755 if (arm_address_register_rtx_p (x, strict_p))
3756 return 1;
3757
3758 use_ldrd = (TARGET_LDRD
3759 && (mode == DImode
3760 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3761
3762 if (code == POST_INC || code == PRE_DEC
3763 || ((code == PRE_INC || code == POST_DEC)
3764 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3765 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3766
3767 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3768 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3769 && GET_CODE (XEXP (x, 1)) == PLUS
3770 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3771 {
3772 rtx addend = XEXP (XEXP (x, 1), 1);
3773
3774 /* Don't allow ldrd post-increment by register because it's hard
3775 to fix up invalid register choices. */
3776 if (use_ldrd
3777 && GET_CODE (x) == POST_MODIFY
3778 && GET_CODE (addend) == REG)
3779 return 0;
3780
3781 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3782 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3783 }
3784
3785 /* After reload constants split into minipools will have addresses
3786 from a LABEL_REF. */
3787 else if (reload_completed
3788 && (code == LABEL_REF
3789 || (code == CONST
3790 && GET_CODE (XEXP (x, 0)) == PLUS
3791 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3792 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3793 return 1;
3794
3795 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3796 return 0;
3797
3798 else if (code == PLUS)
3799 {
3800 rtx xop0 = XEXP (x, 0);
3801 rtx xop1 = XEXP (x, 1);
3802
3803 return ((arm_address_register_rtx_p (xop0, strict_p)
3804 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3805 || (arm_address_register_rtx_p (xop1, strict_p)
3806 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3807 }
3808
3809 #if 0
3810 /* Reload currently can't handle MINUS, so disable this for now */
3811 else if (GET_CODE (x) == MINUS)
3812 {
3813 rtx xop0 = XEXP (x, 0);
3814 rtx xop1 = XEXP (x, 1);
3815
3816 return (arm_address_register_rtx_p (xop0, strict_p)
3817 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3818 }
3819 #endif
3820
3821 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3822 && code == SYMBOL_REF
3823 && CONSTANT_POOL_ADDRESS_P (x)
3824 && ! (flag_pic
3825 && symbol_mentioned_p (get_pool_constant (x))
3826 && ! pcrel_constant_p (get_pool_constant (x))))
3827 return 1;
3828
3829 return 0;
3830 }
3831
3832 /* Return nonzero if X is a valid Thumb-2 address operand. */
3833 int
3834 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3835 {
3836 bool use_ldrd;
3837 enum rtx_code code = GET_CODE (x);
3838
3839 if (arm_address_register_rtx_p (x, strict_p))
3840 return 1;
3841
3842 use_ldrd = (TARGET_LDRD
3843 && (mode == DImode
3844 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3845
3846 if (code == POST_INC || code == PRE_DEC
3847 || ((code == PRE_INC || code == POST_DEC)
3848 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3849 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3850
3851 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3852 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3853 && GET_CODE (XEXP (x, 1)) == PLUS
3854 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3855 {
3856 /* Thumb-2 only has autoincrement by constant. */
3857 rtx addend = XEXP (XEXP (x, 1), 1);
3858 HOST_WIDE_INT offset;
3859
3860 if (GET_CODE (addend) != CONST_INT)
3861 return 0;
3862
3863 offset = INTVAL (addend);
3864 if (GET_MODE_SIZE (mode) <= 4)
3865 return (offset > -256 && offset < 256);
3866
3867 return (use_ldrd && offset > -1024 && offset < 1024
3868 && (offset & 3) == 0);
3869 }
3870
3871 /* After reload constants split into minipools will have addresses
3872 from a LABEL_REF. */
3873 else if (reload_completed
3874 && (code == LABEL_REF
3875 || (code == CONST
3876 && GET_CODE (XEXP (x, 0)) == PLUS
3877 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3878 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3879 return 1;
3880
3881 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3882 return 0;
3883
3884 else if (code == PLUS)
3885 {
3886 rtx xop0 = XEXP (x, 0);
3887 rtx xop1 = XEXP (x, 1);
3888
3889 return ((arm_address_register_rtx_p (xop0, strict_p)
3890 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3891 || (arm_address_register_rtx_p (xop1, strict_p)
3892 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3893 }
3894
3895 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3896 && code == SYMBOL_REF
3897 && CONSTANT_POOL_ADDRESS_P (x)
3898 && ! (flag_pic
3899 && symbol_mentioned_p (get_pool_constant (x))
3900 && ! pcrel_constant_p (get_pool_constant (x))))
3901 return 1;
3902
3903 return 0;
3904 }
3905
3906 /* Return nonzero if INDEX is valid for an address index operand in
3907 ARM state. */
3908 static int
3909 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3910 int strict_p)
3911 {
3912 HOST_WIDE_INT range;
3913 enum rtx_code code = GET_CODE (index);
3914
3915 /* Standard coprocessor addressing modes. */
3916 if (TARGET_HARD_FLOAT
3917 && (TARGET_FPA || TARGET_MAVERICK)
3918 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3919 || (TARGET_MAVERICK && mode == DImode)))
3920 return (code == CONST_INT && INTVAL (index) < 1024
3921 && INTVAL (index) > -1024
3922 && (INTVAL (index) & 3) == 0);
3923
3924 if (TARGET_NEON
3925 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3926 return (code == CONST_INT
3927 && INTVAL (index) < 1016
3928 && INTVAL (index) > -1024
3929 && (INTVAL (index) & 3) == 0);
3930
3931 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3932 return (code == CONST_INT
3933 && INTVAL (index) < 1024
3934 && INTVAL (index) > -1024
3935 && (INTVAL (index) & 3) == 0);
3936
3937 if (arm_address_register_rtx_p (index, strict_p)
3938 && (GET_MODE_SIZE (mode) <= 4))
3939 return 1;
3940
3941 if (mode == DImode || mode == DFmode)
3942 {
3943 if (code == CONST_INT)
3944 {
3945 HOST_WIDE_INT val = INTVAL (index);
3946
3947 if (TARGET_LDRD)
3948 return val > -256 && val < 256;
3949 else
3950 return val > -4096 && val < 4092;
3951 }
3952
3953 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3954 }
3955
3956 if (GET_MODE_SIZE (mode) <= 4
3957 && ! (arm_arch4
3958 && (mode == HImode
3959 || (mode == QImode && outer == SIGN_EXTEND))))
3960 {
3961 if (code == MULT)
3962 {
3963 rtx xiop0 = XEXP (index, 0);
3964 rtx xiop1 = XEXP (index, 1);
3965
3966 return ((arm_address_register_rtx_p (xiop0, strict_p)
3967 && power_of_two_operand (xiop1, SImode))
3968 || (arm_address_register_rtx_p (xiop1, strict_p)
3969 && power_of_two_operand (xiop0, SImode)));
3970 }
3971 else if (code == LSHIFTRT || code == ASHIFTRT
3972 || code == ASHIFT || code == ROTATERT)
3973 {
3974 rtx op = XEXP (index, 1);
3975
3976 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3977 && GET_CODE (op) == CONST_INT
3978 && INTVAL (op) > 0
3979 && INTVAL (op) <= 31);
3980 }
3981 }
3982
3983 /* For ARM v4 we may be doing a sign-extend operation during the
3984 load. */
3985 if (arm_arch4)
3986 {
3987 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3988 range = 256;
3989 else
3990 range = 4096;
3991 }
3992 else
3993 range = (mode == HImode) ? 4095 : 4096;
3994
3995 return (code == CONST_INT
3996 && INTVAL (index) < range
3997 && INTVAL (index) > -range);
3998 }
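
/* A minimal illustrative sketch of the constant-offset ranges accepted
   above for core-register loads, assuming an ARMv4-or-later core and
   ignoring the FPA/Maverick, iWMMXt, NEON and register-index cases.  The
   helper name and simplified flags are hypothetical, not GCC interfaces.  */

static int
example_arm_const_index_ok (int mode_size, int is_signed_byte_load,
                            int has_ldrd, long long val)
{
  if (mode_size == 8)                   /* DImode / DFmode.  */
    return has_ldrd ? (val > -256 && val < 256)
                    : (val > -4096 && val < 4092);
  if (mode_size == 2 || is_signed_byte_load)
    return val > -256 && val < 256;     /* ldrh/ldrsh/ldrsb: 8-bit offset.  */
  return val > -4096 && val < 4096;     /* ldr/ldrb/str/strb: 12-bit offset.  */
}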
3999
4000 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
4001 index operand, i.e. 1, 2, 4 or 8. */
4002 static bool
4003 thumb2_index_mul_operand (rtx op)
4004 {
4005 HOST_WIDE_INT val;
4006
4007 if (GET_CODE (op) != CONST_INT)
4008 return false;
4009
4010 val = INTVAL (op);
4011 return (val == 1 || val == 2 || val == 4 || val == 8);
4012 }
4013
4014 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4015 static int
4016 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4017 {
4018 enum rtx_code code = GET_CODE (index);
4019
4020 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4021 /* Standard coprocessor addressing modes. */
4022 if (TARGET_HARD_FLOAT
4023 && (TARGET_FPA || TARGET_MAVERICK)
4024 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4025 || (TARGET_MAVERICK && mode == DImode)))
4026 return (code == CONST_INT && INTVAL (index) < 1024
4027 && INTVAL (index) > -1024
4028 && (INTVAL (index) & 3) == 0);
4029
4030 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4031 {
4032 /* For DImode assume values will usually live in core regs
4033 and only allow LDRD addressing modes. */
4034 if (!TARGET_LDRD || mode != DImode)
4035 return (code == CONST_INT
4036 && INTVAL (index) < 1024
4037 && INTVAL (index) > -1024
4038 && (INTVAL (index) & 3) == 0);
4039 }
4040
4041 if (TARGET_NEON
4042 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4043 return (code == CONST_INT
4044 && INTVAL (index) < 1016
4045 && INTVAL (index) > -1024
4046 && (INTVAL (index) & 3) == 0);
4047
4048 if (arm_address_register_rtx_p (index, strict_p)
4049 && (GET_MODE_SIZE (mode) <= 4))
4050 return 1;
4051
4052 if (mode == DImode || mode == DFmode)
4053 {
4054 HOST_WIDE_INT val;
4055 /* ??? Can we assume ldrd for thumb2? */
4056 /* Thumb-2 ldrd only has reg+const addressing modes. */
4057 if (code != CONST_INT)
4058 return 0;
4059 val = INTVAL (index);
4060 /* ldrd supports offsets of +-1020.
4061 However the ldr fallback does not. */
4062 return val > -256 && val < 256 && (val & 3) == 0;
4063 }
4064
4065 if (code == MULT)
4066 {
4067 rtx xiop0 = XEXP (index, 0);
4068 rtx xiop1 = XEXP (index, 1);
4069
4070 return ((arm_address_register_rtx_p (xiop0, strict_p)
4071 && thumb2_index_mul_operand (xiop1))
4072 || (arm_address_register_rtx_p (xiop1, strict_p)
4073 && thumb2_index_mul_operand (xiop0)));
4074 }
4075 else if (code == ASHIFT)
4076 {
4077 rtx op = XEXP (index, 1);
4078
4079 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4080 && GET_CODE (op) == CONST_INT
4081 && INTVAL (op) > 0
4082 && INTVAL (op) <= 3);
4083 }
4084
4085 return (code == CONST_INT
4086 && INTVAL (index) < 4096
4087 && INTVAL (index) > -256);
4088 }
4089
4090 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4091 static int
4092 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4093 {
4094 int regno;
4095
4096 if (GET_CODE (x) != REG)
4097 return 0;
4098
4099 regno = REGNO (x);
4100
4101 if (strict_p)
4102 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4103
4104 return (regno <= LAST_LO_REGNUM
4105 || regno > LAST_VIRTUAL_REGISTER
4106 || regno == FRAME_POINTER_REGNUM
4107 || (GET_MODE_SIZE (mode) >= 4
4108 && (regno == STACK_POINTER_REGNUM
4109 || regno >= FIRST_PSEUDO_REGISTER
4110 || x == hard_frame_pointer_rtx
4111 || x == arg_pointer_rtx)));
4112 }
4113
4114 /* Return nonzero if x is a legitimate index register. This is the case
4115 for any base register that can access a QImode object. */
4116 inline static int
4117 thumb1_index_register_rtx_p (rtx x, int strict_p)
4118 {
4119 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4120 }
4121
4122 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4123
4124 The AP may be eliminated to either the SP or the FP, so we use the
4125 least common denominator, e.g. SImode, and offsets from 0 to 64.
4126
4127 ??? Verify whether the above is the right approach.
4128
4129 ??? Also, the FP may be eliminated to the SP, so perhaps that
4130 needs special handling also.
4131
4132 ??? Look at how the mips16 port solves this problem. It probably uses
4133 better ways to solve some of these problems.
4134
4135 Although it is not incorrect, we don't accept QImode and HImode
4136 addresses based on the frame pointer or arg pointer until the
4137 reload pass starts. This is so that eliminating such addresses
4138 into stack based ones won't produce impossible code. */
4139 int
4140 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4141 {
4142 /* ??? Not clear if this is right. Experiment. */
4143 if (GET_MODE_SIZE (mode) < 4
4144 && !(reload_in_progress || reload_completed)
4145 && (reg_mentioned_p (frame_pointer_rtx, x)
4146 || reg_mentioned_p (arg_pointer_rtx, x)
4147 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4148 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4149 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4150 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4151 return 0;
4152
4153 /* Accept any base register. SP only in SImode or larger. */
4154 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4155 return 1;
4156
4157 /* This is PC relative data before arm_reorg runs. */
4158 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4159 && GET_CODE (x) == SYMBOL_REF
4160 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4161 return 1;
4162
4163 /* This is PC relative data after arm_reorg runs. */
4164 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4165 && (GET_CODE (x) == LABEL_REF
4166 || (GET_CODE (x) == CONST
4167 && GET_CODE (XEXP (x, 0)) == PLUS
4168 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4169 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4170 return 1;
4171
4172 /* Post-inc indexing only supported for SImode and larger. */
4173 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4174 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4175 return 1;
4176
4177 else if (GET_CODE (x) == PLUS)
4178 {
4179 /* REG+REG address can be any two index registers. */
4180 /* We disallow FRAME+REG addressing since we know that FRAME
4181 will be replaced with STACK, and SP relative addressing only
4182 permits SP+OFFSET. */
4183 if (GET_MODE_SIZE (mode) <= 4
4184 && XEXP (x, 0) != frame_pointer_rtx
4185 && XEXP (x, 1) != frame_pointer_rtx
4186 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4187 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4188 return 1;
4189
4190 /* REG+const has 5-7 bit offset for non-SP registers. */
4191 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4192 || XEXP (x, 0) == arg_pointer_rtx)
4193 && GET_CODE (XEXP (x, 1)) == CONST_INT
4194 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4195 return 1;
4196
4197 /* REG+const has a 10-bit offset for SP, but only SImode and
4198 larger are supported. */
4199 /* ??? Should probably check for DI/DFmode overflow here
4200 just like GO_IF_LEGITIMATE_OFFSET does. */
4201 else if (GET_CODE (XEXP (x, 0)) == REG
4202 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4203 && GET_MODE_SIZE (mode) >= 4
4204 && GET_CODE (XEXP (x, 1)) == CONST_INT
4205 && INTVAL (XEXP (x, 1)) >= 0
4206 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4207 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4208 return 1;
4209
4210 else if (GET_CODE (XEXP (x, 0)) == REG
4211 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4212 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4213 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4214 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4215 && GET_MODE_SIZE (mode) >= 4
4216 && GET_CODE (XEXP (x, 1)) == CONST_INT
4217 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4218 return 1;
4219 }
4220
4221 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4222 && GET_MODE_SIZE (mode) == 4
4223 && GET_CODE (x) == SYMBOL_REF
4224 && CONSTANT_POOL_ADDRESS_P (x)
4225 && ! (flag_pic
4226 && symbol_mentioned_p (get_pool_constant (x))
4227 && ! pcrel_constant_p (get_pool_constant (x))))
4228 return 1;
4229
4230 return 0;
4231 }
4232
4233 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4234 instruction of mode MODE. */
4235 int
4236 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4237 {
4238 switch (GET_MODE_SIZE (mode))
4239 {
4240 case 1:
4241 return val >= 0 && val < 32;
4242
4243 case 2:
4244 return val >= 0 && val < 64 && (val & 1) == 0;
4245
4246 default:
4247 return (val >= 0
4248 && (val + GET_MODE_SIZE (mode)) <= 128
4249 && (val & 3) == 0);
4250 }
4251 }
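
/* A small illustrative helper (hypothetical name, not a GCC interface)
   giving the largest offset the checks above accept for a given access
   size: Thumb-1 load/store immediates are 5-bit fields scaled by the
   access size, so bytes reach 31, halfwords 62 and words 124.  */

static int
example_thumb_max_offset (int mode_size)
{
  if (mode_size == 1)
    return 31;                  /* 5-bit offset, unscaled.  */
  if (mode_size == 2)
    return 62;                  /* 5-bit offset scaled by 2.  */
  return 128 - mode_size;       /* Scaled by 4; 124 for a word access.  */
}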
4252
4253 /* Build the SYMBOL_REF for __tls_get_addr. */
4254
4255 static GTY(()) rtx tls_get_addr_libfunc;
4256
4257 static rtx
4258 get_tls_get_addr (void)
4259 {
4260 if (!tls_get_addr_libfunc)
4261 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4262 return tls_get_addr_libfunc;
4263 }
4264
4265 static rtx
4266 arm_load_tp (rtx target)
4267 {
4268 if (!target)
4269 target = gen_reg_rtx (SImode);
4270
4271 if (TARGET_HARD_TP)
4272 {
4273 /* Can return in any reg. */
4274 emit_insn (gen_load_tp_hard (target));
4275 }
4276 else
4277 {
4278 /* Always returned in r0. Immediately copy the result into a pseudo;
4279 otherwise other uses of r0 (e.g. setting up function arguments) may
4280 clobber the value. */
4281
4282 rtx tmp;
4283
4284 emit_insn (gen_load_tp_soft ());
4285
4286 tmp = gen_rtx_REG (SImode, 0);
4287 emit_move_insn (target, tmp);
4288 }
4289 return target;
4290 }
4291
4292 static rtx
4293 load_tls_operand (rtx x, rtx reg)
4294 {
4295 rtx tmp;
4296
4297 if (reg == NULL_RTX)
4298 reg = gen_reg_rtx (SImode);
4299
4300 tmp = gen_rtx_CONST (SImode, x);
4301
4302 emit_move_insn (reg, tmp);
4303
4304 return reg;
4305 }
4306
4307 static rtx
4308 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4309 {
4310 rtx insns, label, labelno, sum;
4311
4312 start_sequence ();
4313
4314 labelno = GEN_INT (pic_labelno++);
4315 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4316 label = gen_rtx_CONST (VOIDmode, label);
4317
4318 sum = gen_rtx_UNSPEC (Pmode,
4319 gen_rtvec (4, x, GEN_INT (reloc), label,
4320 GEN_INT (TARGET_ARM ? 8 : 4)),
4321 UNSPEC_TLS);
4322 reg = load_tls_operand (sum, reg);
4323
4324 if (TARGET_ARM)
4325 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4326 else if (TARGET_THUMB2)
4327 {
4328 rtx tmp;
4329 /* Thumb-2 only allows very limited access to the PC. Calculate
4330 the address in a temporary register. */
4331 tmp = gen_reg_rtx (SImode);
4332 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4333 emit_insn (gen_addsi3 (reg, reg, tmp));
4334 }
4335 else /* TARGET_THUMB1 */
4336 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4337
4338 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4339 Pmode, 1, reg, Pmode);
4340
4341 insns = get_insns ();
4342 end_sequence ();
4343
4344 return insns;
4345 }
4346
4347 rtx
4348 legitimize_tls_address (rtx x, rtx reg)
4349 {
4350 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4351 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4352
4353 switch (model)
4354 {
4355 case TLS_MODEL_GLOBAL_DYNAMIC:
4356 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4357 dest = gen_reg_rtx (Pmode);
4358 emit_libcall_block (insns, dest, ret, x);
4359 return dest;
4360
4361 case TLS_MODEL_LOCAL_DYNAMIC:
4362 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4363
4364 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4365 share the LDM result with other LD model accesses. */
4366 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4367 UNSPEC_TLS);
4368 dest = gen_reg_rtx (Pmode);
4369 emit_libcall_block (insns, dest, ret, eqv);
4370
4371 /* Load the addend. */
4372 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4373 UNSPEC_TLS);
4374 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4375 return gen_rtx_PLUS (Pmode, dest, addend);
4376
4377 case TLS_MODEL_INITIAL_EXEC:
4378 labelno = GEN_INT (pic_labelno++);
4379 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4380 label = gen_rtx_CONST (VOIDmode, label);
4381 sum = gen_rtx_UNSPEC (Pmode,
4382 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4383 GEN_INT (TARGET_ARM ? 8 : 4)),
4384 UNSPEC_TLS);
4385 reg = load_tls_operand (sum, reg);
4386
4387 if (TARGET_ARM)
4388 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4389 else if (TARGET_THUMB2)
4390 {
4391 rtx tmp;
4392 /* Thumb-2 only allows very limited access to the PC. Calculate
4393 the address in a temporary register. */
4394 tmp = gen_reg_rtx (SImode);
4395 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4396 emit_insn (gen_addsi3 (reg, reg, tmp));
4397 emit_move_insn (reg, gen_const_mem (SImode, reg));
4398 }
4399 else
4400 {
4401 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4402 emit_move_insn (reg, gen_const_mem (SImode, reg));
4403 }
4404
4405 tp = arm_load_tp (NULL_RTX);
4406
4407 return gen_rtx_PLUS (Pmode, tp, reg);
4408
4409 case TLS_MODEL_LOCAL_EXEC:
4410 tp = arm_load_tp (NULL_RTX);
4411
4412 reg = gen_rtx_UNSPEC (Pmode,
4413 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4414 UNSPEC_TLS);
4415 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4416
4417 return gen_rtx_PLUS (Pmode, tp, reg);
4418
4419 default:
4420 abort ();
4421 }
4422 }
4423
4424 /* Try machine-dependent ways of modifying an illegitimate address
4425 to be legitimate. If we find one, return the new, valid address. */
4426 rtx
4427 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4428 {
4429 if (arm_tls_symbol_p (x))
4430 return legitimize_tls_address (x, NULL_RTX);
4431
4432 if (GET_CODE (x) == PLUS)
4433 {
4434 rtx xop0 = XEXP (x, 0);
4435 rtx xop1 = XEXP (x, 1);
4436
4437 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4438 xop0 = force_reg (SImode, xop0);
4439
4440 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4441 xop1 = force_reg (SImode, xop1);
4442
4443 if (ARM_BASE_REGISTER_RTX_P (xop0)
4444 && GET_CODE (xop1) == CONST_INT)
4445 {
4446 HOST_WIDE_INT n, low_n;
4447 rtx base_reg, val;
4448 n = INTVAL (xop1);
4449
4450 /* VFP addressing modes actually allow greater offsets, but for
4451 now we just stick with the lowest common denominator. */
4452 if (mode == DImode
4453 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4454 {
4455 low_n = n & 0x0f;
4456 n &= ~0x0f;
4457 if (low_n > 4)
4458 {
4459 n += 16;
4460 low_n -= 16;
4461 }
4462 }
4463 else
4464 {
4465 low_n = ((mode) == TImode ? 0
4466 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4467 n -= low_n;
4468 }
4469
4470 base_reg = gen_reg_rtx (SImode);
4471 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4472 emit_move_insn (base_reg, val);
4473 x = plus_constant (base_reg, low_n);
4474 }
4475 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4476 x = gen_rtx_PLUS (SImode, xop0, xop1);
4477 }
4478
4479 /* XXX We don't allow MINUS any more -- see comment in
4480 arm_legitimate_address_p (). */
4481 else if (GET_CODE (x) == MINUS)
4482 {
4483 rtx xop0 = XEXP (x, 0);
4484 rtx xop1 = XEXP (x, 1);
4485
4486 if (CONSTANT_P (xop0))
4487 xop0 = force_reg (SImode, xop0);
4488
4489 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4490 xop1 = force_reg (SImode, xop1);
4491
4492 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4493 x = gen_rtx_MINUS (SImode, xop0, xop1);
4494 }
4495
4496 /* Make sure to take full advantage of the pre-indexed addressing mode
4497 with absolute addresses, which often allows the base register to
4498 be factored out across multiple adjacent memory references, and might
4499 even allow the minipool to be avoided entirely. */
4500 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4501 {
4502 unsigned int bits;
4503 HOST_WIDE_INT mask, base, index;
4504 rtx base_reg;
4505
4506 /* ldr and ldrb can use a 12-bit index, while ldrsb and the rest can
4507 only use an 8-bit index. So let's use a 12-bit index for SImode only
4508 and hope that arm_gen_constant will enable ldrb to use more bits. */
4509 bits = (mode == SImode) ? 12 : 8;
4510 mask = (1 << bits) - 1;
4511 base = INTVAL (x) & ~mask;
4512 index = INTVAL (x) & mask;
4513 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4514 {
4515 /* It'll most probably be more efficient to generate the base
4516 with more bits set and use a negative index instead. */
4517 base |= mask;
4518 index -= mask;
4519 }
4520 base_reg = force_reg (SImode, GEN_INT (base));
4521 x = plus_constant (base_reg, index);
4522 }
4523
4524 if (flag_pic)
4525 {
4526 /* We need to find and carefully transform any SYMBOL and LABEL
4527 references, so go back to the original address expression. */
4528 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4529
4530 if (new_x != orig_x)
4531 x = new_x;
4532 }
4533
4534 return x;
4535 }
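
/* A stand-alone sketch (hypothetical helper, not a GCC interface) of the
   absolute-address split performed above for constant addresses: the
   constant is divided into a base that a separate instruction sets up and
   a small index that fits the load's immediate field, flipping to a
   negative index when the set-bit count suggests the biased base would be
   cheaper for arm_gen_constant to synthesize.  */

static void
example_split_absolute_address (long long addr, int is_simode,
                                long long *base_out, long long *index_out)
{
  unsigned int bits = is_simode ? 12 : 8;   /* ldr vs. ldrb/ldrsb etc.  */
  long long mask = (1LL << bits) - 1;
  long long base = addr & ~mask;
  long long index = addr & mask;
  unsigned long long b = (unsigned long long) base & 0xffffffffULL;
  int popcount = 0;

  while (b)
    {
      popcount += (int) (b & 1);
      b >>= 1;
    }
  if (popcount > (int) (32 - bits) / 2)
    {
      base |= mask;             /* Bias the base upwards ...  */
      index -= mask;            /* ... and compensate the index: the sum is kept.  */
    }
  *base_out = base;
  *index_out = index;
}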
4536
4537
4538 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4539 to be legitimate. If we find one, return the new, valid address. */
4540 rtx
4541 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4542 {
4543 if (arm_tls_symbol_p (x))
4544 return legitimize_tls_address (x, NULL_RTX);
4545
4546 if (GET_CODE (x) == PLUS
4547 && GET_CODE (XEXP (x, 1)) == CONST_INT
4548 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4549 || INTVAL (XEXP (x, 1)) < 0))
4550 {
4551 rtx xop0 = XEXP (x, 0);
4552 rtx xop1 = XEXP (x, 1);
4553 HOST_WIDE_INT offset = INTVAL (xop1);
4554
4555 /* Try and fold the offset into a biasing of the base register and
4556 then offsetting that. Don't do this when optimizing for space
4557 since it can cause too many CSEs. */
4558 if (optimize_size && offset >= 0
4559 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4560 {
4561 HOST_WIDE_INT delta;
4562
4563 if (offset >= 256)
4564 delta = offset - (256 - GET_MODE_SIZE (mode));
4565 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4566 delta = 31 * GET_MODE_SIZE (mode);
4567 else
4568 delta = offset & (~31 * GET_MODE_SIZE (mode));
4569
4570 xop0 = force_operand (plus_constant (xop0, offset - delta),
4571 NULL_RTX);
4572 x = plus_constant (xop0, delta);
4573 }
4574 else if (offset < 0 && offset > -256)
4575 /* Small negative offsets are best done with a subtract before the
4576 dereference, since forcing these into a register normally takes two
4577 instructions. */
4578 x = force_operand (x, NULL_RTX);
4579 else
4580 {
4581 /* For the remaining cases, force the constant into a register. */
4582 xop1 = force_reg (SImode, xop1);
4583 x = gen_rtx_PLUS (SImode, xop0, xop1);
4584 }
4585 }
4586 else if (GET_CODE (x) == PLUS
4587 && s_register_operand (XEXP (x, 1), SImode)
4588 && !s_register_operand (XEXP (x, 0), SImode))
4589 {
4590 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4591
4592 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4593 }
4594
4595 if (flag_pic)
4596 {
4597 /* We need to find and carefully transform any SYMBOL and LABEL
4598 references, so go back to the original address expression. */
4599 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4600
4601 if (new_x != orig_x)
4602 x = new_x;
4603 }
4604
4605 return x;
4606 }
4607
4608 rtx
4609 thumb_legitimize_reload_address (rtx *x_p,
4610 enum machine_mode mode,
4611 int opnum, int type,
4612 int ind_levels ATTRIBUTE_UNUSED)
4613 {
4614 rtx x = *x_p;
4615
4616 if (GET_CODE (x) == PLUS
4617 && GET_MODE_SIZE (mode) < 4
4618 && REG_P (XEXP (x, 0))
4619 && XEXP (x, 0) == stack_pointer_rtx
4620 && GET_CODE (XEXP (x, 1)) == CONST_INT
4621 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4622 {
4623 rtx orig_x = x;
4624
4625 x = copy_rtx (x);
4626 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4627 Pmode, VOIDmode, 0, 0, opnum, type);
4628 return x;
4629 }
4630
4631 /* If both registers are hi-regs, then it's better to reload the
4632 entire expression rather than each register individually. That
4633 only requires one reload register rather than two. */
4634 if (GET_CODE (x) == PLUS
4635 && REG_P (XEXP (x, 0))
4636 && REG_P (XEXP (x, 1))
4637 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4638 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4639 {
4640 rtx orig_x = x;
4641
4642 x = copy_rtx (x);
4643 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4644 Pmode, VOIDmode, 0, 0, opnum, type);
4645 return x;
4646 }
4647
4648 return NULL;
4649 }
4650
4651 /* Test for various thread-local symbols. */
4652
4653 /* Return TRUE if X is a thread-local symbol. */
4654
4655 static bool
4656 arm_tls_symbol_p (rtx x)
4657 {
4658 if (! TARGET_HAVE_TLS)
4659 return false;
4660
4661 if (GET_CODE (x) != SYMBOL_REF)
4662 return false;
4663
4664 return SYMBOL_REF_TLS_MODEL (x) != 0;
4665 }
4666
4667 /* Helper for arm_tls_referenced_p. */
4668
4669 static int
4670 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4671 {
4672 if (GET_CODE (*x) == SYMBOL_REF)
4673 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4674
4675 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4676 TLS offsets, not real symbol references. */
4677 if (GET_CODE (*x) == UNSPEC
4678 && XINT (*x, 1) == UNSPEC_TLS)
4679 return -1;
4680
4681 return 0;
4682 }
4683
4684 /* Return TRUE if X contains any TLS symbol references. */
4685
4686 bool
4687 arm_tls_referenced_p (rtx x)
4688 {
4689 if (! TARGET_HAVE_TLS)
4690 return false;
4691
4692 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4693 }
4694
4695 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4696
4697 bool
4698 arm_cannot_force_const_mem (rtx x)
4699 {
4700 rtx base, offset;
4701
4702 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4703 {
4704 split_const (x, &base, &offset);
4705 if (GET_CODE (base) == SYMBOL_REF
4706 && !offset_within_block_p (base, INTVAL (offset)))
4707 return true;
4708 }
4709 return arm_tls_referenced_p (x);
4710 }
4711 \f
4712 #define REG_OR_SUBREG_REG(X) \
4713 (GET_CODE (X) == REG \
4714 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4715
4716 #define REG_OR_SUBREG_RTX(X) \
4717 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4718
4719 #ifndef COSTS_N_INSNS
4720 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4721 #endif
4722 static inline int
4723 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4724 {
4725 enum machine_mode mode = GET_MODE (x);
4726
4727 switch (code)
4728 {
4729 case ASHIFT:
4730 case ASHIFTRT:
4731 case LSHIFTRT:
4732 case ROTATERT:
4733 case PLUS:
4734 case MINUS:
4735 case COMPARE:
4736 case NEG:
4737 case NOT:
4738 return COSTS_N_INSNS (1);
4739
4740 case MULT:
4741 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4742 {
4743 int cycles = 0;
4744 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4745
4746 while (i)
4747 {
4748 i >>= 2;
4749 cycles++;
4750 }
4751 return COSTS_N_INSNS (2) + cycles;
4752 }
4753 return COSTS_N_INSNS (1) + 16;
4754
4755 case SET:
4756 return (COSTS_N_INSNS (1)
4757 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4758 + GET_CODE (SET_DEST (x)) == MEM));
4759
4760 case CONST_INT:
4761 if (outer == SET)
4762 {
4763 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4764 return 0;
4765 if (thumb_shiftable_const (INTVAL (x)))
4766 return COSTS_N_INSNS (2);
4767 return COSTS_N_INSNS (3);
4768 }
4769 else if ((outer == PLUS || outer == COMPARE)
4770 && INTVAL (x) < 256 && INTVAL (x) > -256)
4771 return 0;
4772 else if (outer == AND
4773 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4774 return COSTS_N_INSNS (1);
4775 else if (outer == ASHIFT || outer == ASHIFTRT
4776 || outer == LSHIFTRT)
4777 return 0;
4778 return COSTS_N_INSNS (2);
4779
4780 case CONST:
4781 case CONST_DOUBLE:
4782 case LABEL_REF:
4783 case SYMBOL_REF:
4784 return COSTS_N_INSNS (3);
4785
4786 case UDIV:
4787 case UMOD:
4788 case DIV:
4789 case MOD:
4790 return 100;
4791
4792 case TRUNCATE:
4793 return 99;
4794
4795 case AND:
4796 case XOR:
4797 case IOR:
4798 /* XXX guess. */
4799 return 8;
4800
4801 case MEM:
4802 /* XXX another guess. */
4803 /* Memory costs quite a lot for the first word, but subsequent words
4804 load at the equivalent of a single insn each. */
4805 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4806 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4807 ? 4 : 0));
4808
4809 case IF_THEN_ELSE:
4810 /* XXX a guess. */
4811 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4812 return 14;
4813 return 2;
4814
4815 case ZERO_EXTEND:
4816 /* XXX still guessing. */
4817 switch (GET_MODE (XEXP (x, 0)))
4818 {
4819 case QImode:
4820 return (1 + (mode == DImode ? 4 : 0)
4821 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4822
4823 case HImode:
4824 return (4 + (mode == DImode ? 4 : 0)
4825 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4826
4827 case SImode:
4828 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4829
4830 default:
4831 return 99;
4832 }
4833
4834 default:
4835 return 99;
4836 }
4837 }
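
/* An illustrative sketch (hypothetical helper, not a GCC interface) of the
   constant-multiply cost used for MULT in thumb1_rtx_costs above: the loop
   appears to model an early-terminating multiplier that consumes two bits
   of the constant per cycle, so it just counts 2-bit chunks.  */

static int
example_thumb1_mul_cycles (unsigned long long multiplier)
{
  int cycles = 0;

  while (multiplier)
    {
      multiplier >>= 2;
      cycles++;
    }
  return cycles;        /* e.g. 3 for a multiplier of 0x13 (5 significant bits).  */
}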
4838
4839
4840 /* Worker routine for arm_rtx_costs. */
4841 /* ??? This needs updating for thumb2. */
4842 static inline int
4843 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4844 {
4845 enum machine_mode mode = GET_MODE (x);
4846 enum rtx_code subcode;
4847 int extra_cost;
4848
4849 switch (code)
4850 {
4851 case MEM:
4852 /* Memory costs quite a lot for the first word, but subsequent words
4853 load at the equivalent of a single insn each. */
4854 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4855 + (GET_CODE (x) == SYMBOL_REF
4856 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4857
4858 case DIV:
4859 case MOD:
4860 case UDIV:
4861 case UMOD:
4862 return optimize_size ? COSTS_N_INSNS (2) : 100;
4863
4864 case ROTATE:
4865 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4866 return 4;
4867 /* Fall through */
4868 case ROTATERT:
4869 if (mode != SImode)
4870 return 8;
4871 /* Fall through */
4872 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4873 if (mode == DImode)
4874 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4875 + ((GET_CODE (XEXP (x, 0)) == REG
4876 || (GET_CODE (XEXP (x, 0)) == SUBREG
4877 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4878 ? 0 : 8));
4879 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4880 || (GET_CODE (XEXP (x, 0)) == SUBREG
4881 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4882 ? 0 : 4)
4883 + ((GET_CODE (XEXP (x, 1)) == REG
4884 || (GET_CODE (XEXP (x, 1)) == SUBREG
4885 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4886 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4887 ? 0 : 4));
4888
4889 case MINUS:
4890 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4891 {
4892 extra_cost = rtx_cost (XEXP (x, 1), code);
4893 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4894 extra_cost += 4 * ARM_NUM_REGS (mode);
4895 return extra_cost;
4896 }
4897
4898 if (mode == DImode)
4899 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4900 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4901 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4902 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4903 ? 0 : 8));
4904
4905 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4906 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4907 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4908 && arm_const_double_rtx (XEXP (x, 1))))
4909 ? 0 : 8)
4910 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4911 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4912 && arm_const_double_rtx (XEXP (x, 0))))
4913 ? 0 : 8));
4914
4915 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4916 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4917 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4918 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4919 || subcode == ASHIFTRT || subcode == LSHIFTRT
4920 || subcode == ROTATE || subcode == ROTATERT
4921 || (subcode == MULT
4922 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4923 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4924 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4925 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4926 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4927 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4928 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4929 return 1;
4930 /* Fall through */
4931
4932 case PLUS:
4933 if (GET_CODE (XEXP (x, 0)) == MULT)
4934 {
4935 extra_cost = rtx_cost (XEXP (x, 0), code);
4936 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4937 extra_cost += 4 * ARM_NUM_REGS (mode);
4938 return extra_cost;
4939 }
4940
4941 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4942 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4943 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4944 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4945 && arm_const_double_rtx (XEXP (x, 1))))
4946 ? 0 : 8));
4947
4948 /* Fall through */
4949 case AND: case XOR: case IOR:
4950 extra_cost = 0;
4951
4952 /* Normally the frame registers will be split into reg+const during
4953 reload, so it is a bad idea to combine them with other instructions,
4954 since then they might not be moved outside of loops. As a compromise
4955 we allow integration with ops that have a constant as their second
4956 operand. */
4957 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4958 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4959 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4960 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4961 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4962 extra_cost = 4;
4963
4964 if (mode == DImode)
4965 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4966 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4967 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4968 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4969 ? 0 : 8));
4970
4971 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4972 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4973 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4974 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4975 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4976 ? 0 : 4));
4977
4978 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4979 return (1 + extra_cost
4980 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4981 || subcode == LSHIFTRT || subcode == ASHIFTRT
4982 || subcode == ROTATE || subcode == ROTATERT
4983 || (subcode == MULT
4984 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4985 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4986 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4987 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4988 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4989 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4990 ? 0 : 4));
4991
4992 return 8;
4993
4994 case MULT:
4995 /* This should have been handled by the CPU specific routines. */
4996 gcc_unreachable ();
4997
4998 case TRUNCATE:
4999 if (arm_arch3m && mode == SImode
5000 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5001 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5002 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5003 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5004 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5005 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5006 return 8;
5007 return 99;
5008
5009 case NEG:
5010 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5011 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
5012 /* Fall through */
5013 case NOT:
5014 if (mode == DImode)
5015 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5016
5017 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5018
5019 case IF_THEN_ELSE:
5020 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5021 return 14;
5022 return 2;
5023
5024 case COMPARE:
5025 return 1;
5026
5027 case ABS:
5028 return 4 + (mode == DImode ? 4 : 0);
5029
5030 case SIGN_EXTEND:
5031 /* ??? value extensions are cheaper on armv6. */
5032 if (GET_MODE (XEXP (x, 0)) == QImode)
5033 return (4 + (mode == DImode ? 4 : 0)
5034 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5035 /* Fall through */
5036 case ZERO_EXTEND:
5037 switch (GET_MODE (XEXP (x, 0)))
5038 {
5039 case QImode:
5040 return (1 + (mode == DImode ? 4 : 0)
5041 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5042
5043 case HImode:
5044 return (4 + (mode == DImode ? 4 : 0)
5045 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5046
5047 case SImode:
5048 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5049
5050 case V8QImode:
5051 case V4HImode:
5052 case V2SImode:
5053 case V4QImode:
5054 case V2HImode:
5055 return 1;
5056
5057 default:
5058 gcc_unreachable ();
5059 }
5060 gcc_unreachable ();
5061
5062 case CONST_INT:
5063 if (const_ok_for_arm (INTVAL (x)))
5064 return outer == SET ? 2 : -1;
5065 else if (outer == AND
5066 && const_ok_for_arm (~INTVAL (x)))
5067 return -1;
5068 else if ((outer == COMPARE
5069 || outer == PLUS || outer == MINUS)
5070 && const_ok_for_arm (-INTVAL (x)))
5071 return -1;
5072 else
5073 return 5;
5074
5075 case CONST:
5076 case LABEL_REF:
5077 case SYMBOL_REF:
5078 return 6;
5079
5080 case CONST_DOUBLE:
5081 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5082 return outer == SET ? 2 : -1;
5083 else if ((outer == COMPARE || outer == PLUS)
5084 && neg_const_double_rtx_ok_for_fpa (x))
5085 return -1;
5086 return 7;
5087
5088 default:
5089 return 99;
5090 }
5091 }
5092
5093 /* RTX costs when optimizing for size. */
5094 static bool
5095 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5096 {
5097 enum machine_mode mode = GET_MODE (x);
5098
5099 if (TARGET_THUMB)
5100 {
5101 /* XXX TBD. For now, use the standard costs. */
5102 *total = thumb1_rtx_costs (x, code, outer_code);
5103 return true;
5104 }
5105
5106 switch (code)
5107 {
5108 case MEM:
5109 /* A memory access costs 1 insn if the mode is small or the address is
5110 a single register; otherwise it costs one insn per word. */
5111 if (REG_P (XEXP (x, 0)))
5112 *total = COSTS_N_INSNS (1);
5113 else
5114 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5115 return true;
5116
5117 case DIV:
5118 case MOD:
5119 case UDIV:
5120 case UMOD:
5121 /* Needs a libcall, so it costs about this. */
5122 *total = COSTS_N_INSNS (2);
5123 return false;
5124
5125 case ROTATE:
5126 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5127 {
5128 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5129 return true;
5130 }
5131 /* Fall through */
5132 case ROTATERT:
5133 case ASHIFT:
5134 case LSHIFTRT:
5135 case ASHIFTRT:
5136 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5137 {
5138 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5139 return true;
5140 }
5141 else if (mode == SImode)
5142 {
5143 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5144 /* Slightly disparage register shifts, but not by much. */
5145 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5146 *total += 1 + rtx_cost (XEXP (x, 1), code);
5147 return true;
5148 }
5149
5150 /* Needs a libcall. */
5151 *total = COSTS_N_INSNS (2);
5152 return false;
5153
5154 case MINUS:
5155 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5156 {
5157 *total = COSTS_N_INSNS (1);
5158 return false;
5159 }
5160
5161 if (mode == SImode)
5162 {
5163 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5164 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5165
5166 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5167 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5168 || subcode1 == ROTATE || subcode1 == ROTATERT
5169 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5170 || subcode1 == ASHIFTRT)
5171 {
5172 /* It's just the cost of the two operands. */
5173 *total = 0;
5174 return false;
5175 }
5176
5177 *total = COSTS_N_INSNS (1);
5178 return false;
5179 }
5180
5181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5182 return false;
5183
5184 case PLUS:
5185 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5186 {
5187 *total = COSTS_N_INSNS (1);
5188 return false;
5189 }
5190
5191 /* Fall through */
5192 case AND: case XOR: case IOR:
5193 if (mode == SImode)
5194 {
5195 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5196
5197 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5198 || subcode == LSHIFTRT || subcode == ASHIFTRT
5199 || (code == AND && subcode == NOT))
5200 {
5201 /* It's just the cost of the two operands. */
5202 *total = 0;
5203 return false;
5204 }
5205 }
5206
5207 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5208 return false;
5209
5210 case MULT:
5211 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5212 return false;
5213
5214 case NEG:
5215 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5216 *total = COSTS_N_INSNS (1);
5217 /* Fall through */
5218 case NOT:
5219 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5220
5221 return false;
5222
5223 case IF_THEN_ELSE:
5224 *total = 0;
5225 return false;
5226
5227 case COMPARE:
5228 if (cc_register (XEXP (x, 0), VOIDmode))
5229 * total = 0;
5230 else
5231 *total = COSTS_N_INSNS (1);
5232 return false;
5233
5234 case ABS:
5235 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5236 *total = COSTS_N_INSNS (1);
5237 else
5238 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5239 return false;
5240
5241 case SIGN_EXTEND:
5242 *total = 0;
5243 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5244 {
5245 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5246 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5247 }
5248 if (mode == DImode)
5249 *total += COSTS_N_INSNS (1);
5250 return false;
5251
5252 case ZERO_EXTEND:
5253 *total = 0;
5254 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5255 {
5256 switch (GET_MODE (XEXP (x, 0)))
5257 {
5258 case QImode:
5259 *total += COSTS_N_INSNS (1);
5260 break;
5261
5262 case HImode:
5263 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5264
5265 case SImode:
5266 break;
5267
5268 default:
5269 *total += COSTS_N_INSNS (2);
5270 }
5271 }
5272
5273 if (mode == DImode)
5274 *total += COSTS_N_INSNS (1);
5275
5276 return false;
5277
5278 case CONST_INT:
5279 if (const_ok_for_arm (INTVAL (x)))
5280 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5281 else if (const_ok_for_arm (~INTVAL (x)))
5282 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5283 else if (const_ok_for_arm (-INTVAL (x)))
5284 {
5285 if (outer_code == COMPARE || outer_code == PLUS
5286 || outer_code == MINUS)
5287 *total = 0;
5288 else
5289 *total = COSTS_N_INSNS (1);
5290 }
5291 else
5292 *total = COSTS_N_INSNS (2);
5293 return true;
5294
5295 case CONST:
5296 case LABEL_REF:
5297 case SYMBOL_REF:
5298 *total = COSTS_N_INSNS (2);
5299 return true;
5300
5301 case CONST_DOUBLE:
5302 *total = COSTS_N_INSNS (4);
5303 return true;
5304
5305 default:
5306 if (mode != VOIDmode)
5307 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5308 else
5309 *total = COSTS_N_INSNS (4); /* Who knows? */
5310 return false;
5311 }
5312 }
5313
5314 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5315 supported on any "slowmul" cores, so it can be ignored. */
5316
5317 static bool
5318 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5319 {
5320 enum machine_mode mode = GET_MODE (x);
5321
5322 if (TARGET_THUMB)
5323 {
5324 *total = thumb1_rtx_costs (x, code, outer_code);
5325 return true;
5326 }
5327
5328 switch (code)
5329 {
5330 case MULT:
5331 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5332 || mode == DImode)
5333 {
5334 *total = 30;
5335 return true;
5336 }
5337
5338 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5339 {
5340 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5341 & (unsigned HOST_WIDE_INT) 0xffffffff);
5342 int cost, const_ok = const_ok_for_arm (i);
5343 int j, booth_unit_size;
5344
5345 /* Tune as appropriate. */
5346 cost = const_ok ? 4 : 8;
5347 booth_unit_size = 2;
5348 for (j = 0; i && j < 32; j += booth_unit_size)
5349 {
5350 i >>= booth_unit_size;
5351 cost += 2;
5352 }
5353
5354 *total = cost;
5355 return true;
5356 }
5357
5358 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5359 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5360 return true;
5361
5362 default:
5363 *total = arm_rtx_costs_1 (x, code, outer_code);
5364 return true;
5365 }
5366 }
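
/* An illustrative sketch (hypothetical helper, not a GCC interface) of the
   constant-multiply cost computed for the "slowmul" cores above: a base
   cost of 4 when the constant is encodable as an immediate (8 when it
   needs a load), plus 2 per step of a multiplier that retires
   booth_unit_size = 2 bits of the constant per step.  */

static int
example_slowmul_const_mult_cost (unsigned long long multiplier, int const_ok)
{
  int cost = const_ok ? 4 : 8;
  int booth_unit_size = 2;
  int j;

  multiplier &= 0xffffffffULL;
  for (j = 0; multiplier && j < 32; j += booth_unit_size)
    {
      multiplier >>= booth_unit_size;
      cost += 2;
    }
  return cost;          /* e.g. 4 + 2*4 = 12 for 0xff with const_ok set.  */
}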
5367
5368
5369 /* RTX cost for cores with a fast multiply unit (M variants). */
5370
5371 static bool
5372 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5373 {
5374 enum machine_mode mode = GET_MODE (x);
5375
5376 if (TARGET_THUMB1)
5377 {
5378 *total = thumb1_rtx_costs (x, code, outer_code);
5379 return true;
5380 }
5381
5382 /* ??? should thumb2 use different costs? */
5383 switch (code)
5384 {
5385 case MULT:
5386 /* There is no point basing this on the tuning, since it is always the
5387 fast variant if it exists at all. */
5388 if (mode == DImode
5389 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5390 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5391 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5392 {
5393 *total = 8;
5394 return true;
5395 }
5396
5397
5398 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5399 || mode == DImode)
5400 {
5401 *total = 30;
5402 return true;
5403 }
5404
5405 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5406 {
5407 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5408 & (unsigned HOST_WIDE_INT) 0xffffffff);
5409 int cost, const_ok = const_ok_for_arm (i);
5410 int j, booth_unit_size;
5411
5412 /* Tune as appropriate. */
5413 cost = const_ok ? 4 : 8;
5414 booth_unit_size = 8;
5415 for (j = 0; i && j < 32; j += booth_unit_size)
5416 {
5417 i >>= booth_unit_size;
5418 cost += 2;
5419 }
5420
5421 *total = cost;
5422 return true;
5423 }
5424
5425 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5426 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5427 return true;
5428
5429 default:
5430 *total = arm_rtx_costs_1 (x, code, outer_code);
5431 return true;
5432 }
5433 }
5434
5435
5436 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5437 so it can be ignored. */
5438
5439 static bool
5440 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5441 {
5442 enum machine_mode mode = GET_MODE (x);
5443
5444 if (TARGET_THUMB)
5445 {
5446 *total = thumb1_rtx_costs (x, code, outer_code);
5447 return true;
5448 }
5449
5450 switch (code)
5451 {
5452 case MULT:
5453 /* There is no point basing this on the tuning, since it is always the
5454 fast variant if it exists at all. */
5455 if (mode == DImode
5456 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5457 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5458 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5459 {
5460 *total = 8;
5461 return true;
5462 }
5463
5464
5465 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5466 || mode == DImode)
5467 {
5468 *total = 30;
5469 return true;
5470 }
5471
5472 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5473 {
5474 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5475 & (unsigned HOST_WIDE_INT) 0xffffffff);
5476 int cost, const_ok = const_ok_for_arm (i);
5477 unsigned HOST_WIDE_INT masked_const;
5478
5479 /* The cost will be related to two insns.
5480 First a load of the constant (MOV or LDR), then a multiply. */
5481 cost = 2;
5482 if (! const_ok)
5483 cost += 1; /* LDR is probably more expensive because
5484 of longer result latency. */
5485 masked_const = i & 0xffff8000;
5486 if (masked_const != 0 && masked_const != 0xffff8000)
5487 {
5488 masked_const = i & 0xf8000000;
5489 if (masked_const == 0 || masked_const == 0xf8000000)
5490 cost += 1;
5491 else
5492 cost += 2;
5493 }
5494 *total = cost;
5495 return true;
5496 }
5497
5498 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5499 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5500 return true;
5501
5502 case COMPARE:
5503 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5504 will stall until the multiplication is complete. */
5505 if (GET_CODE (XEXP (x, 0)) == MULT)
5506 *total = 4 + rtx_cost (XEXP (x, 0), code);
5507 else
5508 *total = arm_rtx_costs_1 (x, code, outer_code);
5509 return true;
5510
5511 default:
5512 *total = arm_rtx_costs_1 (x, code, outer_code);
5513 return true;
5514 }
5515 }
5516
5517
5518 /* RTX costs for 9e (and later) cores. */
5519
5520 static bool
5521 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5522 {
5523 enum machine_mode mode = GET_MODE (x);
5524 int nonreg_cost;
5525 int cost;
5526
5527 if (TARGET_THUMB1)
5528 {
5529 switch (code)
5530 {
5531 case MULT:
5532 *total = COSTS_N_INSNS (3);
5533 return true;
5534
5535 default:
5536 *total = thumb1_rtx_costs (x, code, outer_code);
5537 return true;
5538 }
5539 }
5540
5541 switch (code)
5542 {
5543 case MULT:
5544 /* There is no point basing this on the tuning, since it is always the
5545 fast variant if it exists at all. */
5546 if (mode == DImode
5547 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5548 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5549 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5550 {
5551 *total = 3;
5552 return true;
5553 }
5554
5555
5556 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5557 {
5558 *total = 30;
5559 return true;
5560 }
5561 if (mode == DImode)
5562 {
5563 cost = 7;
5564 nonreg_cost = 8;
5565 }
5566 else
5567 {
5568 cost = 2;
5569 nonreg_cost = 4;
5570 }
5571
5572
5573 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5574 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5575 return true;
5576
5577 default:
5578 *total = arm_rtx_costs_1 (x, code, outer_code);
5579 return true;
5580 }
5581 }
5582 /* All address computations that can be done are free, but rtx cost returns
5583 the same for practically all of them. So we weight the different types
5584 of address here in the order (most pref first):
5585 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5586 static inline int
5587 arm_arm_address_cost (rtx x)
5588 {
5589 enum rtx_code c = GET_CODE (x);
5590
5591 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5592 return 0;
5593 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5594 return 10;
5595
5596 if (c == PLUS || c == MINUS)
5597 {
5598 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5599 return 2;
5600
5601 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5602 return 3;
5603
5604 return 4;
5605 }
5606
5607 return 6;
5608 }
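
/* For example, under the weighting above a POST_INC address costs 0,
   (plus (mult (reg) (const_int 4)) (reg)) costs 3, (plus (reg)
   (const_int 8)) costs 4, a plain (reg) costs 6, and a (symbol_ref) or
   (label_ref) costs 10.  */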
5609
5610 static inline int
5611 arm_thumb_address_cost (rtx x)
5612 {
5613 enum rtx_code c = GET_CODE (x);
5614
5615 if (c == REG)
5616 return 1;
5617 if (c == PLUS
5618 && GET_CODE (XEXP (x, 0)) == REG
5619 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5620 return 1;
5621
5622 return 2;
5623 }
5624
5625 static int
5626 arm_address_cost (rtx x)
5627 {
5628 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5629 }
5630
5631 static int
5632 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5633 {
5634 rtx i_pat, d_pat;
5635
5636 /* Some true dependencies can have a higher cost depending
5637 on precisely how certain input operands are used. */
5638 if (arm_tune_xscale
5639 && REG_NOTE_KIND (link) == 0
5640 && recog_memoized (insn) >= 0
5641 && recog_memoized (dep) >= 0)
5642 {
5643 int shift_opnum = get_attr_shift (insn);
5644 enum attr_type attr_type = get_attr_type (dep);
5645
5646 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5647 operand for INSN. If we have a shifted input operand and the
5648 instruction we depend on is another ALU instruction, then we may
5649 have to account for an additional stall. */
5650 if (shift_opnum != 0
5651 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5652 {
5653 rtx shifted_operand;
5654 int opno;
5655
5656 /* Get the shifted operand. */
5657 extract_insn (insn);
5658 shifted_operand = recog_data.operand[shift_opnum];
5659
5660 /* Iterate over all the operands in DEP. If we write an operand
5661 that overlaps with SHIFTED_OPERAND, then we have to increase the
5662 cost of this dependency. */
5663 extract_insn (dep);
5664 preprocess_constraints ();
5665 for (opno = 0; opno < recog_data.n_operands; opno++)
5666 {
5667 /* We can ignore strict inputs. */
5668 if (recog_data.operand_type[opno] == OP_IN)
5669 continue;
5670
5671 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5672 shifted_operand))
5673 return 2;
5674 }
5675 }
5676 }
5677
5678 /* XXX This is not strictly true for the FPA. */
5679 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5680 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5681 return 0;
5682
5683 /* Call insns don't incur a stall, even if they follow a load. */
5684 if (REG_NOTE_KIND (link) == 0
5685 && GET_CODE (insn) == CALL_INSN)
5686 return 1;
5687
5688 if ((i_pat = single_set (insn)) != NULL
5689 && GET_CODE (SET_SRC (i_pat)) == MEM
5690 && (d_pat = single_set (dep)) != NULL
5691 && GET_CODE (SET_DEST (d_pat)) == MEM)
5692 {
5693 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5694 /* This is a load after a store, there is no conflict if the load reads
5695 from a cached area. Assume that loads from the stack, and from the
5696 constant pool are cached, and that others will miss. This is a
5697 hack. */
5698
5699 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5700 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5701 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5702 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5703 return 1;
5704 }
5705
5706 return cost;
5707 }
5708
5709 static int fp_consts_inited = 0;
5710
5711 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5712 static const char * const strings_fp[8] =
5713 {
5714 "0", "1", "2", "3",
5715 "4", "5", "0.5", "10"
5716 };
5717
5718 static REAL_VALUE_TYPE values_fp[8];
5719
5720 static void
5721 init_fp_table (void)
5722 {
5723 int i;
5724 REAL_VALUE_TYPE r;
5725
5726 if (TARGET_VFP)
5727 fp_consts_inited = 1;
5728 else
5729 fp_consts_inited = 8;
5730
5731 for (i = 0; i < fp_consts_inited; i++)
5732 {
5733 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5734 values_fp[i] = r;
5735 }
5736 }
5737
5738 /* Return TRUE if rtx X is a valid immediate FP constant. */
5739 int
5740 arm_const_double_rtx (rtx x)
5741 {
5742 REAL_VALUE_TYPE r;
5743 int i;
5744
5745 if (!fp_consts_inited)
5746 init_fp_table ();
5747
5748 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5749 if (REAL_VALUE_MINUS_ZERO (r))
5750 return 0;
5751
5752 for (i = 0; i < fp_consts_inited; i++)
5753 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5754 return 1;
5755
5756 return 0;
5757 }
5758
5759 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5760 int
5761 neg_const_double_rtx_ok_for_fpa (rtx x)
5762 {
5763 REAL_VALUE_TYPE r;
5764 int i;
5765
5766 if (!fp_consts_inited)
5767 init_fp_table ();
5768
5769 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5770 r = REAL_VALUE_NEGATE (r);
5771 if (REAL_VALUE_MINUS_ZERO (r))
5772 return 0;
5773
5774 for (i = 0; i < 8; i++)
5775 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5776 return 1;
5777
5778 return 0;
5779 }
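
/* For example, (const_double 10.0) satisfies arm_const_double_rtx when
   compiling for FPA, since "10" appears in strings_fp above, but not for
   VFP, where init_fp_table only initializes the single entry "0".  */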
5780
5781
5782 /* VFPv3 has a fairly wide range of representable immediates, formed from
5783 "quarter-precision" floating-point values. These can be evaluated using this
5784 formula (with ^ for exponentiation):
5785
5786 -1^s * n * 2^-r
5787
5788 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5789 16 <= n <= 31 and 0 <= r <= 7.
5790
5791 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5792
5793 - A (most-significant) is the sign bit.
5794 - BCD are the exponent (encoded as r XOR 3).
5795 - EFGH are the mantissa (encoded as n - 16).
5796 */
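
/* Illustrative sketch (not part of the compiler): decoding the 8-bit
   ABCDEFGH index described above back into the value it represents,
   using n = EFGH + 16 and r = BCD ^ 3.  For instance, index 0x70 gives
   s = 0, r = 4, n = 16, i.e. 16 * 2^-4 = 1.0.  The helper name is
   hypothetical.  */
#if 0
static double
example_vfp3_decode_imm8 (unsigned int imm8)
{
  int sign = (imm8 >> 7) & 1;      /* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;   /* BCD: exponent, encoded as r XOR 3.  */
  int n = (imm8 & 15) + 16;        /* EFGH: mantissa, encoded as n - 16.  */

  return (sign ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}
#endif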
5797
5798 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5799 fconst[sd] instruction, or -1 if X isn't suitable. */
5800 static int
5801 vfp3_const_double_index (rtx x)
5802 {
5803 REAL_VALUE_TYPE r, m;
5804 int sign, exponent;
5805 unsigned HOST_WIDE_INT mantissa, mant_hi;
5806 unsigned HOST_WIDE_INT mask;
5807 HOST_WIDE_INT m1, m2;
5808 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5809
5810 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5811 return -1;
5812
5813 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5814
5815 /* We can't represent these things, so detect them first. */
5816 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5817 return -1;
5818
5819 /* Extract sign, exponent and mantissa. */
5820 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5821 r = REAL_VALUE_ABS (r);
5822 exponent = REAL_EXP (&r);
5823 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5824 highest (sign) bit, with a fixed binary point at bit point_pos.
5825 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5826 bits for the mantissa, this may fail (low bits would be lost). */
5827 real_ldexp (&m, &r, point_pos - exponent);
5828 REAL_VALUE_TO_INT (&m1, &m2, m);
5829 mantissa = m1;
5830 mant_hi = m2;
5831
5832 /* If there are bits set in the low part of the mantissa, we can't
5833 represent this value. */
5834 if (mantissa != 0)
5835 return -1;
5836
5837 /* Now make it so that mantissa contains the most-significant bits, and move
5838 the point_pos to indicate that the least-significant bits have been
5839 discarded. */
5840 point_pos -= HOST_BITS_PER_WIDE_INT;
5841 mantissa = mant_hi;
5842
5843 /* We can permit four significant bits of mantissa only, plus a high bit
5844 which is always 1. */
5845 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5846 if ((mantissa & mask) != 0)
5847 return -1;
5848
5849 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5850 mantissa >>= point_pos - 5;
5851
5852 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5853 floating-point immediate zero with Neon using an integer-zero load, but
5854 that case is handled elsewhere.) */
5855 if (mantissa == 0)
5856 return -1;
5857
5858 gcc_assert (mantissa >= 16 && mantissa <= 31);
5859
5860 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5861 normalized significands are in the range [1, 2). (Our mantissa is shifted
5862 left 4 places at this point relative to normalized IEEE754 values). GCC
5863 internally uses [0.5, 1) (see real.c), so the exponent returned from
5864 REAL_EXP must be altered. */
5865 exponent = 5 - exponent;
5866
5867 if (exponent < 0 || exponent > 7)
5868 return -1;
5869
5870 /* Sign, mantissa and exponent are now in the correct form to plug into the
5871 formulae described in the comment above. */
5872 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5873 }
5874
5875 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5876 int
5877 vfp3_const_double_rtx (rtx x)
5878 {
5879 if (!TARGET_VFP3)
5880 return 0;
5881
5882 return vfp3_const_double_index (x) != -1;
5883 }
5884
5885 /* Recognize immediates which can be used in various Neon instructions. Legal
5886 immediates are described by the following table (for VMVN variants, the
5887 bitwise inverse of the constant shown is recognized. In either case, VMOV
5888 is output and the correct instruction to use for a given constant is chosen
5889 by the assembler). The constant shown is replicated across all elements of
5890 the destination vector.
5891
5892 insn elems variant constant (binary)
5893 ---- ----- ------- -----------------
5894 vmov i32 0 00000000 00000000 00000000 abcdefgh
5895 vmov i32 1 00000000 00000000 abcdefgh 00000000
5896 vmov i32 2 00000000 abcdefgh 00000000 00000000
5897 vmov i32 3 abcdefgh 00000000 00000000 00000000
5898 vmov i16 4 00000000 abcdefgh
5899 vmov i16 5 abcdefgh 00000000
5900 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5901 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5902 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5903 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5904 vmvn i16 10 00000000 abcdefgh
5905 vmvn i16 11 abcdefgh 00000000
5906 vmov i32 12 00000000 00000000 abcdefgh 11111111
5907 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5908 vmov i32 14 00000000 abcdefgh 11111111 11111111
5909 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5910 vmov i8 16 abcdefgh
5911 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5912 eeeeeeee ffffffff gggggggg hhhhhhhh
5913 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5914
5915 For case 18, B = !b. Representable values are exactly those accepted by
5916 vfp3_const_double_index, but are output as floating-point numbers rather
5917 than indices.
5918
5919 Variants 0-5 (inclusive) may also be used as immediates for the second
5920 operand of VORR/VBIC instructions.
5921
5922 The INVERSE argument causes the bitwise inverse of the given operand to be
5923 recognized instead (used for recognizing legal immediates for the VAND/VORN
5924 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5925 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5926 output, rather than the real insns vbic/vorr).
5927
5928 INVERSE makes no difference to the recognition of float vectors.
5929
5930 The return value is the variant of immediate as shown in the above table, or
5931 -1 if the given value doesn't match any of the listed patterns.
5932 */
5933 static int
5934 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5935 rtx *modconst, int *elementwidth)
5936 {
5937 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5938 matches = 1; \
5939 for (i = 0; i < idx; i += (STRIDE)) \
5940 if (!(TEST)) \
5941 matches = 0; \
5942 if (matches) \
5943 { \
5944 immtype = (CLASS); \
5945 elsize = (ELSIZE); \
5946 break; \
5947 }
5948
5949 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5950 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5951 unsigned char bytes[16];
5952 int immtype = -1, matches;
5953 unsigned int invmask = inverse ? 0xff : 0;
5954
5955 /* Vectors of float constants. */
5956 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5957 {
5958 rtx el0 = CONST_VECTOR_ELT (op, 0);
5959 REAL_VALUE_TYPE r0;
5960
5961 if (!vfp3_const_double_rtx (el0))
5962 return -1;
5963
5964 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5965
5966 for (i = 1; i < n_elts; i++)
5967 {
5968 rtx elt = CONST_VECTOR_ELT (op, i);
5969 REAL_VALUE_TYPE re;
5970
5971 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5972
5973 if (!REAL_VALUES_EQUAL (r0, re))
5974 return -1;
5975 }
5976
5977 if (modconst)
5978 *modconst = CONST_VECTOR_ELT (op, 0);
5979
5980 if (elementwidth)
5981 *elementwidth = 0;
5982
5983 return 18;
5984 }
5985
5986 /* Splat vector constant out into a byte vector. */
5987 for (i = 0; i < n_elts; i++)
5988 {
5989 rtx el = CONST_VECTOR_ELT (op, i);
5990 unsigned HOST_WIDE_INT elpart;
5991 unsigned int part, parts;
5992
5993 if (GET_CODE (el) == CONST_INT)
5994 {
5995 elpart = INTVAL (el);
5996 parts = 1;
5997 }
5998 else if (GET_CODE (el) == CONST_DOUBLE)
5999 {
6000 elpart = CONST_DOUBLE_LOW (el);
6001 parts = 2;
6002 }
6003 else
6004 gcc_unreachable ();
6005
6006 for (part = 0; part < parts; part++)
6007 {
6008 unsigned int byte;
6009 for (byte = 0; byte < innersize; byte++)
6010 {
6011 bytes[idx++] = (elpart & 0xff) ^ invmask;
6012 elpart >>= BITS_PER_UNIT;
6013 }
6014 if (GET_CODE (el) == CONST_DOUBLE)
6015 elpart = CONST_DOUBLE_HIGH (el);
6016 }
6017 }
6018
6019 /* Sanity check. */
6020 gcc_assert (idx == GET_MODE_SIZE (mode));
6021
6022 do
6023 {
6024 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6025 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6026
6027 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6028 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6029
6030 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6031 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6032
6033 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6034 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6035
6036 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6037
6038 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6039
6040 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6041 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6042
6043 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6044 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6045
6046 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6047 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6048
6049 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6050 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6051
6052 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6053
6054 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6055
6056 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6057 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6058
6059 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6060 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6061
6062 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6063 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6064
6065 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6066 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6067
6068 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6069
6070 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6071 && bytes[i] == bytes[(i + 8) % idx]);
6072 }
6073 while (0);
6074
6075 if (immtype == -1)
6076 return -1;
6077
6078 if (elementwidth)
6079 *elementwidth = elsize;
6080
6081 if (modconst)
6082 {
6083 unsigned HOST_WIDE_INT imm = 0;
6084
6085 /* Un-invert bytes of recognized vector, if necessary. */
6086 if (invmask != 0)
6087 for (i = 0; i < idx; i++)
6088 bytes[i] ^= invmask;
6089
6090 if (immtype == 17)
6091 {
6092 /* FIXME: Broken on 32-bit H_W_I hosts. */
6093 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6094
6095 for (i = 0; i < 8; i++)
6096 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6097 << (i * BITS_PER_UNIT);
6098
6099 *modconst = GEN_INT (imm);
6100 }
6101 else
6102 {
6103 unsigned HOST_WIDE_INT imm = 0;
6104
6105 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6106 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6107
6108 *modconst = GEN_INT (imm);
6109 }
6110 }
6111
6112 return immtype;
6113 #undef CHECK
6114 }
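
/* Worked example (for illustration only, not used by the compiler): for a
   vector whose 32-bit elements all equal the same value, the table above
   reduces to asking which single byte is non-zero (variants 0-3, VMOV) or
   which single byte differs from 0xff (variants 6-9, VMVN).  The sketch
   below mirrors that subset of the CHECK tests for one little-endian
   element; the helper name is hypothetical.  A V4SImode vector of
   0x000000ab elements, for instance, matches variant 0 with element
   width 32.  */
#if 0
static int
example_neon_i32_variant (unsigned int val)
{
  unsigned char b[4];
  int i, variant;

  for (i = 0; i < 4; i++)
    b[i] = (val >> (i * 8)) & 0xff;

  /* Variants 0-3: one arbitrary byte, the other three zero.  */
  for (variant = 0; variant < 4; variant++)
    if (b[(variant + 1) % 4] == 0
        && b[(variant + 2) % 4] == 0
        && b[(variant + 3) % 4] == 0)
      return variant;

  /* Variants 6-9: one arbitrary byte, the other three 0xff.  */
  for (variant = 0; variant < 4; variant++)
    if (b[(variant + 1) % 4] == 0xff
        && b[(variant + 2) % 4] == 0xff
        && b[(variant + 3) % 4] == 0xff)
      return variant + 6;

  return -1;
}
#endif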
6115
6116 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6117 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6118 float elements), and a modified constant (whatever should be output for a
6119 VMOV) in *MODCONST. */
6120
6121 int
6122 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6123 rtx *modconst, int *elementwidth)
6124 {
6125 rtx tmpconst;
6126 int tmpwidth;
6127 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6128
6129 if (retval == -1)
6130 return 0;
6131
6132 if (modconst)
6133 *modconst = tmpconst;
6134
6135 if (elementwidth)
6136 *elementwidth = tmpwidth;
6137
6138 return 1;
6139 }
6140
6141 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6142 the immediate is valid, write a constant suitable for using as an operand
6143 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6144 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6145
6146 int
6147 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6148 rtx *modconst, int *elementwidth)
6149 {
6150 rtx tmpconst;
6151 int tmpwidth;
6152 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6153
6154 if (retval < 0 || retval > 5)
6155 return 0;
6156
6157 if (modconst)
6158 *modconst = tmpconst;
6159
6160 if (elementwidth)
6161 *elementwidth = tmpwidth;
6162
6163 return 1;
6164 }
6165
6166 /* Return a string suitable for output of Neon immediate logic operation
6167 MNEM. */
6168
6169 char *
6170 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6171 int inverse, int quad)
6172 {
6173 int width, is_valid;
6174 static char templ[40];
6175
6176 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6177
6178 gcc_assert (is_valid != 0);
6179
6180 if (quad)
6181 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6182 else
6183 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6184
6185 return templ;
6186 }
6187
6188 /* Output a sequence of pairwise operations to implement a reduction.
6189 NOTE: We do "too much work" here, because pairwise operations work on two
6190 registers-worth of operands in one go. Unfortunately we can't exploit those
6191 extra calculations to do the full operation in fewer steps, as far as we can tell.
6192 Although all vector elements of the result but the first are ignored, we
6193 actually calculate the same result in each of the elements. An alternative
6194 such as initially loading a vector with zero to use as each of the second
6195 operands would use up an additional register and take an extra instruction,
6196 for no particular gain. */
6197
6198 void
6199 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6200 rtx (*reduc) (rtx, rtx, rtx))
6201 {
6202 enum machine_mode inner = GET_MODE_INNER (mode);
6203 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6204 rtx tmpsum = op1;
6205
6206 for (i = parts / 2; i >= 1; i /= 2)
6207 {
6208 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6209 emit_insn (reduc (dest, tmpsum, tmpsum));
6210 tmpsum = dest;
6211 }
6212 }
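
/* Scalar analogue (for illustration only, not used by the compiler): the
   loop above emits log2(PARTS) pairwise operations, each step halving the
   number of lanes that still matter, so that after the final step lane 0
   holds the complete reduction.  The sketch below assumes N is a power of
   two and OP is the reduction operator; both names are hypothetical.  */
#if 0
static int
example_pairwise_reduce (int *v, unsigned int n, int (*op) (int, int))
{
  unsigned int i, step;

  /* Each step folds neighbouring lanes, halving the live lane count.  */
  for (step = n / 2; step >= 1; step /= 2)
    for (i = 0; i < step; i++)
      v[i] = op (v[2 * i], v[2 * i + 1]);

  return v[0];
}
#endif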
6213
6214 /* Initialize a vector with non-constant elements. FIXME: We can do better
6215 than the current implementation (building a vector on the stack and then
6216 loading it) in many cases. See rs6000.c. */
6217
6218 void
6219 neon_expand_vector_init (rtx target, rtx vals)
6220 {
6221 enum machine_mode mode = GET_MODE (target);
6222 enum machine_mode inner = GET_MODE_INNER (mode);
6223 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6224 rtx mem;
6225
6226 gcc_assert (VECTOR_MODE_P (mode));
6227
6228 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6229 for (i = 0; i < n_elts; i++)
6230 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6231 XVECEXP (vals, 0, i));
6232
6233 emit_move_insn (target, mem);
6234 }
6235
6236 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6237 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6238 reported source locations are bogus. */
6239
6240 static void
6241 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6242 const char *err)
6243 {
6244 HOST_WIDE_INT lane;
6245
6246 gcc_assert (GET_CODE (operand) == CONST_INT);
6247
6248 lane = INTVAL (operand);
6249
6250 if (lane < low || lane >= high)
6251 error (err);
6252 }
6253
6254 /* Bounds-check lanes. */
6255
6256 void
6257 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6258 {
6259 bounds_check (operand, low, high, "lane out of range");
6260 }
6261
6262 /* Bounds-check constants. */
6263
6264 void
6265 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6266 {
6267 bounds_check (operand, low, high, "constant out of range");
6268 }
6269
6270 HOST_WIDE_INT
6271 neon_element_bits (enum machine_mode mode)
6272 {
6273 if (mode == DImode)
6274 return GET_MODE_BITSIZE (mode);
6275 else
6276 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6277 }
6278
6279 \f
6280 /* Predicates for `match_operand' and `match_operator'. */
6281
6282 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6283 int
6284 cirrus_memory_offset (rtx op)
6285 {
6286 /* Reject eliminable registers. */
6287 if (! (reload_in_progress || reload_completed)
6288 && ( reg_mentioned_p (frame_pointer_rtx, op)
6289 || reg_mentioned_p (arg_pointer_rtx, op)
6290 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6291 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6292 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6293 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6294 return 0;
6295
6296 if (GET_CODE (op) == MEM)
6297 {
6298 rtx ind;
6299
6300 ind = XEXP (op, 0);
6301
6302 /* Match: (mem (reg)). */
6303 if (GET_CODE (ind) == REG)
6304 return 1;
6305
6306 /* Match:
6307 (mem (plus (reg)
6308 (const))). */
6309 if (GET_CODE (ind) == PLUS
6310 && GET_CODE (XEXP (ind, 0)) == REG
6311 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6312 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6313 return 1;
6314 }
6315
6316 return 0;
6317 }
6318
6319 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6320 WB is true if full writeback address modes are allowed and is false
6321 if limited writeback address modes (POST_INC and PRE_DEC) are
6322 allowed. */
6323
6324 int
6325 arm_coproc_mem_operand (rtx op, bool wb)
6326 {
6327 rtx ind;
6328
6329 /* Reject eliminable registers. */
6330 if (! (reload_in_progress || reload_completed)
6331 && ( reg_mentioned_p (frame_pointer_rtx, op)
6332 || reg_mentioned_p (arg_pointer_rtx, op)
6333 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6334 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6335 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6336 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6337 return FALSE;
6338
6339 /* Constants are converted into offsets from labels. */
6340 if (GET_CODE (op) != MEM)
6341 return FALSE;
6342
6343 ind = XEXP (op, 0);
6344
6345 if (reload_completed
6346 && (GET_CODE (ind) == LABEL_REF
6347 || (GET_CODE (ind) == CONST
6348 && GET_CODE (XEXP (ind, 0)) == PLUS
6349 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6350 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6351 return TRUE;
6352
6353 /* Match: (mem (reg)). */
6354 if (GET_CODE (ind) == REG)
6355 return arm_address_register_rtx_p (ind, 0);
6356
6357 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
6358 acceptable in any case (subject to verification by
6359 arm_address_register_rtx_p). We need WB to be true to accept
6360 PRE_INC and POST_DEC. */
6361 if (GET_CODE (ind) == POST_INC
6362 || GET_CODE (ind) == PRE_DEC
6363 || (wb
6364 && (GET_CODE (ind) == PRE_INC
6365 || GET_CODE (ind) == POST_DEC)))
6366 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6367
6368 if (wb
6369 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6370 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6371 && GET_CODE (XEXP (ind, 1)) == PLUS
6372 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6373 ind = XEXP (ind, 1);
6374
6375 /* Match:
6376 (plus (reg)
6377 (const)). */
6378 if (GET_CODE (ind) == PLUS
6379 && GET_CODE (XEXP (ind, 0)) == REG
6380 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6381 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6382 && INTVAL (XEXP (ind, 1)) > -1024
6383 && INTVAL (XEXP (ind, 1)) < 1024
6384 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6385 return TRUE;
6386
6387 return FALSE;
6388 }
6389
6390 /* Return TRUE if OP is a memory operand which we can load or store a vector
6391 to/from. If CORE is true, we're moving from ARM registers not Neon
6392 registers. */
6393 int
6394 neon_vector_mem_operand (rtx op, bool core)
6395 {
6396 rtx ind;
6397
6398 /* Reject eliminable registers. */
6399 if (! (reload_in_progress || reload_completed)
6400 && ( reg_mentioned_p (frame_pointer_rtx, op)
6401 || reg_mentioned_p (arg_pointer_rtx, op)
6402 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6403 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6404 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6405 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6406 return FALSE;
6407
6408 /* Constants are converted into offsets from labels. */
6409 if (GET_CODE (op) != MEM)
6410 return FALSE;
6411
6412 ind = XEXP (op, 0);
6413
6414 if (reload_completed
6415 && (GET_CODE (ind) == LABEL_REF
6416 || (GET_CODE (ind) == CONST
6417 && GET_CODE (XEXP (ind, 0)) == PLUS
6418 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6419 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6420 return TRUE;
6421
6422 /* Match: (mem (reg)). */
6423 if (GET_CODE (ind) == REG)
6424 return arm_address_register_rtx_p (ind, 0);
6425
6426 /* Allow post-increment with Neon registers. */
6427 if (!core && GET_CODE (ind) == POST_INC)
6428 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6429
6430 #if 0
6431 /* FIXME: We can support this too if we use VLD1/VST1. */
6432 if (!core
6433 && GET_CODE (ind) == POST_MODIFY
6434 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6435 && GET_CODE (XEXP (ind, 1)) == PLUS
6436 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6437 ind = XEXP (ind, 1);
6438 #endif
6439
6440 /* Match:
6441 (plus (reg)
6442 (const)). */
6443 if (!core
6444 && GET_CODE (ind) == PLUS
6445 && GET_CODE (XEXP (ind, 0)) == REG
6446 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6447 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6448 && INTVAL (XEXP (ind, 1)) > -1024
6449 && INTVAL (XEXP (ind, 1)) < 1016
6450 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6451 return TRUE;
6452
6453 return FALSE;
6454 }
6455
6456 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6457 type. */
6458 int
6459 neon_struct_mem_operand (rtx op)
6460 {
6461 rtx ind;
6462
6463 /* Reject eliminable registers. */
6464 if (! (reload_in_progress || reload_completed)
6465 && ( reg_mentioned_p (frame_pointer_rtx, op)
6466 || reg_mentioned_p (arg_pointer_rtx, op)
6467 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6468 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6469 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6470 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6471 return FALSE;
6472
6473 /* Constants are converted into offsets from labels. */
6474 if (GET_CODE (op) != MEM)
6475 return FALSE;
6476
6477 ind = XEXP (op, 0);
6478
6479 if (reload_completed
6480 && (GET_CODE (ind) == LABEL_REF
6481 || (GET_CODE (ind) == CONST
6482 && GET_CODE (XEXP (ind, 0)) == PLUS
6483 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6484 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6485 return TRUE;
6486
6487 /* Match: (mem (reg)). */
6488 if (GET_CODE (ind) == REG)
6489 return arm_address_register_rtx_p (ind, 0);
6490
6491 return FALSE;
6492 }
6493
6494 /* Return true if X is a register that will be eliminated later on. */
6495 int
6496 arm_eliminable_register (rtx x)
6497 {
6498 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6499 || REGNO (x) == ARG_POINTER_REGNUM
6500 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6501 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6502 }
6503
6504 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
6505 coprocessor registers. Otherwise return NO_REGS. */
6506
6507 enum reg_class
6508 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6509 {
6510 if (TARGET_NEON
6511 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6512 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6513 && neon_vector_mem_operand (x, FALSE))
6514 return NO_REGS;
6515
6516 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6517 return NO_REGS;
6518
6519 return GENERAL_REGS;
6520 }
6521
6522 /* Values which must be returned in the most-significant end of the return
6523 register. */
6524
6525 static bool
6526 arm_return_in_msb (tree valtype)
6527 {
6528 return (TARGET_AAPCS_BASED
6529 && BYTES_BIG_ENDIAN
6530 && (AGGREGATE_TYPE_P (valtype)
6531 || TREE_CODE (valtype) == COMPLEX_TYPE));
6532 }
6533
6534 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6535 Used by the Cirrus Maverick code which has to work around
6536 a hardware bug triggered by such instructions. */
6537 static bool
6538 arm_memory_load_p (rtx insn)
6539 {
6540 rtx body, lhs, rhs;
6541
6542 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6543 return false;
6544
6545 body = PATTERN (insn);
6546
6547 if (GET_CODE (body) != SET)
6548 return false;
6549
6550 lhs = XEXP (body, 0);
6551 rhs = XEXP (body, 1);
6552
6553 lhs = REG_OR_SUBREG_RTX (lhs);
6554
6555 /* If the destination is not a general purpose
6556 register we do not have to worry. */
6557 if (GET_CODE (lhs) != REG
6558 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6559 return false;
6560
6561 /* As well as loads from memory we also have to react
6562 to loads of invalid constants which will be turned
6563 into loads from the minipool. */
6564 return (GET_CODE (rhs) == MEM
6565 || GET_CODE (rhs) == SYMBOL_REF
6566 || note_invalid_constants (insn, -1, false));
6567 }
6568
6569 /* Return TRUE if INSN is a Cirrus instruction. */
6570 static bool
6571 arm_cirrus_insn_p (rtx insn)
6572 {
6573 enum attr_cirrus attr;
6574
6575 /* get_attr cannot accept USE or CLOBBER. */
6576 if (!insn
6577 || GET_CODE (insn) != INSN
6578 || GET_CODE (PATTERN (insn)) == USE
6579 || GET_CODE (PATTERN (insn)) == CLOBBER)
6580 return 0;
6581
6582 attr = get_attr_cirrus (insn);
6583
6584 return attr != CIRRUS_NOT;
6585 }
6586
6587 /* Cirrus reorg for invalid instruction combinations. */
6588 static void
6589 cirrus_reorg (rtx first)
6590 {
6591 enum attr_cirrus attr;
6592 rtx body = PATTERN (first);
6593 rtx t;
6594 int nops;
6595
6596 /* Any branch must be followed by 2 non-Cirrus instructions. */
6597 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6598 {
6599 nops = 0;
6600 t = next_nonnote_insn (first);
6601
6602 if (arm_cirrus_insn_p (t))
6603 ++ nops;
6604
6605 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6606 ++ nops;
6607
6608 while (nops --)
6609 emit_insn_after (gen_nop (), first);
6610
6611 return;
6612 }
6613
6614 /* (float (blah)) is in parallel with a clobber. */
6615 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6616 body = XVECEXP (body, 0, 0);
6617
6618 if (GET_CODE (body) == SET)
6619 {
6620 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6621
6622 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6623 be followed by a non-Cirrus insn. */
6624 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6625 {
6626 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6627 emit_insn_after (gen_nop (), first);
6628
6629 return;
6630 }
6631 else if (arm_memory_load_p (first))
6632 {
6633 unsigned int arm_regno;
6634
6635 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6636 ldr/cfmv64hr combination where the Rd field is the same
6637 in both instructions must be split with a non-Cirrus
6638 insn. Example:
6639
6640 ldr r0, blah
6641 nop
6642 cfmvsr mvf0, r0. */
6643
6644 /* Get Arm register number for ldr insn. */
6645 if (GET_CODE (lhs) == REG)
6646 arm_regno = REGNO (lhs);
6647 else
6648 {
6649 gcc_assert (GET_CODE (rhs) == REG);
6650 arm_regno = REGNO (rhs);
6651 }
6652
6653 /* Next insn. */
6654 first = next_nonnote_insn (first);
6655
6656 if (! arm_cirrus_insn_p (first))
6657 return;
6658
6659 body = PATTERN (first);
6660
6661 /* (float (blah)) is in parallel with a clobber. */
6662 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6663 body = XVECEXP (body, 0, 0);
6664
6665 if (GET_CODE (body) == FLOAT)
6666 body = XEXP (body, 0);
6667
6668 if (get_attr_cirrus (first) == CIRRUS_MOVE
6669 && GET_CODE (XEXP (body, 1)) == REG
6670 && arm_regno == REGNO (XEXP (body, 1)))
6671 emit_insn_after (gen_nop (), first);
6672
6673 return;
6674 }
6675 }
6676
6677 /* get_attr cannot accept USE or CLOBBER. */
6678 if (!first
6679 || GET_CODE (first) != INSN
6680 || GET_CODE (PATTERN (first)) == USE
6681 || GET_CODE (PATTERN (first)) == CLOBBER)
6682 return;
6683
6684 attr = get_attr_cirrus (first);
6685
6686 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6687 must be followed by a non-coprocessor instruction. */
6688 if (attr == CIRRUS_COMPARE)
6689 {
6690 nops = 0;
6691
6692 t = next_nonnote_insn (first);
6693
6694 if (arm_cirrus_insn_p (t))
6695 ++ nops;
6696
6697 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6698 ++ nops;
6699
6700 while (nops --)
6701 emit_insn_after (gen_nop (), first);
6702
6703 return;
6704 }
6705 }
6706
6707 /* Return TRUE if X references a SYMBOL_REF. */
6708 int
6709 symbol_mentioned_p (rtx x)
6710 {
6711 const char * fmt;
6712 int i;
6713
6714 if (GET_CODE (x) == SYMBOL_REF)
6715 return 1;
6716
6717 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6718 are constant offsets, not symbols. */
6719 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6720 return 0;
6721
6722 fmt = GET_RTX_FORMAT (GET_CODE (x));
6723
6724 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6725 {
6726 if (fmt[i] == 'E')
6727 {
6728 int j;
6729
6730 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6731 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6732 return 1;
6733 }
6734 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6735 return 1;
6736 }
6737
6738 return 0;
6739 }
6740
6741 /* Return TRUE if X references a LABEL_REF. */
6742 int
6743 label_mentioned_p (rtx x)
6744 {
6745 const char * fmt;
6746 int i;
6747
6748 if (GET_CODE (x) == LABEL_REF)
6749 return 1;
6750
6751 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6752 instruction, but they are constant offsets, not symbols. */
6753 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6754 return 0;
6755
6756 fmt = GET_RTX_FORMAT (GET_CODE (x));
6757 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6758 {
6759 if (fmt[i] == 'E')
6760 {
6761 int j;
6762
6763 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6764 if (label_mentioned_p (XVECEXP (x, i, j)))
6765 return 1;
6766 }
6767 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6768 return 1;
6769 }
6770
6771 return 0;
6772 }
6773
6774 int
6775 tls_mentioned_p (rtx x)
6776 {
6777 switch (GET_CODE (x))
6778 {
6779 case CONST:
6780 return tls_mentioned_p (XEXP (x, 0));
6781
6782 case UNSPEC:
6783 if (XINT (x, 1) == UNSPEC_TLS)
6784 return 1;
6785
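      /* Fall through.  */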
6786 default:
6787 return 0;
6788 }
6789 }
6790
6791 /* Must not copy a SET whose source operand is PC-relative. */
6792
6793 static bool
6794 arm_cannot_copy_insn_p (rtx insn)
6795 {
6796 rtx pat = PATTERN (insn);
6797
6798 if (GET_CODE (pat) == SET)
6799 {
6800 rtx rhs = SET_SRC (pat);
6801
6802 if (GET_CODE (rhs) == UNSPEC
6803 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6804 return TRUE;
6805
6806 if (GET_CODE (rhs) == MEM
6807 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6808 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6809 return TRUE;
6810 }
6811
6812 return FALSE;
6813 }
6814
6815 enum rtx_code
6816 minmax_code (rtx x)
6817 {
6818 enum rtx_code code = GET_CODE (x);
6819
6820 switch (code)
6821 {
6822 case SMAX:
6823 return GE;
6824 case SMIN:
6825 return LE;
6826 case UMIN:
6827 return LEU;
6828 case UMAX:
6829 return GEU;
6830 default:
6831 gcc_unreachable ();
6832 }
6833 }
6834
6835 /* Return 1 if memory locations are adjacent. */
6836 int
6837 adjacent_mem_locations (rtx a, rtx b)
6838 {
6839 /* We don't guarantee to preserve the order of these memory refs. */
6840 if (volatile_refs_p (a) || volatile_refs_p (b))
6841 return 0;
6842
6843 if ((GET_CODE (XEXP (a, 0)) == REG
6844 || (GET_CODE (XEXP (a, 0)) == PLUS
6845 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6846 && (GET_CODE (XEXP (b, 0)) == REG
6847 || (GET_CODE (XEXP (b, 0)) == PLUS
6848 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6849 {
6850 HOST_WIDE_INT val0 = 0, val1 = 0;
6851 rtx reg0, reg1;
6852 int val_diff;
6853
6854 if (GET_CODE (XEXP (a, 0)) == PLUS)
6855 {
6856 reg0 = XEXP (XEXP (a, 0), 0);
6857 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6858 }
6859 else
6860 reg0 = XEXP (a, 0);
6861
6862 if (GET_CODE (XEXP (b, 0)) == PLUS)
6863 {
6864 reg1 = XEXP (XEXP (b, 0), 0);
6865 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6866 }
6867 else
6868 reg1 = XEXP (b, 0);
6869
6870 /* Don't accept any offset that will require multiple
6871 instructions to handle, since this would cause the
6872 arith_adjacentmem pattern to output an overlong sequence. */
6873 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6874 return 0;
6875
6876 /* Don't allow an eliminable register: register elimination can make
6877 the offset too large. */
6878 if (arm_eliminable_register (reg0))
6879 return 0;
6880
6881 val_diff = val1 - val0;
6882
6883 if (arm_ld_sched)
6884 {
6885 /* If the target has load delay slots, then there's no benefit
6886 to using an ldm instruction unless the offset is zero and
6887 we are optimizing for size. */
6888 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6889 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6890 && (val_diff == 4 || val_diff == -4));
6891 }
6892
6893 return ((REGNO (reg0) == REGNO (reg1))
6894 && (val_diff == 4 || val_diff == -4));
6895 }
6896
6897 return 0;
6898 }
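
/* Worked example (for illustration only): for the MEMs (mem (reg X)) and
   (mem (plus (reg X) (const_int 4))) the base registers match, the offsets
   are 0 and 4 and their difference is 4, so the function returns nonzero
   -- unless arm_ld_sched is set, in which case it additionally requires
   optimize_size.  */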
6899
6900 int
6901 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6902 HOST_WIDE_INT *load_offset)
6903 {
6904 int unsorted_regs[4];
6905 HOST_WIDE_INT unsorted_offsets[4];
6906 int order[4];
6907 int base_reg = -1;
6908 int i;
6909
6910 /* Can only handle 2, 3, or 4 insns at present,
6911 though could be easily extended if required. */
6912 gcc_assert (nops >= 2 && nops <= 4);
6913
6914 /* Loop over the operands and check that the memory references are
6915 suitable (i.e. immediate offsets from the same base register). At
6916 the same time, extract the target register, and the memory
6917 offsets. */
6918 for (i = 0; i < nops; i++)
6919 {
6920 rtx reg;
6921 rtx offset;
6922
6923 /* Convert a subreg of a mem into the mem itself. */
6924 if (GET_CODE (operands[nops + i]) == SUBREG)
6925 operands[nops + i] = alter_subreg (operands + (nops + i));
6926
6927 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6928
6929 /* Don't reorder volatile memory references; it doesn't seem worth
6930 looking for the case where the order is ok anyway. */
6931 if (MEM_VOLATILE_P (operands[nops + i]))
6932 return 0;
6933
6934 offset = const0_rtx;
6935
6936 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6937 || (GET_CODE (reg) == SUBREG
6938 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6939 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6940 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6941 == REG)
6942 || (GET_CODE (reg) == SUBREG
6943 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6944 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6945 == CONST_INT)))
6946 {
6947 if (i == 0)
6948 {
6949 base_reg = REGNO (reg);
6950 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6951 ? REGNO (operands[i])
6952 : REGNO (SUBREG_REG (operands[i])));
6953 order[0] = 0;
6954 }
6955 else
6956 {
6957 if (base_reg != (int) REGNO (reg))
6958 /* Not addressed from the same base register. */
6959 return 0;
6960
6961 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6962 ? REGNO (operands[i])
6963 : REGNO (SUBREG_REG (operands[i])));
6964 if (unsorted_regs[i] < unsorted_regs[order[0]])
6965 order[0] = i;
6966 }
6967
6968 /* If it isn't an integer register, or if it overwrites the
6969 base register but isn't the last insn in the list, then
6970 we can't do this. */
6971 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6972 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6973 return 0;
6974
6975 unsorted_offsets[i] = INTVAL (offset);
6976 }
6977 else
6978 /* Not a suitable memory address. */
6979 return 0;
6980 }
6981
6982 /* All the useful information has now been extracted from the
6983 operands into unsorted_regs and unsorted_offsets; additionally,
6984 order[0] has been set to the lowest numbered register in the
6985 list. Sort the registers into order, and check that the memory
6986 offsets are ascending and adjacent. */
6987
6988 for (i = 1; i < nops; i++)
6989 {
6990 int j;
6991
6992 order[i] = order[i - 1];
6993 for (j = 0; j < nops; j++)
6994 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6995 && (order[i] == order[i - 1]
6996 || unsorted_regs[j] < unsorted_regs[order[i]]))
6997 order[i] = j;
6998
6999 /* Have we found a suitable register? If not, one must be used more
7000 than once. */
7001 if (order[i] == order[i - 1])
7002 return 0;
7003
7004 /* Is the memory address adjacent and ascending? */
7005 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7006 return 0;
7007 }
7008
7009 if (base)
7010 {
7011 *base = base_reg;
7012
7013 for (i = 0; i < nops; i++)
7014 regs[i] = unsorted_regs[order[i]];
7015
7016 *load_offset = unsorted_offsets[order[0]];
7017 }
7018
7019 if (unsorted_offsets[order[0]] == 0)
7020 return 1; /* ldmia */
7021
7022 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7023 return 2; /* ldmib */
7024
7025 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7026 return 3; /* ldmda */
7027
7028 if (unsorted_offsets[order[nops - 1]] == -4)
7029 return 4; /* ldmdb */
7030
7031 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7032 if the offset isn't small enough. The reason 2 ldrs are faster
7033 is because these ARMs are able to do more than one cache access
7034 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7035 whilst the ARM8 has a double bandwidth cache. This means that
7036 these cores can do both an instruction fetch and a data fetch in
7037 a single cycle, so the trick of calculating the address into a
7038 scratch register (one of the result regs) and then doing a load
7039 multiple actually becomes slower (and no smaller in code size).
7040 That is the transformation
7041
7042 ldr rd1, [rbase + offset]
7043 ldr rd2, [rbase + offset + 4]
7044
7045 to
7046
7047 add rd1, rbase, offset
7048 ldmia rd1, {rd1, rd2}
7049
7050 produces worse code -- '3 cycles + any stalls on rd2' instead of
7051 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7052 access per cycle, the first sequence could never complete in less
7053 than 6 cycles, whereas the ldm sequence would only take 5 and
7054 would make better use of sequential accesses if not hitting the
7055 cache.
7056
7057 We cheat here and test 'arm_ld_sched' which we currently know to
7058 only be true for the ARM8, ARM9 and StrongARM. If this ever
7059 changes, then the test below needs to be reworked. */
7060 if (nops == 2 && arm_ld_sched)
7061 return 0;
7062
7063 /* Can't do it without setting up the offset, only do this if it takes
7064 no more than one insn. */
7065 return (const_ok_for_arm (unsorted_offsets[order[0]])
7066 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7067 }
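
/* Worked example (for illustration only): on an ARM-mode target, four loads
   from [rb], [rb, #4], [rb, #8] and [rb, #12] into ascending registers give
   a return value of 1 (ldmia); offsets starting at 4 give 2 (ldmib), and
   offsets ending at 0 (e.g. -12 .. 0) give 3 (ldmda).  A return value of 5
   means the base must first be set up with a single add or sub, which
   emit_ldm_seq below then emits before an ldmia.  */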
7068
7069 const char *
7070 emit_ldm_seq (rtx *operands, int nops)
7071 {
7072 int regs[4];
7073 int base_reg;
7074 HOST_WIDE_INT offset;
7075 char buf[100];
7076 int i;
7077
7078 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7079 {
7080 case 1:
7081 strcpy (buf, "ldm%(ia%)\t");
7082 break;
7083
7084 case 2:
7085 strcpy (buf, "ldm%(ib%)\t");
7086 break;
7087
7088 case 3:
7089 strcpy (buf, "ldm%(da%)\t");
7090 break;
7091
7092 case 4:
7093 strcpy (buf, "ldm%(db%)\t");
7094 break;
7095
7096 case 5:
7097 if (offset >= 0)
7098 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7099 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7100 (long) offset);
7101 else
7102 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7103 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7104 (long) -offset);
7105 output_asm_insn (buf, operands);
7106 base_reg = regs[0];
7107 strcpy (buf, "ldm%(ia%)\t");
7108 break;
7109
7110 default:
7111 gcc_unreachable ();
7112 }
7113
7114 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7115 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7116
7117 for (i = 1; i < nops; i++)
7118 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7119 reg_names[regs[i]]);
7120
7121 strcat (buf, "}\t%@ phole ldm");
7122
7123 output_asm_insn (buf, operands);
7124 return "";
7125 }
7126
7127 int
7128 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7129 HOST_WIDE_INT * load_offset)
7130 {
7131 int unsorted_regs[4];
7132 HOST_WIDE_INT unsorted_offsets[4];
7133 int order[4];
7134 int base_reg = -1;
7135 int i;
7136
7137 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7138 extended if required. */
7139 gcc_assert (nops >= 2 && nops <= 4);
7140
7141 /* Loop over the operands and check that the memory references are
7142 suitable (i.e. immediate offsets from the same base register). At
7143 the same time, extract the target register, and the memory
7144 offsets. */
7145 for (i = 0; i < nops; i++)
7146 {
7147 rtx reg;
7148 rtx offset;
7149
7150 /* Convert a subreg of a mem into the mem itself. */
7151 if (GET_CODE (operands[nops + i]) == SUBREG)
7152 operands[nops + i] = alter_subreg (operands + (nops + i));
7153
7154 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7155
7156 /* Don't reorder volatile memory references; it doesn't seem worth
7157 looking for the case where the order is ok anyway. */
7158 if (MEM_VOLATILE_P (operands[nops + i]))
7159 return 0;
7160
7161 offset = const0_rtx;
7162
7163 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7164 || (GET_CODE (reg) == SUBREG
7165 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7166 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7167 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7168 == REG)
7169 || (GET_CODE (reg) == SUBREG
7170 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7171 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7172 == CONST_INT)))
7173 {
7174 if (i == 0)
7175 {
7176 base_reg = REGNO (reg);
7177 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7178 ? REGNO (operands[i])
7179 : REGNO (SUBREG_REG (operands[i])));
7180 order[0] = 0;
7181 }
7182 else
7183 {
7184 if (base_reg != (int) REGNO (reg))
7185 /* Not addressed from the same base register. */
7186 return 0;
7187
7188 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7189 ? REGNO (operands[i])
7190 : REGNO (SUBREG_REG (operands[i])));
7191 if (unsorted_regs[i] < unsorted_regs[order[0]])
7192 order[0] = i;
7193 }
7194
7195 /* If it isn't an integer register, then we can't do this. */
7196 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7197 return 0;
7198
7199 unsorted_offsets[i] = INTVAL (offset);
7200 }
7201 else
7202 /* Not a suitable memory address. */
7203 return 0;
7204 }
7205
7206 /* All the useful information has now been extracted from the
7207 operands into unsorted_regs and unsorted_offsets; additionally,
7208 order[0] has been set to the lowest numbered register in the
7209 list. Sort the registers into order, and check that the memory
7210 offsets are ascending and adjacent. */
7211
7212 for (i = 1; i < nops; i++)
7213 {
7214 int j;
7215
7216 order[i] = order[i - 1];
7217 for (j = 0; j < nops; j++)
7218 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7219 && (order[i] == order[i - 1]
7220 || unsorted_regs[j] < unsorted_regs[order[i]]))
7221 order[i] = j;
7222
7223 /* Have we found a suitable register? If not, one must be used more
7224 than once. */
7225 if (order[i] == order[i - 1])
7226 return 0;
7227
7228 /* Is the memory address adjacent and ascending? */
7229 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7230 return 0;
7231 }
7232
7233 if (base)
7234 {
7235 *base = base_reg;
7236
7237 for (i = 0; i < nops; i++)
7238 regs[i] = unsorted_regs[order[i]];
7239
7240 *load_offset = unsorted_offsets[order[0]];
7241 }
7242
7243 if (unsorted_offsets[order[0]] == 0)
7244 return 1; /* stmia */
7245
7246 if (unsorted_offsets[order[0]] == 4)
7247 return 2; /* stmib */
7248
7249 if (unsorted_offsets[order[nops - 1]] == 0)
7250 return 3; /* stmda */
7251
7252 if (unsorted_offsets[order[nops - 1]] == -4)
7253 return 4; /* stmdb */
7254
7255 return 0;
7256 }
7257
7258 const char *
7259 emit_stm_seq (rtx *operands, int nops)
7260 {
7261 int regs[4];
7262 int base_reg;
7263 HOST_WIDE_INT offset;
7264 char buf[100];
7265 int i;
7266
7267 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7268 {
7269 case 1:
7270 strcpy (buf, "stm%(ia%)\t");
7271 break;
7272
7273 case 2:
7274 strcpy (buf, "stm%(ib%)\t");
7275 break;
7276
7277 case 3:
7278 strcpy (buf, "stm%(da%)\t");
7279 break;
7280
7281 case 4:
7282 strcpy (buf, "stm%(db%)\t");
7283 break;
7284
7285 default:
7286 gcc_unreachable ();
7287 }
7288
7289 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7290 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7291
7292 for (i = 1; i < nops; i++)
7293 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7294 reg_names[regs[i]]);
7295
7296 strcat (buf, "}\t%@ phole stm");
7297
7298 output_asm_insn (buf, operands);
7299 return "";
7300 }
7301 \f
7302 /* Routines for use in generating RTL. */
7303
7304 rtx
7305 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7306 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7307 {
7308 HOST_WIDE_INT offset = *offsetp;
7309 int i = 0, j;
7310 rtx result;
7311 int sign = up ? 1 : -1;
7312 rtx mem, addr;
7313
7314 /* XScale has load-store double instructions, but they have stricter
7315 alignment requirements than load-store multiple, so we cannot
7316 use them.
7317
7318 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7319 the pipeline until completion.
7320
7321 NREGS CYCLES
7322 1 3
7323 2 4
7324 3 5
7325 4 6
7326
7327 An ldr instruction takes 1-3 cycles, but does not block the
7328 pipeline.
7329
7330 NREGS CYCLES
7331 1 1-3
7332 2 2-6
7333 3 3-9
7334 4 4-12
7335
7336 Best case ldr will always win. However, the more ldr instructions
7337 we issue, the less likely we are to be able to schedule them well.
7338 Using ldr instructions also increases code size.
7339
7340 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7341 for counts of 3 or 4 regs. */
7342 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7343 {
7344 rtx seq;
7345
7346 start_sequence ();
7347
7348 for (i = 0; i < count; i++)
7349 {
7350 addr = plus_constant (from, i * 4 * sign);
7351 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7352 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7353 offset += 4 * sign;
7354 }
7355
7356 if (write_back)
7357 {
7358 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7359 *offsetp = offset;
7360 }
7361
7362 seq = get_insns ();
7363 end_sequence ();
7364
7365 return seq;
7366 }
7367
7368 result = gen_rtx_PARALLEL (VOIDmode,
7369 rtvec_alloc (count + (write_back ? 1 : 0)));
7370 if (write_back)
7371 {
7372 XVECEXP (result, 0, 0)
7373 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7374 i = 1;
7375 count++;
7376 }
7377
7378 for (j = 0; i < count; i++, j++)
7379 {
7380 addr = plus_constant (from, j * 4 * sign);
7381 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7382 XVECEXP (result, 0, i)
7383 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7384 offset += 4 * sign;
7385 }
7386
7387 if (write_back)
7388 *offsetp = offset;
7389
7390 return result;
7391 }
7392
7393 rtx
7394 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7395 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7396 {
7397 HOST_WIDE_INT offset = *offsetp;
7398 int i = 0, j;
7399 rtx result;
7400 int sign = up ? 1 : -1;
7401 rtx mem, addr;
7402
7403 /* See arm_gen_load_multiple for discussion of
7404 the pros/cons of ldm/stm usage for XScale. */
7405 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7406 {
7407 rtx seq;
7408
7409 start_sequence ();
7410
7411 for (i = 0; i < count; i++)
7412 {
7413 addr = plus_constant (to, i * 4 * sign);
7414 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7415 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7416 offset += 4 * sign;
7417 }
7418
7419 if (write_back)
7420 {
7421 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7422 *offsetp = offset;
7423 }
7424
7425 seq = get_insns ();
7426 end_sequence ();
7427
7428 return seq;
7429 }
7430
7431 result = gen_rtx_PARALLEL (VOIDmode,
7432 rtvec_alloc (count + (write_back ? 1 : 0)));
7433 if (write_back)
7434 {
7435 XVECEXP (result, 0, 0)
7436 = gen_rtx_SET (VOIDmode, to,
7437 plus_constant (to, count * 4 * sign));
7438 i = 1;
7439 count++;
7440 }
7441
7442 for (j = 0; i < count; i++, j++)
7443 {
7444 addr = plus_constant (to, j * 4 * sign);
7445 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7446 XVECEXP (result, 0, i)
7447 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7448 offset += 4 * sign;
7449 }
7450
7451 if (write_back)
7452 *offsetp = offset;
7453
7454 return result;
7455 }
7456
7457 int
7458 arm_gen_movmemqi (rtx *operands)
7459 {
7460 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7461 HOST_WIDE_INT srcoffset, dstoffset;
7462 int i;
7463 rtx src, dst, srcbase, dstbase;
7464 rtx part_bytes_reg = NULL;
7465 rtx mem;
7466
7467 if (GET_CODE (operands[2]) != CONST_INT
7468 || GET_CODE (operands[3]) != CONST_INT
7469 || INTVAL (operands[2]) > 64
7470 || INTVAL (operands[3]) & 3)
7471 return 0;
7472
7473 dstbase = operands[0];
7474 srcbase = operands[1];
7475
7476 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7477 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7478
7479 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7480 out_words_to_go = INTVAL (operands[2]) / 4;
7481 last_bytes = INTVAL (operands[2]) & 3;
7482 dstoffset = srcoffset = 0;
7483
7484 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7485 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7486
7487 for (i = 0; in_words_to_go >= 2; i+=4)
7488 {
7489 if (in_words_to_go > 4)
7490 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7491 srcbase, &srcoffset));
7492 else
7493 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7494 FALSE, srcbase, &srcoffset));
7495
7496 if (out_words_to_go)
7497 {
7498 if (out_words_to_go > 4)
7499 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7500 dstbase, &dstoffset));
7501 else if (out_words_to_go != 1)
7502 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7503 dst, TRUE,
7504 (last_bytes == 0
7505 ? FALSE : TRUE),
7506 dstbase, &dstoffset));
7507 else
7508 {
7509 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7510 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7511 if (last_bytes != 0)
7512 {
7513 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7514 dstoffset += 4;
7515 }
7516 }
7517 }
7518
7519 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7520 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7521 }
7522
7523 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7524 if (out_words_to_go)
7525 {
7526 rtx sreg;
7527
7528 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7529 sreg = copy_to_reg (mem);
7530
7531 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7532 emit_move_insn (mem, sreg);
7533 in_words_to_go--;
7534
7535 gcc_assert (!in_words_to_go); /* Sanity check */
7536 }
7537
7538 if (in_words_to_go)
7539 {
7540 gcc_assert (in_words_to_go > 0);
7541
7542 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7543 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7544 }
7545
7546 gcc_assert (!last_bytes || part_bytes_reg);
7547
7548 if (BYTES_BIG_ENDIAN && last_bytes)
7549 {
7550 rtx tmp = gen_reg_rtx (SImode);
7551
7552 /* The bytes we want are in the top end of the word. */
7553 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7554 GEN_INT (8 * (4 - last_bytes))));
7555 part_bytes_reg = tmp;
7556
7557 while (last_bytes)
7558 {
7559 mem = adjust_automodify_address (dstbase, QImode,
7560 plus_constant (dst, last_bytes - 1),
7561 dstoffset + last_bytes - 1);
7562 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7563
7564 if (--last_bytes)
7565 {
7566 tmp = gen_reg_rtx (SImode);
7567 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7568 part_bytes_reg = tmp;
7569 }
7570 }
7571
7572 }
7573 else
7574 {
7575 if (last_bytes > 1)
7576 {
7577 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7578 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7579 last_bytes -= 2;
7580 if (last_bytes)
7581 {
7582 rtx tmp = gen_reg_rtx (SImode);
7583 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7584 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7585 part_bytes_reg = tmp;
7586 dstoffset += 2;
7587 }
7588 }
7589
7590 if (last_bytes)
7591 {
7592 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7593 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7594 }
7595 }
7596
7597 return 1;
7598 }
7599
7600 /* Select a dominance comparison mode if possible for a test of the general
7601 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7602 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7603 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7604 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7605 In all cases OP will be either EQ or NE, but we don't need to know which
7606 here. If we are unable to support a dominance comparison we return
7607 CC mode. This will then fail to match for the RTL expressions that
7608 generate this call. */
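/* Illustrative example (not part of the original sources): for a test of
   (EQ x 0) || (LE y 0) with COND_OR == DOM_CC_X_OR_Y, EQ dominates LE, so
   the code below selects CC_DLEmode.  */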
7609 enum machine_mode
7610 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7611 {
7612 enum rtx_code cond1, cond2;
7613 int swapped = 0;
7614
7615 /* Currently we will probably get the wrong result if the individual
7616 comparisons are not simple. This also ensures that it is safe to
7617 reverse a comparison if necessary. */
7618 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7619 != CCmode)
7620 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7621 != CCmode))
7622 return CCmode;
7623
7624 /* The if_then_else variant of this tests the second condition if the
7625 first passes, but is true if the first fails. Reverse the first
7626 condition to get a true "inclusive-or" expression. */
7627 if (cond_or == DOM_CC_NX_OR_Y)
7628 cond1 = reverse_condition (cond1);
7629
7630 /* If the comparisons are not equal, and one doesn't dominate the other,
7631 then we can't do this. */
7632 if (cond1 != cond2
7633 && !comparison_dominates_p (cond1, cond2)
7634 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7635 return CCmode;
7636
7637 if (swapped)
7638 {
7639 enum rtx_code temp = cond1;
7640 cond1 = cond2;
7641 cond2 = temp;
7642 }
7643
7644 switch (cond1)
7645 {
7646 case EQ:
7647 if (cond_or == DOM_CC_X_AND_Y)
7648 return CC_DEQmode;
7649
7650 switch (cond2)
7651 {
7652 case EQ: return CC_DEQmode;
7653 case LE: return CC_DLEmode;
7654 case LEU: return CC_DLEUmode;
7655 case GE: return CC_DGEmode;
7656 case GEU: return CC_DGEUmode;
7657 default: gcc_unreachable ();
7658 }
7659
7660 case LT:
7661 if (cond_or == DOM_CC_X_AND_Y)
7662 return CC_DLTmode;
7663
7664 switch (cond2)
7665 {
7666 case LT:
7667 return CC_DLTmode;
7668 case LE:
7669 return CC_DLEmode;
7670 case NE:
7671 return CC_DNEmode;
7672 default:
7673 gcc_unreachable ();
7674 }
7675
7676 case GT:
7677 if (cond_or == DOM_CC_X_AND_Y)
7678 return CC_DGTmode;
7679
7680 switch (cond2)
7681 {
7682 case GT:
7683 return CC_DGTmode;
7684 case GE:
7685 return CC_DGEmode;
7686 case NE:
7687 return CC_DNEmode;
7688 default:
7689 gcc_unreachable ();
7690 }
7691
7692 case LTU:
7693 if (cond_or == DOM_CC_X_AND_Y)
7694 return CC_DLTUmode;
7695
7696 switch (cond2)
7697 {
7698 case LTU:
7699 return CC_DLTUmode;
7700 case LEU:
7701 return CC_DLEUmode;
7702 case NE:
7703 return CC_DNEmode;
7704 default:
7705 gcc_unreachable ();
7706 }
7707
7708 case GTU:
7709 if (cond_or == DOM_CC_X_AND_Y)
7710 return CC_DGTUmode;
7711
7712 switch (cond2)
7713 {
7714 case GTU:
7715 return CC_DGTUmode;
7716 case GEU:
7717 return CC_DGEUmode;
7718 case NE:
7719 return CC_DNEmode;
7720 default:
7721 gcc_unreachable ();
7722 }
7723
7724 /* The remaining cases only occur when both comparisons are the
7725 same. */
7726 case NE:
7727 gcc_assert (cond1 == cond2);
7728 return CC_DNEmode;
7729
7730 case LE:
7731 gcc_assert (cond1 == cond2);
7732 return CC_DLEmode;
7733
7734 case GE:
7735 gcc_assert (cond1 == cond2);
7736 return CC_DGEmode;
7737
7738 case LEU:
7739 gcc_assert (cond1 == cond2);
7740 return CC_DLEUmode;
7741
7742 case GEU:
7743 gcc_assert (cond1 == cond2);
7744 return CC_DGEUmode;
7745
7746 default:
7747 gcc_unreachable ();
7748 }
7749 }
7750
7751 enum machine_mode
7752 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7753 {
7754 /* All floating point compares return CCFP if it is an equality
7755 comparison, and CCFPE otherwise. */
7756 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7757 {
7758 switch (op)
7759 {
7760 case EQ:
7761 case NE:
7762 case UNORDERED:
7763 case ORDERED:
7764 case UNLT:
7765 case UNLE:
7766 case UNGT:
7767 case UNGE:
7768 case UNEQ:
7769 case LTGT:
7770 return CCFPmode;
7771
7772 case LT:
7773 case LE:
7774 case GT:
7775 case GE:
7776 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7777 return CCFPmode;
7778 return CCFPEmode;
7779
7780 default:
7781 gcc_unreachable ();
7782 }
7783 }
7784
7785 /* A compare with a shifted operand. Because of canonicalization, the
7786 comparison will have to be swapped when we emit the assembler. */
7787 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7788 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7789 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7790 || GET_CODE (x) == ROTATERT))
7791 return CC_SWPmode;
7792
7793 /* This operation is performed swapped, but since we only rely on the Z
7794 flag we don't need an additional mode. */
7795 if (GET_MODE (y) == SImode && REG_P (y)
7796 && GET_CODE (x) == NEG
7797 && (op == EQ || op == NE))
7798 return CC_Zmode;
7799
7800 /* This is a special case that is used by combine to allow a
7801 comparison of a shifted byte load to be split into a zero-extend
7802 followed by a comparison of the shifted integer (only valid for
7803 equalities and unsigned inequalities). */
7804 if (GET_MODE (x) == SImode
7805 && GET_CODE (x) == ASHIFT
7806 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7807 && GET_CODE (XEXP (x, 0)) == SUBREG
7808 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7809 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7810 && (op == EQ || op == NE
7811 || op == GEU || op == GTU || op == LTU || op == LEU)
7812 && GET_CODE (y) == CONST_INT)
7813 return CC_Zmode;
7814
7815 /* A construct for a conditional compare: if the false arm contains
7816 0, then both conditions must be true; otherwise either condition
7817 must be true. Not all conditions are possible, so CCmode is
7818 returned if it can't be done. */
7819 if (GET_CODE (x) == IF_THEN_ELSE
7820 && (XEXP (x, 2) == const0_rtx
7821 || XEXP (x, 2) == const1_rtx)
7822 && COMPARISON_P (XEXP (x, 0))
7823 && COMPARISON_P (XEXP (x, 1)))
7824 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7825 INTVAL (XEXP (x, 2)));
7826
7827 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7828 if (GET_CODE (x) == AND
7829 && COMPARISON_P (XEXP (x, 0))
7830 && COMPARISON_P (XEXP (x, 1)))
7831 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7832 DOM_CC_X_AND_Y);
7833
7834 if (GET_CODE (x) == IOR
7835 && COMPARISON_P (XEXP (x, 0))
7836 && COMPARISON_P (XEXP (x, 1)))
7837 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7838 DOM_CC_X_OR_Y);
7839
7840 /* An operation (on Thumb) where we want to test for a single bit.
7841 This is done by shifting that bit up into the top bit of a
7842 scratch register; we can then branch on the sign bit. */
7843 if (TARGET_THUMB1
7844 && GET_MODE (x) == SImode
7845 && (op == EQ || op == NE)
7846 && GET_CODE (x) == ZERO_EXTRACT
7847 && XEXP (x, 1) == const1_rtx)
7848 return CC_Nmode;
7849
7850 /* An operation that sets the condition codes as a side-effect; the
7851 V flag is not set correctly, so we can only use comparisons where
7852 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7853 instead.) */
7854 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7855 if (GET_MODE (x) == SImode
7856 && y == const0_rtx
7857 && (op == EQ || op == NE || op == LT || op == GE)
7858 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7859 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7860 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7861 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7862 || GET_CODE (x) == LSHIFTRT
7863 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7864 || GET_CODE (x) == ROTATERT
7865 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7866 return CC_NOOVmode;
7867
7868 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7869 return CC_Zmode;
7870
7871 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7872 && GET_CODE (x) == PLUS
7873 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7874 return CC_Cmode;
7875
7876 return CCmode;
7877 }
7878
7879 /* X and Y are two things to compare using CODE. Emit the compare insn and
7880 return the rtx for register 0 in the proper mode. FP means this is a
7881 floating point compare: I don't think that it is needed on the arm. */
7882 rtx
7883 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7884 {
7885 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7886 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7887
7888 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7889
7890 return cc_reg;
7891 }
7892
7893 /* Generate a sequence of insns that will generate the correct return
7894 address mask depending on the physical architecture that the program
7895 is running on. */
7896 rtx
7897 arm_gen_return_addr_mask (void)
7898 {
7899 rtx reg = gen_reg_rtx (Pmode);
7900
7901 emit_insn (gen_return_addr_mask (reg));
7902 return reg;
7903 }
7904
7905 void
7906 arm_reload_in_hi (rtx *operands)
7907 {
7908 rtx ref = operands[1];
7909 rtx base, scratch;
7910 HOST_WIDE_INT offset = 0;
7911
7912 if (GET_CODE (ref) == SUBREG)
7913 {
7914 offset = SUBREG_BYTE (ref);
7915 ref = SUBREG_REG (ref);
7916 }
7917
7918 if (GET_CODE (ref) == REG)
7919 {
7920 /* We have a pseudo which has been spilt onto the stack; there
7921 are two cases here: the first where there is a simple
7922 stack-slot replacement and a second where the stack-slot is
7923 out of range, or is used as a subreg. */
7924 if (reg_equiv_mem[REGNO (ref)])
7925 {
7926 ref = reg_equiv_mem[REGNO (ref)];
7927 base = find_replacement (&XEXP (ref, 0));
7928 }
7929 else
7930 /* The slot is out of range, or was dressed up in a SUBREG. */
7931 base = reg_equiv_address[REGNO (ref)];
7932 }
7933 else
7934 base = find_replacement (&XEXP (ref, 0));
7935
7936 /* Handle the case where the address is too complex to be offset by 1. */
7937 if (GET_CODE (base) == MINUS
7938 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7939 {
7940 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7941
7942 emit_set_insn (base_plus, base);
7943 base = base_plus;
7944 }
7945 else if (GET_CODE (base) == PLUS)
7946 {
7947 /* The addend must be CONST_INT, or we would have dealt with it above. */
7948 HOST_WIDE_INT hi, lo;
7949
7950 offset += INTVAL (XEXP (base, 1));
7951 base = XEXP (base, 0);
7952
7953 /* Rework the address into a legal sequence of insns. */
7954 /* Valid range for lo is -4095 -> 4095 */
7955 lo = (offset >= 0
7956 ? (offset & 0xfff)
7957 : -((-offset) & 0xfff));
7958
7959 /* Corner case: if lo is the max offset then we would be out of range
7960 once we have added the additional 1 below, so bump the msb into the
7961 pre-loading insn(s). */
7962 if (lo == 4095)
7963 lo &= 0x7ff;
7964
7965 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7966 ^ (HOST_WIDE_INT) 0x80000000)
7967 - (HOST_WIDE_INT) 0x80000000);
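/* Worked example (illustrative, not from the original code): an offset of
   0x2006 splits into lo = 0x006 and hi = 0x2000, so hi + lo == offset; the
   xor/subtract above simply sign-extends the 32-bit value (offset - lo) on
   hosts where HOST_WIDE_INT is wider than 32 bits.  */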
7968
7969 gcc_assert (hi + lo == offset);
7970
7971 if (hi != 0)
7972 {
7973 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7974
7975 /* Get the base address; addsi3 knows how to handle constants
7976 that require more than one insn. */
7977 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7978 base = base_plus;
7979 offset = lo;
7980 }
7981 }
7982
7983 /* Operands[2] may overlap operands[0] (though it won't overlap
7984 operands[1]); that's why we asked for a DImode reg -- so we can
7985 use the half that does not overlap. */
7986 if (REGNO (operands[2]) == REGNO (operands[0]))
7987 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7988 else
7989 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7990
7991 emit_insn (gen_zero_extendqisi2 (scratch,
7992 gen_rtx_MEM (QImode,
7993 plus_constant (base,
7994 offset))));
7995 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7996 gen_rtx_MEM (QImode,
7997 plus_constant (base,
7998 offset + 1))));
7999 if (!BYTES_BIG_ENDIAN)
8000 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8001 gen_rtx_IOR (SImode,
8002 gen_rtx_ASHIFT
8003 (SImode,
8004 gen_rtx_SUBREG (SImode, operands[0], 0),
8005 GEN_INT (8)),
8006 scratch));
8007 else
8008 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8009 gen_rtx_IOR (SImode,
8010 gen_rtx_ASHIFT (SImode, scratch,
8011 GEN_INT (8)),
8012 gen_rtx_SUBREG (SImode, operands[0], 0)));
8013 }
8014
8015 /* Handle storing a half-word to memory during reload by synthesizing it as two
8016 byte stores. Take care not to clobber the input values until after we
8017 have moved them somewhere safe. This code assumes that if the DImode
8018 scratch in operands[2] overlaps either the input value or output address
8019 in some way, then that value must die in this insn (we absolutely need
8020 two scratch registers for some corner cases). */
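/* Sketch of the little-endian path below (illustrative summary only): the
   low byte of OUTVAL is stored at BASE + OFFSET, then OUTVAL is shifted
   right by 8 into the scratch register and that byte is stored at
   BASE + OFFSET + 1; the big-endian path simply swaps the two addresses.  */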
8021 void
8022 arm_reload_out_hi (rtx *operands)
8023 {
8024 rtx ref = operands[0];
8025 rtx outval = operands[1];
8026 rtx base, scratch;
8027 HOST_WIDE_INT offset = 0;
8028
8029 if (GET_CODE (ref) == SUBREG)
8030 {
8031 offset = SUBREG_BYTE (ref);
8032 ref = SUBREG_REG (ref);
8033 }
8034
8035 if (GET_CODE (ref) == REG)
8036 {
8037 /* We have a pseudo which has been spilt onto the stack; there
8038 are two cases here: the first where there is a simple
8039 stack-slot replacement and a second where the stack-slot is
8040 out of range, or is used as a subreg. */
8041 if (reg_equiv_mem[REGNO (ref)])
8042 {
8043 ref = reg_equiv_mem[REGNO (ref)];
8044 base = find_replacement (&XEXP (ref, 0));
8045 }
8046 else
8047 /* The slot is out of range, or was dressed up in a SUBREG. */
8048 base = reg_equiv_address[REGNO (ref)];
8049 }
8050 else
8051 base = find_replacement (&XEXP (ref, 0));
8052
8053 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8054
8055 /* Handle the case where the address is too complex to be offset by 1. */
8056 if (GET_CODE (base) == MINUS
8057 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8058 {
8059 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8060
8061 /* Be careful not to destroy OUTVAL. */
8062 if (reg_overlap_mentioned_p (base_plus, outval))
8063 {
8064 /* Updating base_plus might destroy outval; see if we can
8065 swap the scratch and base_plus. */
8066 if (!reg_overlap_mentioned_p (scratch, outval))
8067 {
8068 rtx tmp = scratch;
8069 scratch = base_plus;
8070 base_plus = tmp;
8071 }
8072 else
8073 {
8074 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8075
8076 /* Be conservative and copy OUTVAL into the scratch now;
8077 this should only be necessary if outval is a subreg
8078 of something larger than a word. */
8079 /* XXX Might this clobber base? I can't see how it can,
8080 since scratch is known to overlap with OUTVAL, and
8081 must be wider than a word. */
8082 emit_insn (gen_movhi (scratch_hi, outval));
8083 outval = scratch_hi;
8084 }
8085 }
8086
8087 emit_set_insn (base_plus, base);
8088 base = base_plus;
8089 }
8090 else if (GET_CODE (base) == PLUS)
8091 {
8092 /* The addend must be CONST_INT, or we would have dealt with it above. */
8093 HOST_WIDE_INT hi, lo;
8094
8095 offset += INTVAL (XEXP (base, 1));
8096 base = XEXP (base, 0);
8097
8098 /* Rework the address into a legal sequence of insns. */
8099 /* Valid range for lo is -4095 -> 4095 */
8100 lo = (offset >= 0
8101 ? (offset & 0xfff)
8102 : -((-offset) & 0xfff));
8103
8104 /* Corner case: if lo is the max offset then we would be out of range
8105 once we have added the additional 1 below, so bump the msb into the
8106 pre-loading insn(s). */
8107 if (lo == 4095)
8108 lo &= 0x7ff;
8109
8110 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8111 ^ (HOST_WIDE_INT) 0x80000000)
8112 - (HOST_WIDE_INT) 0x80000000);
8113
8114 gcc_assert (hi + lo == offset);
8115
8116 if (hi != 0)
8117 {
8118 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8119
8120 /* Be careful not to destroy OUTVAL. */
8121 if (reg_overlap_mentioned_p (base_plus, outval))
8122 {
8123 /* Updating base_plus might destroy outval; see if we
8124 can swap the scratch and base_plus. */
8125 if (!reg_overlap_mentioned_p (scratch, outval))
8126 {
8127 rtx tmp = scratch;
8128 scratch = base_plus;
8129 base_plus = tmp;
8130 }
8131 else
8132 {
8133 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8134
8135 /* Be conservative and copy outval into scratch now;
8136 this should only be necessary if outval is a
8137 subreg of something larger than a word. */
8138 /* XXX Might this clobber base? I can't see how it
8139 can, since scratch is known to overlap with
8140 outval. */
8141 emit_insn (gen_movhi (scratch_hi, outval));
8142 outval = scratch_hi;
8143 }
8144 }
8145
8146 /* Get the base address; addsi3 knows how to handle constants
8147 that require more than one insn. */
8148 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8149 base = base_plus;
8150 offset = lo;
8151 }
8152 }
8153
8154 if (BYTES_BIG_ENDIAN)
8155 {
8156 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8157 plus_constant (base, offset + 1)),
8158 gen_lowpart (QImode, outval)));
8159 emit_insn (gen_lshrsi3 (scratch,
8160 gen_rtx_SUBREG (SImode, outval, 0),
8161 GEN_INT (8)));
8162 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8163 gen_lowpart (QImode, scratch)));
8164 }
8165 else
8166 {
8167 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8168 gen_lowpart (QImode, outval)));
8169 emit_insn (gen_lshrsi3 (scratch,
8170 gen_rtx_SUBREG (SImode, outval, 0),
8171 GEN_INT (8)));
8172 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8173 plus_constant (base, offset + 1)),
8174 gen_lowpart (QImode, scratch)));
8175 }
8176 }
8177
8178 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8179 (padded to the size of a word) should be passed in a register. */
8180
8181 static bool
8182 arm_must_pass_in_stack (enum machine_mode mode, tree type)
8183 {
8184 if (TARGET_AAPCS_BASED)
8185 return must_pass_in_stack_var_size (mode, type);
8186 else
8187 return must_pass_in_stack_var_size_or_pad (mode, type);
8188 }
8189
8190
8191 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8192 Return true if an argument passed on the stack should be padded upwards,
8193 i.e. if the least-significant byte has useful data.
8194 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8195 aggregate types are placed in the lowest memory address. */
8196
8197 bool
8198 arm_pad_arg_upward (enum machine_mode mode, tree type)
8199 {
8200 if (!TARGET_AAPCS_BASED)
8201 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8202
8203 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8204 return false;
8205
8206 return true;
8207 }
8208
8209
8210 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8211 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8212 byte of the register has useful data, and return the opposite if the
8213 most significant byte does.
8214 For AAPCS, small aggregates and small complex types are always padded
8215 upwards. */
8216
8217 bool
8218 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8219 tree type, int first ATTRIBUTE_UNUSED)
8220 {
8221 if (TARGET_AAPCS_BASED
8222 && BYTES_BIG_ENDIAN
8223 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8224 && int_size_in_bytes (type) <= 4)
8225 return true;
8226
8227 /* Otherwise, use default padding. */
8228 return !BYTES_BIG_ENDIAN;
8229 }
8230
8231 \f
8232 /* Print a symbolic form of X to the debug file, F. */
8233 static void
8234 arm_print_value (FILE *f, rtx x)
8235 {
8236 switch (GET_CODE (x))
8237 {
8238 case CONST_INT:
8239 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8240 return;
8241
8242 case CONST_DOUBLE:
8243 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8244 return;
8245
8246 case CONST_VECTOR:
8247 {
8248 int i;
8249
8250 fprintf (f, "<");
8251 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8252 {
8253 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8254 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8255 fputc (',', f);
8256 }
8257 fprintf (f, ">");
8258 }
8259 return;
8260
8261 case CONST_STRING:
8262 fprintf (f, "\"%s\"", XSTR (x, 0));
8263 return;
8264
8265 case SYMBOL_REF:
8266 fprintf (f, "`%s'", XSTR (x, 0));
8267 return;
8268
8269 case LABEL_REF:
8270 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8271 return;
8272
8273 case CONST:
8274 arm_print_value (f, XEXP (x, 0));
8275 return;
8276
8277 case PLUS:
8278 arm_print_value (f, XEXP (x, 0));
8279 fprintf (f, "+");
8280 arm_print_value (f, XEXP (x, 1));
8281 return;
8282
8283 case PC:
8284 fprintf (f, "pc");
8285 return;
8286
8287 default:
8288 fprintf (f, "????");
8289 return;
8290 }
8291 }
8292 \f
8293 /* Routines for manipulation of the constant pool. */
8294
8295 /* Arm instructions cannot load a large constant directly into a
8296 register; they have to come from a pc relative load. The constant
8297 must therefore be placed in the addressable range of the pc
8298 relative load. Depending on the precise pc relative load
8299 instruction the range is somewhere between 256 bytes and 4k. This
8300 means that we often have to dump a constant inside a function, and
8301 generate code to branch around it.
8302
8303 It is important to minimize this, since the branches will slow
8304 things down and make the code larger.
8305
8306 Normally we can hide the table after an existing unconditional
8307 branch so that there is no interruption of the flow, but in the
8308 worst case the code looks like this:
8309
8310 ldr rn, L1
8311 ...
8312 b L2
8313 align
8314 L1: .long value
8315 L2:
8316 ...
8317
8318 ldr rn, L3
8319 ...
8320 b L4
8321 align
8322 L3: .long value
8323 L4:
8324 ...
8325
8326 We fix this by performing a scan after scheduling, which notices
8327 which instructions need to have their operands fetched from the
8328 constant table and builds the table.
8329
8330 The algorithm starts by building a table of all the constants that
8331 need fixing up and all the natural barriers in the function (places
8332 where a constant table can be dropped without breaking the flow).
8333 For each fixup we note how far the pc-relative replacement will be
8334 able to reach and the offset of the instruction into the function.
8335
8336 Having built the table we then group the fixes together to form
8337 tables that are as large as possible (subject to addressing
8338 constraints) and emit each table of constants after the last
8339 barrier that is within range of all the instructions in the group.
8340 If a group does not contain a barrier, then we forcibly create one
8341 by inserting a jump instruction into the flow. Once the table has
8342 been inserted, the insns are then modified to reference the
8343 relevant entry in the pool.
8344
8345 Possible enhancements to the algorithm (not implemented) are:
8346
8347 1) For some processors and object formats, there may be benefit in
8348 aligning the pools to the start of cache lines; this alignment
8349 would need to be taken into account when calculating addressability
8350 of a pool. */
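/* As a rough, illustrative example: an ARM-mode word load reaches forwards
   on the order of 4K bytes, so a fix recorded at address A can only use a
   pool entry placed below roughly A + 4096, less any alignment padding;
   the forwards/backwards fields of the fixup structure below record these
   per-insn limits.  */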
8351
8352 /* These typedefs are located at the start of this file, so that
8353 they can be used in the prototypes there. This comment is to
8354 remind readers of that fact so that the following structures
8355 can be understood more easily.
8356
8357 typedef struct minipool_node Mnode;
8358 typedef struct minipool_fixup Mfix; */
8359
8360 struct minipool_node
8361 {
8362 /* Doubly linked chain of entries. */
8363 Mnode * next;
8364 Mnode * prev;
8365 /* The maximum offset into the code at which this entry can be placed. While
8366 pushing fixes for forward references, all entries are sorted in order
8367 of increasing max_address. */
8368 HOST_WIDE_INT max_address;
8369 /* Similarly for an entry inserted for a backwards ref. */
8370 HOST_WIDE_INT min_address;
8371 /* The number of fixes referencing this entry. This can become zero
8372 if we "unpush" an entry. In this case we ignore the entry when we
8373 come to emit the code. */
8374 int refcount;
8375 /* The offset from the start of the minipool. */
8376 HOST_WIDE_INT offset;
8377 /* The value in the table. */
8378 rtx value;
8379 /* The mode of value. */
8380 enum machine_mode mode;
8381 /* The size of the value. With iWMMXt enabled
8382 sizes > 4 also imply an alignment of 8 bytes. */
8383 int fix_size;
8384 };
8385
8386 struct minipool_fixup
8387 {
8388 Mfix * next;
8389 rtx insn;
8390 HOST_WIDE_INT address;
8391 rtx * loc;
8392 enum machine_mode mode;
8393 int fix_size;
8394 rtx value;
8395 Mnode * minipool;
8396 HOST_WIDE_INT forwards;
8397 HOST_WIDE_INT backwards;
8398 };
8399
8400 /* Fixes less than a word need padding out to a word boundary. */
8401 #define MINIPOOL_FIX_SIZE(mode) \
8402 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
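/* For example (illustrative): an HImode fix (2 bytes) is padded out to 4
   bytes by this macro, while a DImode fix keeps its natural size of 8.  */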
8403
8404 static Mnode * minipool_vector_head;
8405 static Mnode * minipool_vector_tail;
8406 static rtx minipool_vector_label;
8407 static int minipool_pad;
8408
8409 /* The linked list of all minipool fixes required for this function. */
8410 Mfix * minipool_fix_head;
8411 Mfix * minipool_fix_tail;
8412 /* The fix entry for the current minipool, once it has been placed. */
8413 Mfix * minipool_barrier;
8414
8415 /* Determines if INSN is the start of a jump table. Returns the end
8416 of the TABLE or NULL_RTX. */
8417 static rtx
8418 is_jump_table (rtx insn)
8419 {
8420 rtx table;
8421
8422 if (GET_CODE (insn) == JUMP_INSN
8423 && JUMP_LABEL (insn) != NULL
8424 && ((table = next_real_insn (JUMP_LABEL (insn)))
8425 == next_real_insn (insn))
8426 && table != NULL
8427 && GET_CODE (table) == JUMP_INSN
8428 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8429 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8430 return table;
8431
8432 return NULL_RTX;
8433 }
8434
8435 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8436 #define JUMP_TABLES_IN_TEXT_SECTION 0
8437 #endif
8438
8439 static HOST_WIDE_INT
8440 get_jump_table_size (rtx insn)
8441 {
8442 /* ADDR_VECs only take room if read-only data goes into the text
8443 section. */
8444 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8445 {
8446 rtx body = PATTERN (insn);
8447 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8448 HOST_WIDE_INT size;
8449 HOST_WIDE_INT modesize;
8450
8451 modesize = GET_MODE_SIZE (GET_MODE (body));
8452 size = modesize * XVECLEN (body, elt);
8453 switch (modesize)
8454 {
8455 case 1:
8456 /* Round up size of TBB table to a halfword boundary. */
8457 size = (size + 1) & ~(HOST_WIDE_INT)1;
8458 break;
8459 case 2:
8460 /* No padding necessary for TBH. */
8461 break;
8462 case 4:
8463 /* Add two bytes for alignment on Thumb. */
8464 if (TARGET_THUMB)
8465 size += 2;
8466 break;
8467 default:
8468 gcc_unreachable ();
8469 }
8470 return size;
8471 }
8472
8473 return 0;
8474 }
8475
8476 /* Move a minipool fix MP from its current location to before MAX_MP.
8477 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8478 constraints may need updating. */
8479 static Mnode *
8480 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8481 HOST_WIDE_INT max_address)
8482 {
8483 /* The code below assumes these are different. */
8484 gcc_assert (mp != max_mp);
8485
8486 if (max_mp == NULL)
8487 {
8488 if (max_address < mp->max_address)
8489 mp->max_address = max_address;
8490 }
8491 else
8492 {
8493 if (max_address > max_mp->max_address - mp->fix_size)
8494 mp->max_address = max_mp->max_address - mp->fix_size;
8495 else
8496 mp->max_address = max_address;
8497
8498 /* Unlink MP from its current position. Since max_mp is non-null,
8499 mp->prev must be non-null. */
8500 mp->prev->next = mp->next;
8501 if (mp->next != NULL)
8502 mp->next->prev = mp->prev;
8503 else
8504 minipool_vector_tail = mp->prev;
8505
8506 /* Re-insert it before MAX_MP. */
8507 mp->next = max_mp;
8508 mp->prev = max_mp->prev;
8509 max_mp->prev = mp;
8510
8511 if (mp->prev != NULL)
8512 mp->prev->next = mp;
8513 else
8514 minipool_vector_head = mp;
8515 }
8516
8517 /* Save the new entry. */
8518 max_mp = mp;
8519
8520 /* Scan over the preceding entries and adjust their addresses as
8521 required. */
8522 while (mp->prev != NULL
8523 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8524 {
8525 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8526 mp = mp->prev;
8527 }
8528
8529 return max_mp;
8530 }
8531
8532 /* Add a constant to the minipool for a forward reference. Returns the
8533 node added or NULL if the constant will not fit in this pool. */
8534 static Mnode *
8535 add_minipool_forward_ref (Mfix *fix)
8536 {
8537 /* If set, max_mp is the first pool_entry that has a lower
8538 constraint than the one we are trying to add. */
8539 Mnode * max_mp = NULL;
8540 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8541 Mnode * mp;
8542
8543 /* If the minipool starts before the end of FIX->INSN then this FIX
8544 cannot be placed into the current pool. Furthermore, adding the
8545 new constant pool entry may cause the pool to start FIX_SIZE bytes
8546 earlier. */
8547 if (minipool_vector_head &&
8548 (fix->address + get_attr_length (fix->insn)
8549 >= minipool_vector_head->max_address - fix->fix_size))
8550 return NULL;
8551
8552 /* Scan the pool to see if a constant with the same value has
8553 already been added. While we are doing this, also note the
8554 location where we must insert the constant if it doesn't already
8555 exist. */
8556 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8557 {
8558 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8559 && fix->mode == mp->mode
8560 && (GET_CODE (fix->value) != CODE_LABEL
8561 || (CODE_LABEL_NUMBER (fix->value)
8562 == CODE_LABEL_NUMBER (mp->value)))
8563 && rtx_equal_p (fix->value, mp->value))
8564 {
8565 /* More than one fix references this entry. */
8566 mp->refcount++;
8567 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8568 }
8569
8570 /* Note the insertion point if necessary. */
8571 if (max_mp == NULL
8572 && mp->max_address > max_address)
8573 max_mp = mp;
8574
8575 /* If we are inserting an 8-byte aligned quantity and
8576 we have not already found an insertion point, then
8577 make sure that all such 8-byte aligned quantities are
8578 placed at the start of the pool. */
8579 if (ARM_DOUBLEWORD_ALIGN
8580 && max_mp == NULL
8581 && fix->fix_size >= 8
8582 && mp->fix_size < 8)
8583 {
8584 max_mp = mp;
8585 max_address = mp->max_address;
8586 }
8587 }
8588
8589 /* The value is not currently in the minipool, so we need to create
8590 a new entry for it. If MAX_MP is NULL, the entry will be put on
8591 the end of the list since the placement is less constrained than
8592 any existing entry. Otherwise, we insert the new fix before
8593 MAX_MP and, if necessary, adjust the constraints on the other
8594 entries. */
8595 mp = XNEW (Mnode);
8596 mp->fix_size = fix->fix_size;
8597 mp->mode = fix->mode;
8598 mp->value = fix->value;
8599 mp->refcount = 1;
8600 /* Not yet required for a backwards ref. */
8601 mp->min_address = -65536;
8602
8603 if (max_mp == NULL)
8604 {
8605 mp->max_address = max_address;
8606 mp->next = NULL;
8607 mp->prev = minipool_vector_tail;
8608
8609 if (mp->prev == NULL)
8610 {
8611 minipool_vector_head = mp;
8612 minipool_vector_label = gen_label_rtx ();
8613 }
8614 else
8615 mp->prev->next = mp;
8616
8617 minipool_vector_tail = mp;
8618 }
8619 else
8620 {
8621 if (max_address > max_mp->max_address - mp->fix_size)
8622 mp->max_address = max_mp->max_address - mp->fix_size;
8623 else
8624 mp->max_address = max_address;
8625
8626 mp->next = max_mp;
8627 mp->prev = max_mp->prev;
8628 max_mp->prev = mp;
8629 if (mp->prev != NULL)
8630 mp->prev->next = mp;
8631 else
8632 minipool_vector_head = mp;
8633 }
8634
8635 /* Save the new entry. */
8636 max_mp = mp;
8637
8638 /* Scan over the preceding entries and adjust their addresses as
8639 required. */
8640 while (mp->prev != NULL
8641 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8642 {
8643 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8644 mp = mp->prev;
8645 }
8646
8647 return max_mp;
8648 }
8649
8650 static Mnode *
8651 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8652 HOST_WIDE_INT min_address)
8653 {
8654 HOST_WIDE_INT offset;
8655
8656 /* The code below assumes these are different. */
8657 gcc_assert (mp != min_mp);
8658
8659 if (min_mp == NULL)
8660 {
8661 if (min_address > mp->min_address)
8662 mp->min_address = min_address;
8663 }
8664 else
8665 {
8666 /* We will adjust this below if it is too loose. */
8667 mp->min_address = min_address;
8668
8669 /* Unlink MP from its current position. Since min_mp is non-null,
8670 mp->next must be non-null. */
8671 mp->next->prev = mp->prev;
8672 if (mp->prev != NULL)
8673 mp->prev->next = mp->next;
8674 else
8675 minipool_vector_head = mp->next;
8676
8677 /* Reinsert it after MIN_MP. */
8678 mp->prev = min_mp;
8679 mp->next = min_mp->next;
8680 min_mp->next = mp;
8681 if (mp->next != NULL)
8682 mp->next->prev = mp;
8683 else
8684 minipool_vector_tail = mp;
8685 }
8686
8687 min_mp = mp;
8688
8689 offset = 0;
8690 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8691 {
8692 mp->offset = offset;
8693 if (mp->refcount > 0)
8694 offset += mp->fix_size;
8695
8696 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8697 mp->next->min_address = mp->min_address + mp->fix_size;
8698 }
8699
8700 return min_mp;
8701 }
8702
8703 /* Add a constant to the minipool for a backward reference. Returns the
8704 node added or NULL if the constant will not fit in this pool.
8705
8706 Note that the code for insertion for a backwards reference can be
8707 somewhat confusing because the calculated offsets for each fix do
8708 not take into account the size of the pool (which is still under
8709 construction). */
8710 static Mnode *
8711 add_minipool_backward_ref (Mfix *fix)
8712 {
8713 /* If set, min_mp is the last pool_entry that has a lower constraint
8714 than the one we are trying to add. */
8715 Mnode *min_mp = NULL;
8716 /* This can be negative, since it is only a constraint. */
8717 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8718 Mnode *mp;
8719
8720 /* If we can't reach the current pool from this insn, or if we can't
8721 insert this entry at the end of the pool without pushing other
8722 fixes out of range, then we don't try. This ensures that we
8723 can't fail later on. */
8724 if (min_address >= minipool_barrier->address
8725 || (minipool_vector_tail->min_address + fix->fix_size
8726 >= minipool_barrier->address))
8727 return NULL;
8728
8729 /* Scan the pool to see if a constant with the same value has
8730 already been added. While we are doing this, also note the
8731 location where we must insert the constant if it doesn't already
8732 exist. */
8733 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8734 {
8735 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8736 && fix->mode == mp->mode
8737 && (GET_CODE (fix->value) != CODE_LABEL
8738 || (CODE_LABEL_NUMBER (fix->value)
8739 == CODE_LABEL_NUMBER (mp->value)))
8740 && rtx_equal_p (fix->value, mp->value)
8741 /* Check that there is enough slack to move this entry to the
8742 end of the table (this is conservative). */
8743 && (mp->max_address
8744 > (minipool_barrier->address
8745 + minipool_vector_tail->offset
8746 + minipool_vector_tail->fix_size)))
8747 {
8748 mp->refcount++;
8749 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8750 }
8751
8752 if (min_mp != NULL)
8753 mp->min_address += fix->fix_size;
8754 else
8755 {
8756 /* Note the insertion point if necessary. */
8757 if (mp->min_address < min_address)
8758 {
8759 /* For now, we do not allow the insertion of nodes requiring
8760 8-byte alignment anywhere but at the start of the pool. */
8761 if (ARM_DOUBLEWORD_ALIGN
8762 && fix->fix_size >= 8 && mp->fix_size < 8)
8763 return NULL;
8764 else
8765 min_mp = mp;
8766 }
8767 else if (mp->max_address
8768 < minipool_barrier->address + mp->offset + fix->fix_size)
8769 {
8770 /* Inserting before this entry would push the fix beyond
8771 its maximum address (which can happen if we have
8772 re-located a forwards fix); force the new fix to come
8773 after it. */
8774 min_mp = mp;
8775 min_address = mp->min_address + fix->fix_size;
8776 }
8777 /* If we are inserting an 8-byte aligned quantity and
8778 we have not already found an insertion point, then
8779 make sure that all such 8-byte aligned quantities are
8780 placed at the start of the pool. */
8781 else if (ARM_DOUBLEWORD_ALIGN
8782 && min_mp == NULL
8783 && fix->fix_size >= 8
8784 && mp->fix_size < 8)
8785 {
8786 min_mp = mp;
8787 min_address = mp->min_address + fix->fix_size;
8788 }
8789 }
8790 }
8791
8792 /* We need to create a new entry. */
8793 mp = XNEW (Mnode);
8794 mp->fix_size = fix->fix_size;
8795 mp->mode = fix->mode;
8796 mp->value = fix->value;
8797 mp->refcount = 1;
8798 mp->max_address = minipool_barrier->address + 65536;
8799
8800 mp->min_address = min_address;
8801
8802 if (min_mp == NULL)
8803 {
8804 mp->prev = NULL;
8805 mp->next = minipool_vector_head;
8806
8807 if (mp->next == NULL)
8808 {
8809 minipool_vector_tail = mp;
8810 minipool_vector_label = gen_label_rtx ();
8811 }
8812 else
8813 mp->next->prev = mp;
8814
8815 minipool_vector_head = mp;
8816 }
8817 else
8818 {
8819 mp->next = min_mp->next;
8820 mp->prev = min_mp;
8821 min_mp->next = mp;
8822
8823 if (mp->next != NULL)
8824 mp->next->prev = mp;
8825 else
8826 minipool_vector_tail = mp;
8827 }
8828
8829 /* Save the new entry. */
8830 min_mp = mp;
8831
8832 if (mp->prev)
8833 mp = mp->prev;
8834 else
8835 mp->offset = 0;
8836
8837 /* Scan over the following entries and adjust their offsets. */
8838 while (mp->next != NULL)
8839 {
8840 if (mp->next->min_address < mp->min_address + mp->fix_size)
8841 mp->next->min_address = mp->min_address + mp->fix_size;
8842
8843 if (mp->refcount)
8844 mp->next->offset = mp->offset + mp->fix_size;
8845 else
8846 mp->next->offset = mp->offset;
8847
8848 mp = mp->next;
8849 }
8850
8851 return min_mp;
8852 }
8853
8854 static void
8855 assign_minipool_offsets (Mfix *barrier)
8856 {
8857 HOST_WIDE_INT offset = 0;
8858 Mnode *mp;
8859
8860 minipool_barrier = barrier;
8861
8862 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8863 {
8864 mp->offset = offset;
8865
8866 if (mp->refcount > 0)
8867 offset += mp->fix_size;
8868 }
8869 }
8870
8871 /* Output the literal table. */
8872 static void
8873 dump_minipool (rtx scan)
8874 {
8875 Mnode * mp;
8876 Mnode * nmp;
8877 int align64 = 0;
8878
8879 if (ARM_DOUBLEWORD_ALIGN)
8880 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8881 if (mp->refcount > 0 && mp->fix_size >= 8)
8882 {
8883 align64 = 1;
8884 break;
8885 }
8886
8887 if (dump_file)
8888 fprintf (dump_file,
8889 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8890 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8891
8892 scan = emit_label_after (gen_label_rtx (), scan);
8893 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8894 scan = emit_label_after (minipool_vector_label, scan);
8895
8896 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8897 {
8898 if (mp->refcount > 0)
8899 {
8900 if (dump_file)
8901 {
8902 fprintf (dump_file,
8903 ";; Offset %u, min %ld, max %ld ",
8904 (unsigned) mp->offset, (unsigned long) mp->min_address,
8905 (unsigned long) mp->max_address);
8906 arm_print_value (dump_file, mp->value);
8907 fputc ('\n', dump_file);
8908 }
8909
8910 switch (mp->fix_size)
8911 {
8912 #ifdef HAVE_consttable_1
8913 case 1:
8914 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8915 break;
8916
8917 #endif
8918 #ifdef HAVE_consttable_2
8919 case 2:
8920 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8921 break;
8922
8923 #endif
8924 #ifdef HAVE_consttable_4
8925 case 4:
8926 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8927 break;
8928
8929 #endif
8930 #ifdef HAVE_consttable_8
8931 case 8:
8932 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8933 break;
8934
8935 #endif
8936 #ifdef HAVE_consttable_16
8937 case 16:
8938 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8939 break;
8940
8941 #endif
8942 default:
8943 gcc_unreachable ();
8944 }
8945 }
8946
8947 nmp = mp->next;
8948 free (mp);
8949 }
8950
8951 minipool_vector_head = minipool_vector_tail = NULL;
8952 scan = emit_insn_after (gen_consttable_end (), scan);
8953 scan = emit_barrier_after (scan);
8954 }
8955
8956 /* Return the cost of forcibly inserting a barrier after INSN. */
8957 static int
8958 arm_barrier_cost (rtx insn)
8959 {
8960 /* Basing the location of the pool on the loop depth is preferable,
8961 but at the moment, the basic block information seems to be
8962 corrupted by this stage of the compilation. */
8963 int base_cost = 50;
8964 rtx next = next_nonnote_insn (insn);
8965
8966 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8967 base_cost -= 20;
8968
8969 switch (GET_CODE (insn))
8970 {
8971 case CODE_LABEL:
8972 /* It will always be better to place the table before the label, rather
8973 than after it. */
8974 return 50;
8975
8976 case INSN:
8977 case CALL_INSN:
8978 return base_cost;
8979
8980 case JUMP_INSN:
8981 return base_cost - 10;
8982
8983 default:
8984 return base_cost + 10;
8985 }
8986 }
8987
8988 /* Find the best place in the insn stream in the range
8989 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8990 Create the barrier by inserting a jump and add a new fix entry for
8991 it. */
8992 static Mfix *
8993 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8994 {
8995 HOST_WIDE_INT count = 0;
8996 rtx barrier;
8997 rtx from = fix->insn;
8998 /* The instruction after which we will insert the jump. */
8999 rtx selected = NULL;
9000 int selected_cost;
9001 /* The address at which the jump instruction will be placed. */
9002 HOST_WIDE_INT selected_address;
9003 Mfix * new_fix;
9004 HOST_WIDE_INT max_count = max_address - fix->address;
9005 rtx label = gen_label_rtx ();
9006
9007 selected_cost = arm_barrier_cost (from);
9008 selected_address = fix->address;
9009
9010 while (from && count < max_count)
9011 {
9012 rtx tmp;
9013 int new_cost;
9014
9015 /* This code shouldn't have been called if there was a natural barrier
9016 within range. */
9017 gcc_assert (GET_CODE (from) != BARRIER);
9018
9019 /* Count the length of this insn. */
9020 count += get_attr_length (from);
9021
9022 /* If there is a jump table, add its length. */
9023 tmp = is_jump_table (from);
9024 if (tmp != NULL)
9025 {
9026 count += get_jump_table_size (tmp);
9027
9028 /* Jump tables aren't in a basic block, so base the cost on
9029 the dispatch insn. If we select this location, we will
9030 still put the pool after the table. */
9031 new_cost = arm_barrier_cost (from);
9032
9033 if (count < max_count
9034 && (!selected || new_cost <= selected_cost))
9035 {
9036 selected = tmp;
9037 selected_cost = new_cost;
9038 selected_address = fix->address + count;
9039 }
9040
9041 /* Continue after the dispatch table. */
9042 from = NEXT_INSN (tmp);
9043 continue;
9044 }
9045
9046 new_cost = arm_barrier_cost (from);
9047
9048 if (count < max_count
9049 && (!selected || new_cost <= selected_cost))
9050 {
9051 selected = from;
9052 selected_cost = new_cost;
9053 selected_address = fix->address + count;
9054 }
9055
9056 from = NEXT_INSN (from);
9057 }
9058
9059 /* Make sure that we found a place to insert the jump. */
9060 gcc_assert (selected);
9061
9062 /* Create a new JUMP_INSN that branches around a barrier. */
9063 from = emit_jump_insn_after (gen_jump (label), selected);
9064 JUMP_LABEL (from) = label;
9065 barrier = emit_barrier_after (from);
9066 emit_label_after (label, barrier);
9067
9068 /* Create a minipool barrier entry for the new barrier. */
9069 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9070 new_fix->insn = barrier;
9071 new_fix->address = selected_address;
9072 new_fix->next = fix->next;
9073 fix->next = new_fix;
9074
9075 return new_fix;
9076 }
9077
9078 /* Record that there is a natural barrier in the insn stream at
9079 ADDRESS. */
9080 static void
9081 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9082 {
9083 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9084
9085 fix->insn = insn;
9086 fix->address = address;
9087
9088 fix->next = NULL;
9089 if (minipool_fix_head != NULL)
9090 minipool_fix_tail->next = fix;
9091 else
9092 minipool_fix_head = fix;
9093
9094 minipool_fix_tail = fix;
9095 }
9096
9097 /* Record INSN, which will need fixing up to load a value from the
9098 minipool. ADDRESS is the offset of the insn since the start of the
9099 function; LOC is a pointer to the part of the insn which requires
9100 fixing; VALUE is the constant that must be loaded, which is of type
9101 MODE. */
9102 static void
9103 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9104 enum machine_mode mode, rtx value)
9105 {
9106 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9107
9108 #ifdef AOF_ASSEMBLER
9109 /* PIC symbol references need to be converted into offsets into the
9110 based area. */
9111 /* XXX This shouldn't be done here. */
9112 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
9113 value = aof_pic_entry (value);
9114 #endif /* AOF_ASSEMBLER */
9115
9116 fix->insn = insn;
9117 fix->address = address;
9118 fix->loc = loc;
9119 fix->mode = mode;
9120 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9121 fix->value = value;
9122 fix->forwards = get_attr_pool_range (insn);
9123 fix->backwards = get_attr_neg_pool_range (insn);
9124 fix->minipool = NULL;
9125
9126 /* If an insn doesn't have a range defined for it, then it isn't
9127 expecting to be reworked by this code. Better to stop now than
9128 to generate duff assembly code. */
9129 gcc_assert (fix->forwards || fix->backwards);
9130
9131 /* If an entry requires 8-byte alignment then assume all constant pools
9132 require 4 bytes of padding. Trying to do this later on a per-pool
9133 basis is awkward because existing pool entries have to be modified. */
9134 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9135 minipool_pad = 4;
9136
9137 if (dump_file)
9138 {
9139 fprintf (dump_file,
9140 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9141 GET_MODE_NAME (mode),
9142 INSN_UID (insn), (unsigned long) address,
9143 -1 * (long)fix->backwards, (long)fix->forwards);
9144 arm_print_value (dump_file, fix->value);
9145 fprintf (dump_file, "\n");
9146 }
9147
9148 /* Add it to the chain of fixes. */
9149 fix->next = NULL;
9150
9151 if (minipool_fix_head != NULL)
9152 minipool_fix_tail->next = fix;
9153 else
9154 minipool_fix_head = fix;
9155
9156 minipool_fix_tail = fix;
9157 }
9158
9159 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9160 Returns the number of insns needed, or 99 if we don't know how to
9161 do it. */
9162 int
9163 arm_const_double_inline_cost (rtx val)
9164 {
9165 rtx lowpart, highpart;
9166 enum machine_mode mode;
9167
9168 mode = GET_MODE (val);
9169
9170 if (mode == VOIDmode)
9171 mode = DImode;
9172
9173 gcc_assert (GET_MODE_SIZE (mode) == 8);
9174
9175 lowpart = gen_lowpart (SImode, val);
9176 highpart = gen_highpart_mode (SImode, mode, val);
9177
9178 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9179 gcc_assert (GET_CODE (highpart) == CONST_INT);
9180
9181 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9182 NULL_RTX, NULL_RTX, 0, 0)
9183 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9184 NULL_RTX, NULL_RTX, 0, 0));
9185 }
9186
9187 /* Return true if it is worthwhile to split a 64-bit constant into two
9188 32-bit operations. This is the case if optimizing for size, or
9189 if we have load delay slots, or if one 32-bit part can be done with
9190 a single data operation. */
9191 bool
9192 arm_const_double_by_parts (rtx val)
9193 {
9194 enum machine_mode mode = GET_MODE (val);
9195 rtx part;
9196
9197 if (optimize_size || arm_ld_sched)
9198 return true;
9199
9200 if (mode == VOIDmode)
9201 mode = DImode;
9202
9203 part = gen_highpart_mode (SImode, mode, val);
9204
9205 gcc_assert (GET_CODE (part) == CONST_INT);
9206
9207 if (const_ok_for_arm (INTVAL (part))
9208 || const_ok_for_arm (~INTVAL (part)))
9209 return true;
9210
9211 part = gen_lowpart (SImode, val);
9212
9213 gcc_assert (GET_CODE (part) == CONST_INT);
9214
9215 if (const_ok_for_arm (INTVAL (part))
9216 || const_ok_for_arm (~INTVAL (part)))
9217 return true;
9218
9219 return false;
9220 }
9221
9222 /* Scan INSN and note any of its operands that need fixing.
9223 If DO_PUSHES is false we do not actually push any of the fixups
9224 needed. The function returns TRUE if any fixups were needed/pushed.
9225 This is used by arm_memory_load_p() which needs to know about loads
9226 of constants that will be converted into minipool loads. */
9227 static bool
9228 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9229 {
9230 bool result = false;
9231 int opno;
9232
9233 extract_insn (insn);
9234
9235 if (!constrain_operands (1))
9236 fatal_insn_not_found (insn);
9237
9238 if (recog_data.n_alternatives == 0)
9239 return false;
9240
9241 /* Fill in recog_op_alt with information about the constraints of
9242 this insn. */
9243 preprocess_constraints ();
9244
9245 for (opno = 0; opno < recog_data.n_operands; opno++)
9246 {
9247 /* Things we need to fix can only occur in inputs. */
9248 if (recog_data.operand_type[opno] != OP_IN)
9249 continue;
9250
9251 /* If this alternative is a memory reference, then any mention
9252 of constants in this alternative is really to fool reload
9253 into allowing us to accept one there. We need to fix them up
9254 now so that we output the right code. */
9255 if (recog_op_alt[opno][which_alternative].memory_ok)
9256 {
9257 rtx op = recog_data.operand[opno];
9258
9259 if (CONSTANT_P (op))
9260 {
9261 if (do_pushes)
9262 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9263 recog_data.operand_mode[opno], op);
9264 result = true;
9265 }
9266 else if (GET_CODE (op) == MEM
9267 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9268 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9269 {
9270 if (do_pushes)
9271 {
9272 rtx cop = avoid_constant_pool_reference (op);
9273
9274 /* Casting the address of something to a mode narrower
9275 than a word can cause avoid_constant_pool_reference()
9276 to return the pool reference itself. That's no good to
9277 us here. Let's just hope that we can use the
9278 constant pool value directly. */
9279 if (op == cop)
9280 cop = get_pool_constant (XEXP (op, 0));
9281
9282 push_minipool_fix (insn, address,
9283 recog_data.operand_loc[opno],
9284 recog_data.operand_mode[opno], cop);
9285 }
9286
9287 result = true;
9288 }
9289 }
9290 }
9291
9292 return result;
9293 }
9294
9295 /* GCC puts the pool in the wrong place for ARM, since we can only
9296 load addresses a limited distance around the pc. We do some
9297 special munging to move the constant pool values to the correct
9298 point in the code. */
9299 static void
9300 arm_reorg (void)
9301 {
9302 rtx insn;
9303 HOST_WIDE_INT address = 0;
9304 Mfix * fix;
9305
9306 minipool_fix_head = minipool_fix_tail = NULL;
9307
9308 /* The first insn must always be a note, or the code below won't
9309 scan it properly. */
9310 insn = get_insns ();
9311 gcc_assert (GET_CODE (insn) == NOTE);
9312 minipool_pad = 0;
9313
9314 /* Scan all the insns and record the operands that will need fixing. */
9315 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9316 {
9317 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9318 && (arm_cirrus_insn_p (insn)
9319 || GET_CODE (insn) == JUMP_INSN
9320 || arm_memory_load_p (insn)))
9321 cirrus_reorg (insn);
9322
9323 if (GET_CODE (insn) == BARRIER)
9324 push_minipool_barrier (insn, address);
9325 else if (INSN_P (insn))
9326 {
9327 rtx table;
9328
9329 note_invalid_constants (insn, address, true);
9330 address += get_attr_length (insn);
9331
9332 /* If the insn is a vector jump, add the size of the table
9333 and skip the table. */
9334 if ((table = is_jump_table (insn)) != NULL)
9335 {
9336 address += get_jump_table_size (table);
9337 insn = table;
9338 }
9339 }
9340 }
9341
9342 fix = minipool_fix_head;
9343
9344 /* Now scan the fixups and perform the required changes. */
9345 while (fix)
9346 {
9347 Mfix * ftmp;
9348 Mfix * fdel;
9349 Mfix * last_added_fix;
9350 Mfix * last_barrier = NULL;
9351 Mfix * this_fix;
9352
9353 /* Skip any further barriers before the next fix. */
9354 while (fix && GET_CODE (fix->insn) == BARRIER)
9355 fix = fix->next;
9356
9357 /* No more fixes. */
9358 if (fix == NULL)
9359 break;
9360
9361 last_added_fix = NULL;
9362
9363 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9364 {
9365 if (GET_CODE (ftmp->insn) == BARRIER)
9366 {
9367 if (ftmp->address >= minipool_vector_head->max_address)
9368 break;
9369
9370 last_barrier = ftmp;
9371 }
9372 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9373 break;
9374
9375 last_added_fix = ftmp; /* Keep track of the last fix added. */
9376 }
9377
9378 /* If we found a barrier, drop back to that; any fixes that we
9379 could have reached but come after the barrier will now go in
9380 the next mini-pool. */
9381 if (last_barrier != NULL)
9382 {
9383 /* Reduce the refcount for those fixes that won't go into this
9384 pool after all. */
9385 for (fdel = last_barrier->next;
9386 fdel && fdel != ftmp;
9387 fdel = fdel->next)
9388 {
9389 fdel->minipool->refcount--;
9390 fdel->minipool = NULL;
9391 }
9392
9393 ftmp = last_barrier;
9394 }
9395 else
9396 {
9397 /* ftmp is the first fix that we can't fit into this pool and
9398 there are no natural barriers that we could use. Insert a
9399 new barrier in the code somewhere between the previous
9400 fix and this one, and arrange to jump around it. */
9401 HOST_WIDE_INT max_address;
9402
9403 /* The last item on the list of fixes must be a barrier, so
9404 we can never run off the end of the list of fixes without
9405 last_barrier being set. */
9406 gcc_assert (ftmp);
9407
9408 max_address = minipool_vector_head->max_address;
9409 /* Check that there isn't another fix that is in range that
9410 we couldn't fit into this pool because the pool was
9411 already too large: we need to put the pool before such an
9412 instruction. The pool itself may come just after the
9413 fix because create_fix_barrier also allows space for a
9414 jump instruction. */
9415 if (ftmp->address < max_address)
9416 max_address = ftmp->address + 1;
9417
9418 last_barrier = create_fix_barrier (last_added_fix, max_address);
9419 }
9420
9421 assign_minipool_offsets (last_barrier);
9422
9423 while (ftmp)
9424 {
9425 if (GET_CODE (ftmp->insn) != BARRIER
9426 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9427 == NULL))
9428 break;
9429
9430 ftmp = ftmp->next;
9431 }
9432
9433 /* Scan over the fixes we have identified for this pool, fixing them
9434 up and adding the constants to the pool itself. */
9435 for (this_fix = fix; this_fix && ftmp != this_fix;
9436 this_fix = this_fix->next)
9437 if (GET_CODE (this_fix->insn) != BARRIER)
9438 {
9439 rtx addr
9440 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9441 minipool_vector_label),
9442 this_fix->minipool->offset);
9443 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9444 }
9445
9446 dump_minipool (last_barrier->insn);
9447 fix = ftmp;
9448 }
9449
9450 /* From now on we must synthesize any constants that we can't handle
9451 directly. This can happen if the RTL gets split during final
9452 instruction generation. */
9453 after_arm_reorg = 1;
9454
9455 /* Free the minipool memory. */
9456 obstack_free (&minipool_obstack, minipool_startobj);
9457 }
9458 \f
9459 /* Routines to output assembly language. */
9460
9461 /* If the rtx is one of the valid FPA immediate constants then return the
9462 string representing that number. In this way we can ensure that valid
9463 double constants are generated even when cross compiling. */
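/* Illustrative note (values assumed from init_fp_table): the valid FPA
   immediates are 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 0.5 and 10.0, so a
   CONST_DOUBLE holding 3.0 returns the string "3".  */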
9464 const char *
9465 fp_immediate_constant (rtx x)
9466 {
9467 REAL_VALUE_TYPE r;
9468 int i;
9469
9470 if (!fp_consts_inited)
9471 init_fp_table ();
9472
9473 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9474 for (i = 0; i < 8; i++)
9475 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9476 return strings_fp[i];
9477
9478 gcc_unreachable ();
9479 }
9480
9481 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9482 static const char *
9483 fp_const_from_val (REAL_VALUE_TYPE *r)
9484 {
9485 int i;
9486
9487 if (!fp_consts_inited)
9488 init_fp_table ();
9489
9490 for (i = 0; i < 8; i++)
9491 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9492 return strings_fp[i];
9493
9494 gcc_unreachable ();
9495 }
9496
9497 /* Output the operands of a LDM/STM instruction to STREAM.
9498 MASK is the ARM register set mask of which only bits 0-15 are important.
9499 REG is the base register, either the frame pointer or the stack pointer,
9500 INSTR is the possibly suffixed load or store instruction.
9501 RFE is nonzero if the instruction should also copy spsr to cpsr. */
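/* Hypothetical example of the output: the call

       print_multi_reg (stream, "ldmfd\t%r!, ", SP_REGNUM,
                        (1 << 4) | (1 << 5) | (1 << LR_REGNUM), 0);

   would print "ldmfd sp!, {r4, r5, lr}"; with RFE nonzero (and the PC
   in MASK) the closing brace gains a '^' suffix.  */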
9502
9503 static void
9504 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9505 unsigned long mask, int rfe)
9506 {
9507 unsigned i;
9508 bool not_first = FALSE;
9509
9510 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9511 fputc ('\t', stream);
9512 asm_fprintf (stream, instr, reg);
9513 fputc ('{', stream);
9514
9515 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9516 if (mask & (1 << i))
9517 {
9518 if (not_first)
9519 fprintf (stream, ", ");
9520
9521 asm_fprintf (stream, "%r", i);
9522 not_first = TRUE;
9523 }
9524
9525 if (rfe)
9526 fprintf (stream, "}^\n");
9527 else
9528 fprintf (stream, "}\n");
9529 }
9530
9531
9532 /* Output a FLDMD instruction to STREAM.
9533 BASE is the register containing the address.
9534 REG and COUNT specify the register range.
9535 Extra registers may be added to avoid hardware bugs.
9536
9537 We output FLDMD even for ARMv5 VFP implementations. Although
9538 FLDMD is technically not supported until ARMv6, it is believed
9539 that all VFP implementations support its use in this context. */
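/* Illustrative output (hypothetical arguments): BASE = SP_REGNUM,
   REG = 8, COUNT = 3 emits

       fldmfdd sp!, {d8, d9, d10}

   On pre-ARMv6 cores a two-register transfer is widened to three
   below, to avoid the ARM10 VFPr1 erratum.  */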
9540
9541 static void
9542 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9543 {
9544 int i;
9545
9546 /* Workaround ARM10 VFPr1 bug. */
9547 if (count == 2 && !arm_arch6)
9548 {
9549 if (reg == 15)
9550 reg--;
9551 count++;
9552 }
9553
9554 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9555 load into multiple parts if we have to handle more than 16 registers. */
9556 if (count > 16)
9557 {
9558 vfp_output_fldmd (stream, base, reg, 16);
9559 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9560 return;
9561 }
9562
9563 fputc ('\t', stream);
9564 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9565
9566 for (i = reg; i < reg + count; i++)
9567 {
9568 if (i > reg)
9569 fputs (", ", stream);
9570 asm_fprintf (stream, "d%d", i);
9571 }
9572 fputs ("}\n", stream);
9573
9574 }
9575
9576
9577 /* Output the assembly for a store multiple. */
9578
9579 const char *
9580 vfp_output_fstmd (rtx * operands)
9581 {
9582 char pattern[100];
9583 int p;
9584 int base;
9585 int i;
9586
9587 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9588 p = strlen (pattern);
9589
9590 gcc_assert (GET_CODE (operands[1]) == REG);
9591
9592 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9593 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9594 {
9595 p += sprintf (&pattern[p], ", d%d", base + i);
9596 }
9597 strcpy (&pattern[p], "}");
9598
9599 output_asm_insn (pattern, operands);
9600 return "";
9601 }
9602
9603
9604 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
9605 number of bytes pushed. */
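/* Usage sketch (hypothetical): a prologue saving d8-d11 would call

       vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 4);

   which emits one push-multiple PARALLEL (later printed as fstmfdd by
   vfp_output_fstmd), attaches a REG_FRAME_RELATED_EXPR note describing
   the individual stores for the unwinder, and returns 32 bytes.  */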
9606
9607 static int
9608 vfp_emit_fstmd (int base_reg, int count)
9609 {
9610 rtx par;
9611 rtx dwarf;
9612 rtx tmp, reg;
9613 int i;
9614
9615 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9616 register pairs are stored by a store multiple insn. We avoid this
9617 by pushing an extra pair. */
9618 if (count == 2 && !arm_arch6)
9619 {
9620 if (base_reg == LAST_VFP_REGNUM - 3)
9621 base_reg -= 2;
9622 count++;
9623 }
9624
9625 /* FSTMD may not store more than 16 doubleword registers at once. Split
9626 larger stores into multiple parts (up to a maximum of two, in
9627 practice). */
9628 if (count > 16)
9629 {
9630 int saved;
9631 /* NOTE: base_reg is an internal register number, so each D register
9632 counts as 2. */
9633 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9634 saved += vfp_emit_fstmd (base_reg, 16);
9635 return saved;
9636 }
9637
9638 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9639 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9640
9641 reg = gen_rtx_REG (DFmode, base_reg);
9642 base_reg += 2;
9643
9644 XVECEXP (par, 0, 0)
9645 = gen_rtx_SET (VOIDmode,
9646 gen_frame_mem (BLKmode,
9647 gen_rtx_PRE_DEC (BLKmode,
9648 stack_pointer_rtx)),
9649 gen_rtx_UNSPEC (BLKmode,
9650 gen_rtvec (1, reg),
9651 UNSPEC_PUSH_MULT));
9652
9653 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9654 plus_constant (stack_pointer_rtx, -(count * 8)));
9655 RTX_FRAME_RELATED_P (tmp) = 1;
9656 XVECEXP (dwarf, 0, 0) = tmp;
9657
9658 tmp = gen_rtx_SET (VOIDmode,
9659 gen_frame_mem (DFmode, stack_pointer_rtx),
9660 reg);
9661 RTX_FRAME_RELATED_P (tmp) = 1;
9662 XVECEXP (dwarf, 0, 1) = tmp;
9663
9664 for (i = 1; i < count; i++)
9665 {
9666 reg = gen_rtx_REG (DFmode, base_reg);
9667 base_reg += 2;
9668 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9669
9670 tmp = gen_rtx_SET (VOIDmode,
9671 gen_frame_mem (DFmode,
9672 plus_constant (stack_pointer_rtx,
9673 i * 8)),
9674 reg);
9675 RTX_FRAME_RELATED_P (tmp) = 1;
9676 XVECEXP (dwarf, 0, i + 1) = tmp;
9677 }
9678
9679 par = emit_insn (par);
9680 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9681 REG_NOTES (par));
9682 RTX_FRAME_RELATED_P (par) = 1;
9683
9684 return count * 8;
9685 }
9686
9687 /* Emit a call instruction with pattern PAT. ADDR is the address of
9688 the call target. */
9689
9690 void
9691 arm_emit_call_insn (rtx pat, rtx addr)
9692 {
9693 rtx insn;
9694
9695 insn = emit_call_insn (pat);
9696
9697 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9698 If the call might use such an entry, add a use of the PIC register
9699 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9700 if (TARGET_VXWORKS_RTP
9701 && flag_pic
9702 && GET_CODE (addr) == SYMBOL_REF
9703 && (SYMBOL_REF_DECL (addr)
9704 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9705 : !SYMBOL_REF_LOCAL_P (addr)))
9706 {
9707 require_pic_register ();
9708 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9709 }
9710 }
9711
9712 /* Output a 'call' insn. */
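/* Illustrative output for a call through r2 on a pre-ARMv5 core:

       mov     lr, pc
       mov     pc, r2

   with the final move replaced by "bx r2" when interworking or ARMv4T
   is in use.  */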
9713 const char *
9714 output_call (rtx *operands)
9715 {
9716 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9717
9718 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9719 if (REGNO (operands[0]) == LR_REGNUM)
9720 {
9721 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9722 output_asm_insn ("mov%?\t%0, %|lr", operands);
9723 }
9724
9725 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9726
9727 if (TARGET_INTERWORK || arm_arch4t)
9728 output_asm_insn ("bx%?\t%0", operands);
9729 else
9730 output_asm_insn ("mov%?\t%|pc, %0", operands);
9731
9732 return "";
9733 }
9734
9735 /* Output a 'call' insn whose target is a reference in memory. */
9736 const char *
9737 output_call_mem (rtx *operands)
9738 {
9739 if (TARGET_INTERWORK && !arm_arch5)
9740 {
9741 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9742 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9743 output_asm_insn ("bx%?\t%|ip", operands);
9744 }
9745 else if (regno_use_in (LR_REGNUM, operands[0]))
9746 {
9747 /* LR is used in the memory address. We load the address in the
9748 first instruction. It's safe to use IP as the target of the
9749 load since the call will kill it anyway. */
9750 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9751 if (arm_arch5)
9752 output_asm_insn ("blx%?\t%|ip", operands);
9753 else
9754 {
9755 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9756 if (arm_arch4t)
9757 output_asm_insn ("bx%?\t%|ip", operands);
9758 else
9759 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9760 }
9761 }
9762 else
9763 {
9764 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9765 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9766 }
9767
9768 return "";
9769 }
9770
9771
9772 /* Output a move from arm registers to an fpa register.
9773 OPERANDS[0] is an fpa register.
9774 OPERANDS[1] is the first of the arm registers holding the value. */
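/* Illustrative output (assuming OPERANDS[1] is r0 and OPERANDS[0] is f0):
       stmfd   sp!, {r0, r1, r2}
       ldfe    f0, [sp], #12
   i.e. the three ARM registers are pushed and reloaded as one extended
   value.  */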
9775 const char *
9776 output_mov_long_double_fpa_from_arm (rtx *operands)
9777 {
9778 int arm_reg0 = REGNO (operands[1]);
9779 rtx ops[3];
9780
9781 gcc_assert (arm_reg0 != IP_REGNUM);
9782
9783 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9784 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9785 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9786
9787 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9788 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9789
9790 return "";
9791 }
9792
9793 /* Output a move from an fpa register to arm registers.
9794 OPERANDS[0] is the first of the arm registers holding the value.
9795 OPERANDS[1] is an fpa register. */
9796 const char *
9797 output_mov_long_double_arm_from_fpa (rtx *operands)
9798 {
9799 int arm_reg0 = REGNO (operands[0]);
9800 rtx ops[3];
9801
9802 gcc_assert (arm_reg0 != IP_REGNUM);
9803
9804 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9805 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9806 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9807
9808 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9809 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9810 return "";
9811 }
9812
9813 /* Output a move of a long double from arm registers to arm registers.
9814 OPERANDS[0] is the destination.
9815 OPERANDS[1] is the source. */
9816 const char *
9817 output_mov_long_double_arm_from_arm (rtx *operands)
9818 {
9819 /* We have to be careful here because the two might overlap. */
9820 int dest_start = REGNO (operands[0]);
9821 int src_start = REGNO (operands[1]);
9822 rtx ops[2];
9823 int i;
9824
9825 if (dest_start < src_start)
9826 {
9827 for (i = 0; i < 3; i++)
9828 {
9829 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9830 ops[1] = gen_rtx_REG (SImode, src_start + i);
9831 output_asm_insn ("mov%?\t%0, %1", ops);
9832 }
9833 }
9834 else
9835 {
9836 for (i = 2; i >= 0; i--)
9837 {
9838 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9839 ops[1] = gen_rtx_REG (SImode, src_start + i);
9840 output_asm_insn ("mov%?\t%0, %1", ops);
9841 }
9842 }
9843
9844 return "";
9845 }
9846
9847
9848 /* Output a move from arm registers to an fpa register.
9849 OPERANDS[0] is an fpa register.
9850 OPERANDS[1] is the first register of an arm register pair. */
9851 const char *
9852 output_mov_double_fpa_from_arm (rtx *operands)
9853 {
9854 int arm_reg0 = REGNO (operands[1]);
9855 rtx ops[2];
9856
9857 gcc_assert (arm_reg0 != IP_REGNUM);
9858
9859 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9860 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9861 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9862 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9863 return "";
9864 }
9865
9866 /* Output a move from an fpa register to arm registers.
9867 OPERANDS[0] is the first register of an arm register pair.
9868 OPERANDS[1] is an fpa register. */
9869 const char *
9870 output_mov_double_arm_from_fpa (rtx *operands)
9871 {
9872 int arm_reg0 = REGNO (operands[0]);
9873 rtx ops[2];
9874
9875 gcc_assert (arm_reg0 != IP_REGNUM);
9876
9877 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9878 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9879 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9880 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9881 return "";
9882 }
9883
9884 /* Output a move between double words.
9885 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9886 or MEM<-REG and all MEMs must be offsettable addresses. */
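/* Illustrative examples (hypothetical operands): a DImode load of
   r0/r1 from a plain register address in r2 is emitted as

       ldmia   r2, {r0-r1}

   while a pre-decrement load on an LDRD-capable core is emitted as

       ldrd    r0, [r2, #-8]!

   The cases below handle each addressing form in turn.  */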
9887 const char *
9888 output_move_double (rtx *operands)
9889 {
9890 enum rtx_code code0 = GET_CODE (operands[0]);
9891 enum rtx_code code1 = GET_CODE (operands[1]);
9892 rtx otherops[3];
9893
9894 if (code0 == REG)
9895 {
9896 int reg0 = REGNO (operands[0]);
9897
9898 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9899
9900 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9901
9902 switch (GET_CODE (XEXP (operands[1], 0)))
9903 {
9904 case REG:
9905 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9906 break;
9907
9908 case PRE_INC:
9909 gcc_assert (TARGET_LDRD);
9910 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9911 break;
9912
9913 case PRE_DEC:
9914 if (TARGET_LDRD)
9915 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9916 else
9917 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9918 break;
9919
9920 case POST_INC:
9921 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9922 break;
9923
9924 case POST_DEC:
9925 gcc_assert (TARGET_LDRD);
9926 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9927 break;
9928
9929 case PRE_MODIFY:
9930 case POST_MODIFY:
9931 otherops[0] = operands[0];
9932 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9933 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9934
9935 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9936 {
9937 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9938 {
9939 /* Registers overlap so split out the increment. */
9940 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9941 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9942 }
9943 else
9944 {
9945 /* IWMMXT allows offsets larger than ldrd can handle,
9946 fix these up with a pair of ldr. */
9947 if (GET_CODE (otherops[2]) == CONST_INT
9948 && (INTVAL(otherops[2]) <= -256
9949 || INTVAL(otherops[2]) >= 256))
9950 {
9951 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9952 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9953 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9954 }
9955 else
9956 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9957 }
9958 }
9959 else
9960 {
9961 /* IWMMXT allows offsets larger than ldrd can handle,
9962 fix these up with a pair of ldr. */
9963 if (GET_CODE (otherops[2]) == CONST_INT
9964 && (INTVAL(otherops[2]) <= -256
9965 || INTVAL(otherops[2]) >= 256))
9966 {
9967 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9968 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9969 otherops[0] = operands[0];
9970 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9971 }
9972 else
9973 /* We only allow constant increments, so this is safe. */
9974 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9975 }
9976 break;
9977
9978 case LABEL_REF:
9979 case CONST:
9980 output_asm_insn ("adr%?\t%0, %1", operands);
9981 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9982 break;
9983
9984 /* ??? This needs checking for thumb2. */
9985 default:
9986 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9987 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9988 {
9989 otherops[0] = operands[0];
9990 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9991 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9992
9993 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9994 {
9995 if (GET_CODE (otherops[2]) == CONST_INT)
9996 {
9997 switch ((int) INTVAL (otherops[2]))
9998 {
9999 case -8:
10000 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10001 return "";
10002 case -4:
10003 if (TARGET_THUMB2)
10004 break;
10005 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10006 return "";
10007 case 4:
10008 if (TARGET_THUMB2)
10009 break;
10010 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10011 return "";
10012 }
10013 }
10014 if (TARGET_LDRD
10015 && (GET_CODE (otherops[2]) == REG
10016 || (GET_CODE (otherops[2]) == CONST_INT
10017 && INTVAL (otherops[2]) > -256
10018 && INTVAL (otherops[2]) < 256)))
10019 {
10020 if (reg_overlap_mentioned_p (otherops[0],
10021 otherops[2]))
10022 {
10023 /* Swap base and index registers over to
10024 avoid a conflict. */
10025 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
10026 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
10027 }
10028 /* If both registers conflict, it will usually
10029 have been fixed by a splitter. */
10030 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10031 {
10032 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10033 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10034 otherops);
10035 }
10036 else
10037 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10038 return "";
10039 }
10040
10041 if (GET_CODE (otherops[2]) == CONST_INT)
10042 {
10043 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10044 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10045 else
10046 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10047 }
10048 else
10049 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10050 }
10051 else
10052 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10053
10054 return "ldm%(ia%)\t%0, %M0";
10055 }
10056 else
10057 {
10058 otherops[1] = adjust_address (operands[1], SImode, 4);
10059 /* Take care of overlapping base/data reg. */
10060 if (reg_mentioned_p (operands[0], operands[1]))
10061 {
10062 output_asm_insn ("ldr%?\t%0, %1", otherops);
10063 output_asm_insn ("ldr%?\t%0, %1", operands);
10064 }
10065 else
10066 {
10067 output_asm_insn ("ldr%?\t%0, %1", operands);
10068 output_asm_insn ("ldr%?\t%0, %1", otherops);
10069 }
10070 }
10071 }
10072 }
10073 else
10074 {
10075 /* Constraints should ensure this. */
10076 gcc_assert (code0 == MEM && code1 == REG);
10077 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10078
10079 switch (GET_CODE (XEXP (operands[0], 0)))
10080 {
10081 case REG:
10082 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10083 break;
10084
10085 case PRE_INC:
10086 gcc_assert (TARGET_LDRD);
10087 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10088 break;
10089
10090 case PRE_DEC:
10091 if (TARGET_LDRD)
10092 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10093 else
10094 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10095 break;
10096
10097 case POST_INC:
10098 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10099 break;
10100
10101 case POST_DEC:
10102 gcc_assert (TARGET_LDRD);
10103 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10104 break;
10105
10106 case PRE_MODIFY:
10107 case POST_MODIFY:
10108 otherops[0] = operands[1];
10109 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10110 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10111
10112 /* IWMMXT allows offsets larger than strd can handle,
10113 fix these up with a pair of str. */
10114 if (GET_CODE (otherops[2]) == CONST_INT
10115 && (INTVAL(otherops[2]) <= -256
10116 || INTVAL(otherops[2]) >= 256))
10117 {
10118 rtx reg1;
10119 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10120 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10121 {
10122 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10123 otherops[0] = reg1;
10124 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10125 }
10126 else
10127 {
10128 otherops[0] = reg1;
10129 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10130 otherops[0] = operands[1];
10131 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10132 }
10133 }
10134 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10135 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10136 else
10137 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10138 break;
10139
10140 case PLUS:
10141 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10142 if (GET_CODE (otherops[2]) == CONST_INT)
10143 {
10144 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10145 {
10146 case -8:
10147 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10148 return "";
10149
10150 case -4:
10151 if (TARGET_THUMB2)
10152 break;
10153 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10154 return "";
10155
10156 case 4:
10157 if (TARGET_THUMB2)
10158 break;
10159 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10160 return "";
10161 }
10162 }
10163 if (TARGET_LDRD
10164 && (GET_CODE (otherops[2]) == REG
10165 || (GET_CODE (otherops[2]) == CONST_INT
10166 && INTVAL (otherops[2]) > -256
10167 && INTVAL (otherops[2]) < 256)))
10168 {
10169 otherops[0] = operands[1];
10170 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10171 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10172 return "";
10173 }
10174 /* Fall through */
10175
10176 default:
10177 otherops[0] = adjust_address (operands[0], SImode, 4);
10178 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10179 output_asm_insn ("str%?\t%1, %0", operands);
10180 output_asm_insn ("str%?\t%1, %0", otherops);
10181 }
10182 }
10183
10184 return "";
10185 }
10186
10187 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10188 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10189
10190 const char *
10191 output_move_quad (rtx *operands)
10192 {
10193 if (REG_P (operands[0]))
10194 {
10195 /* Load, or reg->reg move. */
10196
10197 if (MEM_P (operands[1]))
10198 {
10199 switch (GET_CODE (XEXP (operands[1], 0)))
10200 {
10201 case REG:
10202 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10203 break;
10204
10205 case LABEL_REF:
10206 case CONST:
10207 output_asm_insn ("adr%?\t%0, %1", operands);
10208 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10209 break;
10210
10211 default:
10212 gcc_unreachable ();
10213 }
10214 }
10215 else
10216 {
10217 rtx ops[2];
10218 int dest, src, i;
10219
10220 gcc_assert (REG_P (operands[1]));
10221
10222 dest = REGNO (operands[0]);
10223 src = REGNO (operands[1]);
10224
10225 /* This seems pretty dumb, but hopefully GCC won't try to do it
10226 very often. */
10227 if (dest < src)
10228 for (i = 0; i < 4; i++)
10229 {
10230 ops[0] = gen_rtx_REG (SImode, dest + i);
10231 ops[1] = gen_rtx_REG (SImode, src + i);
10232 output_asm_insn ("mov%?\t%0, %1", ops);
10233 }
10234 else
10235 for (i = 3; i >= 0; i--)
10236 {
10237 ops[0] = gen_rtx_REG (SImode, dest + i);
10238 ops[1] = gen_rtx_REG (SImode, src + i);
10239 output_asm_insn ("mov%?\t%0, %1", ops);
10240 }
10241 }
10242 }
10243 else
10244 {
10245 gcc_assert (MEM_P (operands[0]));
10246 gcc_assert (REG_P (operands[1]));
10247 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10248
10249 switch (GET_CODE (XEXP (operands[0], 0)))
10250 {
10251 case REG:
10252 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10253 break;
10254
10255 default:
10256 gcc_unreachable ();
10257 }
10258 }
10259
10260 return "";
10261 }
10262
10263 /* Output a VFP load or store instruction. */
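/* Illustrative outputs (hypothetical operands): an SFmode load prints
   as "flds s0, [r0]", a DFmode store to a pre-decremented address as
   "fstmdbd sp!, {d0}", and integer-mode transfers get a trailing
   "@ int" comment.  */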
10264
10265 const char *
10266 output_move_vfp (rtx *operands)
10267 {
10268 rtx reg, mem, addr, ops[2];
10269 int load = REG_P (operands[0]);
10270 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10271 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10272 const char *template;
10273 char buff[50];
10274 enum machine_mode mode;
10275
10276 reg = operands[!load];
10277 mem = operands[load];
10278
10279 mode = GET_MODE (reg);
10280
10281 gcc_assert (REG_P (reg));
10282 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10283 gcc_assert (mode == SFmode
10284 || mode == DFmode
10285 || mode == SImode
10286 || mode == DImode
10287 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10288 gcc_assert (MEM_P (mem));
10289
10290 addr = XEXP (mem, 0);
10291
10292 switch (GET_CODE (addr))
10293 {
10294 case PRE_DEC:
10295 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10296 ops[0] = XEXP (addr, 0);
10297 ops[1] = reg;
10298 break;
10299
10300 case POST_INC:
10301 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10302 ops[0] = XEXP (addr, 0);
10303 ops[1] = reg;
10304 break;
10305
10306 default:
10307 template = "f%s%c%%?\t%%%s0, %%1%s";
10308 ops[0] = reg;
10309 ops[1] = mem;
10310 break;
10311 }
10312
10313 sprintf (buff, template,
10314 load ? "ld" : "st",
10315 dp ? 'd' : 's',
10316 dp ? "P" : "",
10317 integer_p ? "\t%@ int" : "");
10318 output_asm_insn (buff, ops);
10319
10320 return "";
10321 }
10322
10323 /* Output a Neon quad-word load or store, or a load or store for
10324 larger structure modes. We could also support post-modify forms using
10325 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10326 yet.
10327 WARNING: The ordering of elements in memory is weird in big-endian mode,
10328 because we use VSTM instead of VST1, so that vector stores done via ARM
10329 registers write values in the same order as stores done directly from Neon
10330 registers. For example, the byte ordering of a quadword vector with 16-bit
10331 elements like this:
10332
10333 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10334
10335 will be (with lowest address first, h = most-significant byte,
10336 l = least-significant byte of element):
10337
10338 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10339 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10340
10341 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10342 rN in the order:
10343
10344 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10345
10346 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10347 layout will result as if VSTM/VLDM were used. */
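/* Illustrative outputs (hypothetical operands): a post-incremented
   quad-word load into d8/d9 prints as a single load-multiple, roughly

       vldmia  r0!, {d8-d9}

   while a PLUS or LABEL_REF address falls back to one vldr/vstr per
   D register, as handled below.  */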
10348
10349 const char *
10350 output_move_neon (rtx *operands)
10351 {
10352 rtx reg, mem, addr, ops[2];
10353 int regno, load = REG_P (operands[0]);
10354 const char *template;
10355 char buff[50];
10356 enum machine_mode mode;
10357
10358 reg = operands[!load];
10359 mem = operands[load];
10360
10361 mode = GET_MODE (reg);
10362
10363 gcc_assert (REG_P (reg));
10364 regno = REGNO (reg);
10365 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10366 || NEON_REGNO_OK_FOR_QUAD (regno));
10367 gcc_assert (VALID_NEON_DREG_MODE (mode)
10368 || VALID_NEON_QREG_MODE (mode)
10369 || VALID_NEON_STRUCT_MODE (mode));
10370 gcc_assert (MEM_P (mem));
10371
10372 addr = XEXP (mem, 0);
10373
10374 /* Strip off const from addresses like (const (plus (...))). */
10375 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10376 addr = XEXP (addr, 0);
10377
10378 switch (GET_CODE (addr))
10379 {
10380 case POST_INC:
10381 template = "v%smia%%?\t%%0!, %%h1";
10382 ops[0] = XEXP (addr, 0);
10383 ops[1] = reg;
10384 break;
10385
10386 case POST_MODIFY:
10387 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10388 gcc_unreachable ();
10389
10390 case LABEL_REF:
10391 case PLUS:
10392 {
10393 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10394 int i;
10395 int overlap = -1;
10396 for (i = 0; i < nregs; i++)
10397 {
10398 /* We're only using DImode here because it's a convenient size. */
10399 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10400 ops[1] = adjust_address (mem, SImode, 8 * i);
10401 if (reg_overlap_mentioned_p (ops[0], mem))
10402 {
10403 gcc_assert (overlap == -1);
10404 overlap = i;
10405 }
10406 else
10407 {
10408 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10409 output_asm_insn (buff, ops);
10410 }
10411 }
10412 if (overlap != -1)
10413 {
10414 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10415 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10416 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10417 output_asm_insn (buff, ops);
10418 }
10419
10420 return "";
10421 }
10422
10423 default:
10424 template = "v%smia%%?\t%%m0, %%h1";
10425 ops[0] = mem;
10426 ops[1] = reg;
10427 }
10428
10429 sprintf (buff, template, load ? "ld" : "st");
10430 output_asm_insn (buff, ops);
10431
10432 return "";
10433 }
10434
10435 /* Output an ADD r, s, #n where n may be too big for one instruction.
10436 If n is zero and the destination is the same as the source, output nothing. */
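/* For example (illustrative): an addend of -0x801 is emitted, via
   output_multi_immediate below, as
       sub     r0, r1, #1
       sub     r0, r0, #2048
   each piece being a valid rotated 8-bit immediate.  */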
10437 const char *
10438 output_add_immediate (rtx *operands)
10439 {
10440 HOST_WIDE_INT n = INTVAL (operands[2]);
10441
10442 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10443 {
10444 if (n < 0)
10445 output_multi_immediate (operands,
10446 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10447 -n);
10448 else
10449 output_multi_immediate (operands,
10450 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10451 n);
10452 }
10453
10454 return "";
10455 }
10456
10457 /* Output a multiple immediate operation.
10458 OPERANDS is the vector of operands referred to in the output patterns.
10459 INSTR1 is the output pattern to use for the first constant.
10460 INSTR2 is the output pattern to use for subsequent constants.
10461 IMMED_OP is the index of the constant slot in OPERANDS.
10462 N is the constant value. */
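/* Worked example (illustrative): N = 0x12345 with an "add" pattern is
   split into pieces that are each a valid rotated 8-bit immediate:
       add     r0, r1, #69         @ 0x00045
       add     r0, r0, #8960       @ 0x02300
       add     r0, r0, #65536      @ 0x10000
   using INSTR1 for the first piece and INSTR2 for the rest.  */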
10463 static const char *
10464 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10465 int immed_op, HOST_WIDE_INT n)
10466 {
10467 #if HOST_BITS_PER_WIDE_INT > 32
10468 n &= 0xffffffff;
10469 #endif
10470
10471 if (n == 0)
10472 {
10473 /* Quick and easy output. */
10474 operands[immed_op] = const0_rtx;
10475 output_asm_insn (instr1, operands);
10476 }
10477 else
10478 {
10479 int i;
10480 const char * instr = instr1;
10481
10482 /* Note that n is never zero here (which would give no output). */
10483 for (i = 0; i < 32; i += 2)
10484 {
10485 if (n & (3 << i))
10486 {
10487 operands[immed_op] = GEN_INT (n & (255 << i));
10488 output_asm_insn (instr, operands);
10489 instr = instr2;
10490 i += 6;
10491 }
10492 }
10493 }
10494
10495 return "";
10496 }
10497
10498 /* Return the name of a shifter operation. */
10499 static const char *
10500 arm_shift_nmem (enum rtx_code code)
10501 {
10502 switch (code)
10503 {
10504 case ASHIFT:
10505 return ARM_LSL_NAME;
10506
10507 case ASHIFTRT:
10508 return "asr";
10509
10510 case LSHIFTRT:
10511 return "lsr";
10512
10513 case ROTATERT:
10514 return "ror";
10515
10516 default:
10517 abort ();
10518 }
10519 }
10520
10521 /* Return the appropriate ARM instruction for the operation code.
10522 The returned result should not be overwritten. OP is the rtx of the
10523 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10524 was shifted. */
10525 const char *
10526 arithmetic_instr (rtx op, int shift_first_arg)
10527 {
10528 switch (GET_CODE (op))
10529 {
10530 case PLUS:
10531 return "add";
10532
10533 case MINUS:
10534 return shift_first_arg ? "rsb" : "sub";
10535
10536 case IOR:
10537 return "orr";
10538
10539 case XOR:
10540 return "eor";
10541
10542 case AND:
10543 return "and";
10544
10545 case ASHIFT:
10546 case ASHIFTRT:
10547 case LSHIFTRT:
10548 case ROTATERT:
10549 return arm_shift_nmem (GET_CODE (op));
10550
10551 default:
10552 gcc_unreachable ();
10553 }
10554 }
10555
10556 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10557 for the operation code. The returned result should not be overwritten.
10558 OP is the rtx of the shift.
10559 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
10560 will be the constant shift amount. */
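/* For instance (illustrative): (ashiftrt x (const_int 3)) yields "asr"
   with *AMOUNTP set to 3, while (mult x (const_int 8)) is rewritten as
   a left shift, returning ARM_LSL_NAME with *AMOUNTP = 3.  */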
10561 static const char *
10562 shift_op (rtx op, HOST_WIDE_INT *amountp)
10563 {
10564 const char * mnem;
10565 enum rtx_code code = GET_CODE (op);
10566
10567 switch (GET_CODE (XEXP (op, 1)))
10568 {
10569 case REG:
10570 case SUBREG:
10571 *amountp = -1;
10572 break;
10573
10574 case CONST_INT:
10575 *amountp = INTVAL (XEXP (op, 1));
10576 break;
10577
10578 default:
10579 gcc_unreachable ();
10580 }
10581
10582 switch (code)
10583 {
10584 case ROTATE:
10585 gcc_assert (*amountp != -1);
10586 *amountp = 32 - *amountp;
10587 code = ROTATERT;
10588
10589 /* Fall through. */
10590
10591 case ASHIFT:
10592 case ASHIFTRT:
10593 case LSHIFTRT:
10594 case ROTATERT:
10595 mnem = arm_shift_nmem (code);
10596 break;
10597
10598 case MULT:
10599 /* We never have to worry about the amount being other than a
10600 power of 2, since this case can never be reloaded from a reg. */
10601 gcc_assert (*amountp != -1);
10602 *amountp = int_log2 (*amountp);
10603 return ARM_LSL_NAME;
10604
10605 default:
10606 gcc_unreachable ();
10607 }
10608
10609 if (*amountp != -1)
10610 {
10611 /* This is not 100% correct, but follows from the desire to merge
10612 multiplication by a power of 2 with the recognizer for a
10613 shift. >=32 is not a valid shift for "lsl", so we must try and
10614 output a shift that produces the correct arithmetical result.
10615 Using lsr #32 is identical except for the fact that the carry bit
10616 is not set correctly if we set the flags; but we never use the
10617 carry bit from such an operation, so we can ignore that. */
10618 if (code == ROTATERT)
10619 /* Rotate is just modulo 32. */
10620 *amountp &= 31;
10621 else if (*amountp != (*amountp & 31))
10622 {
10623 if (code == ASHIFT)
10624 mnem = "lsr";
10625 *amountp = 32;
10626 }
10627
10628 /* Shifts of 0 are no-ops. */
10629 if (*amountp == 0)
10630 return NULL;
10631 }
10632
10633 return mnem;
10634 }
10635
10636 /* Obtain the shift count corresponding to the power of two POWER. */
10637
10638 static HOST_WIDE_INT
10639 int_log2 (HOST_WIDE_INT power)
10640 {
10641 HOST_WIDE_INT shift = 0;
10642
10643 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10644 {
10645 gcc_assert (shift <= 31);
10646 shift++;
10647 }
10648
10649 return shift;
10650 }
10651
10652 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10653 because /bin/as is horribly restrictive. The judgement about
10654 whether or not each character is 'printable' (and can be output as
10655 is) or not (and must be printed with an octal escape) must be made
10656 with reference to the *host* character set -- the situation is
10657 similar to that discussed in the comments above pp_c_char in
10658 c-pretty-print.c. */
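/* Illustrative example: the host characters 'a', '"' and '\n' are
   emitted as

       .ascii  "a\"\012"

   with a fresh .ascii directive started whenever a chunk reaches
   MAX_ASCII_LEN characters.  */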
10659
10660 #define MAX_ASCII_LEN 51
10661
10662 void
10663 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10664 {
10665 int i;
10666 int len_so_far = 0;
10667
10668 fputs ("\t.ascii\t\"", stream);
10669
10670 for (i = 0; i < len; i++)
10671 {
10672 int c = p[i];
10673
10674 if (len_so_far >= MAX_ASCII_LEN)
10675 {
10676 fputs ("\"\n\t.ascii\t\"", stream);
10677 len_so_far = 0;
10678 }
10679
10680 if (ISPRINT (c))
10681 {
10682 if (c == '\\' || c == '\"')
10683 {
10684 putc ('\\', stream);
10685 len_so_far++;
10686 }
10687 putc (c, stream);
10688 len_so_far++;
10689 }
10690 else
10691 {
10692 fprintf (stream, "\\%03o", c);
10693 len_so_far += 4;
10694 }
10695 }
10696
10697 fputs ("\"\n", stream);
10698 }
10699 \f
10700 /* Compute the register save mask for registers 0 through 12
10701 inclusive. This code is used by arm_compute_save_reg_mask. */
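/* Illustrative result: for an ordinary function that uses only r4 and
   r7 the returned mask is (1 << 4) | (1 << 7); interrupt functions
   additionally save any call-clobbered registers they touch, as
   described below.  */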
10702
10703 static unsigned long
10704 arm_compute_save_reg0_reg12_mask (void)
10705 {
10706 unsigned long func_type = arm_current_func_type ();
10707 unsigned long save_reg_mask = 0;
10708 unsigned int reg;
10709
10710 if (IS_INTERRUPT (func_type))
10711 {
10712 unsigned int max_reg;
10713 /* Interrupt functions must not corrupt any registers,
10714 even call clobbered ones. If this is a leaf function
10715 we can just examine the registers used by the RTL, but
10716 otherwise we have to assume that whatever function is
10717 called might clobber anything, and so we have to save
10718 all the call-clobbered registers as well. */
10719 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10720 /* FIQ handlers have registers r8 - r12 banked, so
10721 we only need to check r0 - r7; normal ISRs only
10722 bank r14 and r15, so we must check up to r12.
10723 r13 is the stack pointer which is always preserved,
10724 so we do not need to consider it here. */
10725 max_reg = 7;
10726 else
10727 max_reg = 12;
10728
10729 for (reg = 0; reg <= max_reg; reg++)
10730 if (df_regs_ever_live_p (reg)
10731 || (! current_function_is_leaf && call_used_regs[reg]))
10732 save_reg_mask |= (1 << reg);
10733
10734 /* Also save the pic base register if necessary. */
10735 if (flag_pic
10736 && !TARGET_SINGLE_PIC_BASE
10737 && arm_pic_register != INVALID_REGNUM
10738 && current_function_uses_pic_offset_table)
10739 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10740 }
10741 else
10742 {
10743 /* In arm mode we handle r11 (FP) as a special case. */
10744 unsigned last_reg = TARGET_ARM ? 10 : 11;
10745
10746 /* In the normal case we only need to save those registers
10747 which are call saved and which are used by this function. */
10748 for (reg = 0; reg <= last_reg; reg++)
10749 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10750 save_reg_mask |= (1 << reg);
10751
10752 /* Handle the frame pointer as a special case. */
10753 if (! TARGET_APCS_FRAME
10754 && ! frame_pointer_needed
10755 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10756 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10757 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10763
10764 /* If we aren't loading the PIC register,
10765 don't stack it even though it may be live. */
10766 if (flag_pic
10767 && !TARGET_SINGLE_PIC_BASE
10768 && arm_pic_register != INVALID_REGNUM
10769 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10770 || current_function_uses_pic_offset_table))
10771 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10772
10773 /* The prologue will copy SP into R0, so save it. */
10774 if (IS_STACKALIGN (func_type))
10775 save_reg_mask |= 1;
10776 }
10777
10778 /* Save registers so the exception handler can modify them. */
10779 if (current_function_calls_eh_return)
10780 {
10781 unsigned int i;
10782
10783 for (i = 0; ; i++)
10784 {
10785 reg = EH_RETURN_DATA_REGNO (i);
10786 if (reg == INVALID_REGNUM)
10787 break;
10788 save_reg_mask |= 1 << reg;
10789 }
10790 }
10791
10792 return save_reg_mask;
10793 }
10794
10795
10796 /* Compute a bit mask of which registers need to be
10797 saved on the stack for the current function. */
10798
10799 static unsigned long
10800 arm_compute_save_reg_mask (void)
10801 {
10802 unsigned int save_reg_mask = 0;
10803 unsigned long func_type = arm_current_func_type ();
10804 unsigned int reg;
10805
10806 if (IS_NAKED (func_type))
10807 /* This should never really happen. */
10808 return 0;
10809
10810 /* If we are creating a stack frame, then we must save the frame pointer,
10811 IP (which will hold the old stack pointer), LR and the PC. */
10812 if (frame_pointer_needed && TARGET_ARM)
10813 save_reg_mask |=
10814 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10815 | (1 << IP_REGNUM)
10816 | (1 << LR_REGNUM)
10817 | (1 << PC_REGNUM);
10818
10819 /* Volatile functions do not return, so there
10820 is no need to save any other registers. */
10821 if (IS_VOLATILE (func_type))
10822 return save_reg_mask;
10823
10824 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10825
10826 /* Decide if we need to save the link register.
10827 Interrupt routines have their own banked link register,
10828 so they never need to save it.
10829 Otherwise if we do not use the link register we do not need to save
10830 it. If we are pushing other registers onto the stack however, we
10831 can save an instruction in the epilogue by pushing the link register
10832 now and then popping it back into the PC. This incurs extra memory
10833 accesses though, so we only do it when optimizing for size, and only
10834 if we know that we will not need a fancy return sequence. */
10835 if (df_regs_ever_live_p (LR_REGNUM)
10836 || (save_reg_mask
10837 && optimize_size
10838 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10839 && !current_function_calls_eh_return))
10840 save_reg_mask |= 1 << LR_REGNUM;
10841
10842 if (cfun->machine->lr_save_eliminated)
10843 save_reg_mask &= ~ (1 << LR_REGNUM);
10844
10845 if (TARGET_REALLY_IWMMXT
10846 && ((bit_count (save_reg_mask)
10847 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10848 {
10849 /* The total number of registers that are going to be pushed
10850 onto the stack is odd. We need to ensure that the stack
10851 is 64-bit aligned before we start to save iWMMXt registers,
10852 and also before we start to create locals. (A local variable
10853 might be a double or long long which we will load/store using
10854 an iWMMXt instruction). Therefore we need to push another
10855 ARM register, so that the stack will be 64-bit aligned. We
10856 try to avoid using the arg registers (r0 -r3) as they might be
10857 used to pass values in a tail call. */
10858 for (reg = 4; reg <= 12; reg++)
10859 if ((save_reg_mask & (1 << reg)) == 0)
10860 break;
10861
10862 if (reg <= 12)
10863 save_reg_mask |= (1 << reg);
10864 else
10865 {
10866 cfun->machine->sibcall_blocked = 1;
10867 save_reg_mask |= (1 << 3);
10868 }
10869 }
10870
10871 /* We may need to push an additional register for use initializing the
10872 PIC base register. */
10873 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10874 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10875 {
10876 reg = thumb_find_work_register (1 << 4);
10877 if (!call_used_regs[reg])
10878 save_reg_mask |= (1 << reg);
10879 }
10880
10881 return save_reg_mask;
10882 }
10883
10884
10885 /* Compute a bit mask of which registers need to be
10886 saved on the stack for the current function. */
10887 static unsigned long
10888 thumb1_compute_save_reg_mask (void)
10889 {
10890 unsigned long mask;
10891 unsigned reg;
10892
10893 mask = 0;
10894 for (reg = 0; reg < 12; reg ++)
10895 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10896 mask |= 1 << reg;
10897
10898 if (flag_pic
10899 && !TARGET_SINGLE_PIC_BASE
10900 && arm_pic_register != INVALID_REGNUM
10901 && current_function_uses_pic_offset_table)
10902 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10903
10904 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10905 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10906 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10907
10908 /* LR will also be pushed if any lo regs are pushed. */
10909 if (mask & 0xff || thumb_force_lr_save ())
10910 mask |= (1 << LR_REGNUM);
10911
10912 /* Make sure we have a low work register if we need one.
10913 We will need one if we are going to push a high register,
10914 but we are not currently intending to push a low register. */
10915 if ((mask & 0xff) == 0
10916 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10917 {
10918 /* Use thumb_find_work_register to choose which register
10919 we will use. If the register is live then we will
10920 have to push it. Use LAST_LO_REGNUM as our fallback
10921 choice for the register to select. */
10922 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10923 /* Make sure the register returned by thumb_find_work_register is
10924 not part of the return value. */
10925 if (reg * UNITS_PER_WORD <= arm_size_return_regs ())
10926 reg = LAST_LO_REGNUM;
10927
10928 if (! call_used_regs[reg])
10929 mask |= 1 << reg;
10930 }
10931
10932 return mask;
10933 }
10934
10935
10936 /* Return the number of bytes required to save VFP registers. */
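/* Illustrative result: with d8-d11 live and call-saved this returns 32;
   a run of exactly two D registers is padded to three (24 bytes) on
   pre-ARMv6 cores because of the ARM10 VFPr1 erratum handled below.  */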
10937 static int
10938 arm_get_vfp_saved_size (void)
10939 {
10940 unsigned int regno;
10941 int count;
10942 int saved;
10943
10944 saved = 0;
10945 /* Space for saved VFP registers. */
10946 if (TARGET_HARD_FLOAT && TARGET_VFP)
10947 {
10948 count = 0;
10949 for (regno = FIRST_VFP_REGNUM;
10950 regno < LAST_VFP_REGNUM;
10951 regno += 2)
10952 {
10953 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10954 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10955 {
10956 if (count > 0)
10957 {
10958 /* Workaround ARM10 VFPr1 bug. */
10959 if (count == 2 && !arm_arch6)
10960 count++;
10961 saved += count * 8;
10962 }
10963 count = 0;
10964 }
10965 else
10966 count++;
10967 }
10968 if (count > 0)
10969 {
10970 if (count == 2 && !arm_arch6)
10971 count++;
10972 saved += count * 8;
10973 }
10974 }
10975 return saved;
10976 }
10977
10978
10979 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10980 everything bar the final return instruction. */
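/* Typical output (illustrative): a function that saved {r4, r5, lr}
   and has no special return requirements collapses its epilogue to

       ldmfd   sp!, {r4, r5, pc}

   With REALLY_RETURN false only the register restore is emitted and LR
   is reloaded instead of the PC.  */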
10981 const char *
10982 output_return_instruction (rtx operand, int really_return, int reverse)
10983 {
10984 char conditional[10];
10985 char instr[100];
10986 unsigned reg;
10987 unsigned long live_regs_mask;
10988 unsigned long func_type;
10989 arm_stack_offsets *offsets;
10990
10991 func_type = arm_current_func_type ();
10992
10993 if (IS_NAKED (func_type))
10994 return "";
10995
10996 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10997 {
10998 /* If this function was declared non-returning, and we have
10999 found a tail call, then we have to trust that the called
11000 function won't return. */
11001 if (really_return)
11002 {
11003 rtx ops[2];
11004
11005 /* Otherwise, trap an attempted return by aborting. */
11006 ops[0] = operand;
11007 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11008 : "abort");
11009 assemble_external_libcall (ops[1]);
11010 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11011 }
11012
11013 return "";
11014 }
11015
11016 gcc_assert (!current_function_calls_alloca || really_return);
11017
11018 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11019
11020 return_used_this_function = 1;
11021
11022 live_regs_mask = arm_compute_save_reg_mask ();
11023
11024 if (live_regs_mask)
11025 {
11026 const char * return_reg;
11027
11028 /* If we do not have any special requirements for function exit
11029 (e.g. interworking) then we can load the return address
11030 directly into the PC. Otherwise we must load it into LR. */
11031 if (really_return
11032 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11033 return_reg = reg_names[PC_REGNUM];
11034 else
11035 return_reg = reg_names[LR_REGNUM];
11036
11037 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11038 {
11039 /* There are three possible reasons for the IP register
11040 being saved. 1) a stack frame was created, in which case
11041 IP contains the old stack pointer, or 2) an ISR routine
11042 corrupted it, or 3) it was saved to align the stack on
11043 iWMMXt. In case 1, restore IP into SP, otherwise just
11044 restore IP. */
11045 if (frame_pointer_needed)
11046 {
11047 live_regs_mask &= ~ (1 << IP_REGNUM);
11048 live_regs_mask |= (1 << SP_REGNUM);
11049 }
11050 else
11051 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11052 }
11053
11054 /* On some ARM architectures it is faster to use LDR rather than
11055 LDM to load a single register. On other architectures, the
11056 cost is the same. In 26 bit mode, or for exception handlers,
11057 we have to use LDM to load the PC so that the CPSR is also
11058 restored. */
11059 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11060 if (live_regs_mask == (1U << reg))
11061 break;
11062
11063 if (reg <= LAST_ARM_REGNUM
11064 && (reg != LR_REGNUM
11065 || ! really_return
11066 || ! IS_INTERRUPT (func_type)))
11067 {
11068 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11069 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11070 }
11071 else
11072 {
11073 char *p;
11074 int first = 1;
11075
11076 /* Generate the load multiple instruction to restore the
11077 registers. Note we can get here, even if
11078 frame_pointer_needed is true, but only if sp already
11079 points to the base of the saved core registers. */
11080 if (live_regs_mask & (1 << SP_REGNUM))
11081 {
11082 unsigned HOST_WIDE_INT stack_adjust;
11083
11084 offsets = arm_get_frame_offsets ();
11085 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11086 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11087
11088 if (stack_adjust && arm_arch5 && TARGET_ARM)
11089 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11090 else
11091 {
11092 /* If we can't use ldmib (SA110 bug),
11093 then try to pop r3 instead. */
11094 if (stack_adjust)
11095 live_regs_mask |= 1 << 3;
11096 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11097 }
11098 }
11099 else
11100 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11101
11102 p = instr + strlen (instr);
11103
11104 for (reg = 0; reg <= SP_REGNUM; reg++)
11105 if (live_regs_mask & (1 << reg))
11106 {
11107 int l = strlen (reg_names[reg]);
11108
11109 if (first)
11110 first = 0;
11111 else
11112 {
11113 memcpy (p, ", ", 2);
11114 p += 2;
11115 }
11116
11117 memcpy (p, "%|", 2);
11118 memcpy (p + 2, reg_names[reg], l);
11119 p += l + 2;
11120 }
11121
11122 if (live_regs_mask & (1 << LR_REGNUM))
11123 {
11124 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11125 /* If returning from an interrupt, restore the CPSR. */
11126 if (IS_INTERRUPT (func_type))
11127 strcat (p, "^");
11128 }
11129 else
11130 strcpy (p, "}");
11131 }
11132
11133 output_asm_insn (instr, & operand);
11134
11135 /* See if we need to generate an extra instruction to
11136 perform the actual function return. */
11137 if (really_return
11138 && func_type != ARM_FT_INTERWORKED
11139 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11140 {
11141 /* The return has already been handled
11142 by loading the LR into the PC. */
11143 really_return = 0;
11144 }
11145 }
11146
11147 if (really_return)
11148 {
11149 switch ((int) ARM_FUNC_TYPE (func_type))
11150 {
11151 case ARM_FT_ISR:
11152 case ARM_FT_FIQ:
11153 /* ??? This is wrong for unified assembly syntax. */
11154 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11155 break;
11156
11157 case ARM_FT_INTERWORKED:
11158 sprintf (instr, "bx%s\t%%|lr", conditional);
11159 break;
11160
11161 case ARM_FT_EXCEPTION:
11162 /* ??? This is wrong for unified assembly syntax. */
11163 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11164 break;
11165
11166 default:
11167 /* Use bx if it's available. */
11168 if (arm_arch5 || arm_arch4t)
11169 sprintf (instr, "bx%s\t%%|lr", conditional);
11170 else
11171 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11172 break;
11173 }
11174
11175 output_asm_insn (instr, & operand);
11176 }
11177
11178 return "";
11179 }
11180
11181 /* Write the function name into the code section, directly preceding
11182 the function prologue.
11183
11184 Code will be output similar to this:
11185 t0
11186 .ascii "arm_poke_function_name", 0
11187 .align
11188 t1
11189 .word 0xff000000 + (t1 - t0)
11190 arm_poke_function_name
11191 mov ip, sp
11192 stmfd sp!, {fp, ip, lr, pc}
11193 sub fp, ip, #4
11194
11195 When performing a stack backtrace, code can inspect the value
11196 of 'pc' stored at 'fp' + 0. If the trace function then looks
11197 at location pc - 12 and the top 8 bits are set, then we know
11198 that there is a function name embedded immediately preceding this
11199 location, and that its length is ((pc[-3]) & 0x00ffffff).
11200
11201 We assume that pc is declared as a pointer to an unsigned long.
11202
11203 It is of no benefit to output the function name if we are assembling
11204 a leaf function. These function types will not contain a stack
11205 backtrace structure, therefore it is not possible to determine the
11206 function name. */
11207 void
11208 arm_poke_function_name (FILE *stream, const char *name)
11209 {
11210 unsigned long alignlength;
11211 unsigned long length;
11212 rtx x;
11213
11214 length = strlen (name) + 1;
11215 alignlength = ROUND_UP_WORD (length);
11216
11217 ASM_OUTPUT_ASCII (stream, name, length);
11218 ASM_OUTPUT_ALIGN (stream, 2);
11219 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11220 assemble_aligned_integer (UNITS_PER_WORD, x);
11221 }
11222
11223 /* Place some comments into the assembler stream
11224 describing the current function. */
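/* The output is purely informational; for a small nested function it
   might look like (illustrative)

       @ Nested: function declared inside another function.
       @ args = 0, pretend = 0, frame = 8
       @ frame_needed = 1, uses_anonymous_args = 0

   with the exact values depending on the function being compiled.  */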
11225 static void
11226 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11227 {
11228 unsigned long func_type;
11229
11230 if (TARGET_THUMB1)
11231 {
11232 thumb1_output_function_prologue (f, frame_size);
11233 return;
11234 }
11235
11236 /* Sanity check. */
11237 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11238
11239 func_type = arm_current_func_type ();
11240
11241 switch ((int) ARM_FUNC_TYPE (func_type))
11242 {
11243 default:
11244 case ARM_FT_NORMAL:
11245 break;
11246 case ARM_FT_INTERWORKED:
11247 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11248 break;
11249 case ARM_FT_ISR:
11250 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11251 break;
11252 case ARM_FT_FIQ:
11253 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11254 break;
11255 case ARM_FT_EXCEPTION:
11256 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11257 break;
11258 }
11259
11260 if (IS_NAKED (func_type))
11261 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11262
11263 if (IS_VOLATILE (func_type))
11264 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11265
11266 if (IS_NESTED (func_type))
11267 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11268 if (IS_STACKALIGN (func_type))
11269 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11270
11271 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11272 current_function_args_size,
11273 current_function_pretend_args_size, frame_size);
11274
11275 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11276 frame_pointer_needed,
11277 cfun->machine->uses_anonymous_args);
11278
11279 if (cfun->machine->lr_save_eliminated)
11280 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11281
11282 if (current_function_calls_eh_return)
11283 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11284
11285 #ifdef AOF_ASSEMBLER
11286 if (flag_pic)
11287 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
11288 #endif
11289
11290 return_used_this_function = 0;
11291 }
11292
11293 const char *
11294 arm_output_epilogue (rtx sibling)
11295 {
11296 int reg;
11297 unsigned long saved_regs_mask;
11298 unsigned long func_type;
11299 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11300 frame that is $fp + 4 for a non-variadic function. */
11301 int floats_offset = 0;
11302 rtx operands[3];
11303 FILE * f = asm_out_file;
11304 unsigned int lrm_count = 0;
11305 int really_return = (sibling == NULL);
11306 int start_reg;
11307 arm_stack_offsets *offsets;
11308
11309 /* If we have already generated the return instruction
11310 then it is futile to generate anything else. */
11311 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11312 return "";
11313
11314 func_type = arm_current_func_type ();
11315
11316 if (IS_NAKED (func_type))
11317 /* Naked functions don't have epilogues. */
11318 return "";
11319
11320 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11321 {
11322 rtx op;
11323
11324 /* A volatile function should never return. Call abort. */
11325 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11326 assemble_external_libcall (op);
11327 output_asm_insn ("bl\t%a0", &op);
11328
11329 return "";
11330 }
11331
11332 /* If we are throwing an exception, then we really must be doing a
11333 return, so we can't tail-call. */
11334 gcc_assert (!current_function_calls_eh_return || really_return);
11335
11336 offsets = arm_get_frame_offsets ();
11337 saved_regs_mask = arm_compute_save_reg_mask ();
11338
11339 if (TARGET_IWMMXT)
11340 lrm_count = bit_count (saved_regs_mask);
11341
11342 floats_offset = offsets->saved_args;
11343 /* Compute how far away the floats will be. */
11344 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11345 if (saved_regs_mask & (1 << reg))
11346 floats_offset += 4;
11347
11348 if (frame_pointer_needed && TARGET_ARM)
11349 {
11350 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11351 int vfp_offset = offsets->frame;
11352
11353 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11354 {
11355 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11356 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11357 {
11358 floats_offset += 12;
11359 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11360 reg, FP_REGNUM, floats_offset - vfp_offset);
11361 }
11362 }
11363 else
11364 {
11365 start_reg = LAST_FPA_REGNUM;
11366
11367 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11368 {
11369 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11370 {
11371 floats_offset += 12;
11372
11373 /* We can't unstack more than four registers at once. */
11374 if (start_reg - reg == 3)
11375 {
11376 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11377 reg, FP_REGNUM, floats_offset - vfp_offset);
11378 start_reg = reg - 1;
11379 }
11380 }
11381 else
11382 {
11383 if (reg != start_reg)
11384 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11385 reg + 1, start_reg - reg,
11386 FP_REGNUM, floats_offset - vfp_offset);
11387 start_reg = reg - 1;
11388 }
11389 }
11390
11391 /* Just in case the last register checked also needs unstacking. */
11392 if (reg != start_reg)
11393 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11394 reg + 1, start_reg - reg,
11395 FP_REGNUM, floats_offset - vfp_offset);
11396 }
11397
11398 if (TARGET_HARD_FLOAT && TARGET_VFP)
11399 {
11400 int saved_size;
11401
11402 /* The fldmd insns do not have base+offset addressing
11403 modes, so we use IP to hold the address. */
11404 saved_size = arm_get_vfp_saved_size ();
11405
11406 if (saved_size > 0)
11407 {
11408 floats_offset += saved_size;
11409 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11410 FP_REGNUM, floats_offset - vfp_offset);
11411 }
11412 start_reg = FIRST_VFP_REGNUM;
11413 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11414 {
11415 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11416 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11417 {
11418 if (start_reg != reg)
11419 vfp_output_fldmd (f, IP_REGNUM,
11420 (start_reg - FIRST_VFP_REGNUM) / 2,
11421 (reg - start_reg) / 2);
11422 start_reg = reg + 2;
11423 }
11424 }
11425 if (start_reg != reg)
11426 vfp_output_fldmd (f, IP_REGNUM,
11427 (start_reg - FIRST_VFP_REGNUM) / 2,
11428 (reg - start_reg) / 2);
11429 }
11430
11431 if (TARGET_IWMMXT)
11432 {
11433 /* The frame pointer is guaranteed to be non-double-word aligned.
11434 This is because it is set to (old_stack_pointer - 4) and the
11435 old_stack_pointer was double word aligned. Thus the offset to
11436 	     the iWMMXt registers to be loaded must also not be a multiple of
11437 	     eight bytes, so that the resultant address *is* double-word aligned.
11438 We can ignore floats_offset since that was already included in
11439 the live_regs_mask. */
11440 lrm_count += (lrm_count % 2 ? 2 : 1);
11441
11442 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11443 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11444 {
11445 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11446 reg, FP_REGNUM, lrm_count * 4);
11447 lrm_count += 2;
11448 }
11449 }
11450
11451 /* saved_regs_mask should contain the IP, which at the time of stack
11452 frame generation actually contains the old stack pointer. So a
11453 	 quick way to unwind the stack is just to pop the IP register directly
11454 into the stack pointer. */
11455 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11456 saved_regs_mask &= ~ (1 << IP_REGNUM);
11457 saved_regs_mask |= (1 << SP_REGNUM);
11458
11459 /* There are two registers left in saved_regs_mask - LR and PC. We
11460 only need to restore the LR register (the return address), but to
11461 save time we can load it directly into the PC, unless we need a
11462 special function exit sequence, or we are not really returning. */
11463 if (really_return
11464 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11465 && !current_function_calls_eh_return)
11466 	/* Delete the LR from the register mask, so that the return
11467 	   address saved on the stack is loaded into the PC instead.  */
11468 saved_regs_mask &= ~ (1 << LR_REGNUM);
11469 else
11470 saved_regs_mask &= ~ (1 << PC_REGNUM);
11471
11472 /* We must use SP as the base register, because SP is one of the
11473 registers being restored. If an interrupt or page fault
11474 happens in the ldm instruction, the SP might or might not
11475 have been restored. That would be bad, as then SP will no
11476 longer indicate the safe area of stack, and we can get stack
11477 corruption. Using SP as the base register means that it will
11478 be reset correctly to the original value, should an interrupt
11479 occur. If the stack pointer already points at the right
11480 place, then omit the subtraction. */
11481 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11482 || current_function_calls_alloca)
11483 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11484 4 * bit_count (saved_regs_mask));
11485 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11486
11487 if (IS_INTERRUPT (func_type))
11488 /* Interrupt handlers will have pushed the
11489 IP onto the stack, so restore it now. */
11490 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11491 }
11492 else
11493 {
11494 HOST_WIDE_INT amount;
11495 int rfe;
11496 /* Restore stack pointer if necessary. */
11497 if (frame_pointer_needed)
11498 {
11499 /* For Thumb-2 restore sp from the frame pointer.
11500 Operand restrictions mean we have to increment FP, then copy
11501 to SP. */
11502 amount = offsets->locals_base - offsets->saved_regs;
11503 operands[0] = hard_frame_pointer_rtx;
11504 }
11505 else
11506 {
11507 operands[0] = stack_pointer_rtx;
11508 amount = offsets->outgoing_args - offsets->saved_regs;
11509 }
11510
11511 if (amount)
11512 {
11513 operands[1] = operands[0];
11514 operands[2] = GEN_INT (amount);
11515 output_add_immediate (operands);
11516 }
11517 if (frame_pointer_needed)
11518 asm_fprintf (f, "\tmov\t%r, %r\n",
11519 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11520
11521 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11522 {
11523 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11524 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11525 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11526 reg, SP_REGNUM);
11527 }
11528 else
11529 {
11530 start_reg = FIRST_FPA_REGNUM;
11531
11532 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11533 {
11534 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11535 {
11536 if (reg - start_reg == 3)
11537 {
11538 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11539 start_reg, SP_REGNUM);
11540 start_reg = reg + 1;
11541 }
11542 }
11543 else
11544 {
11545 if (reg != start_reg)
11546 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11547 start_reg, reg - start_reg,
11548 SP_REGNUM);
11549
11550 start_reg = reg + 1;
11551 }
11552 }
11553
11554 /* Just in case the last register checked also needs unstacking. */
11555 if (reg != start_reg)
11556 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11557 start_reg, reg - start_reg, SP_REGNUM);
11558 }
11559
11560 if (TARGET_HARD_FLOAT && TARGET_VFP)
11561 {
11562 start_reg = FIRST_VFP_REGNUM;
11563 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11564 {
11565 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11566 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11567 {
11568 if (start_reg != reg)
11569 vfp_output_fldmd (f, SP_REGNUM,
11570 (start_reg - FIRST_VFP_REGNUM) / 2,
11571 (reg - start_reg) / 2);
11572 start_reg = reg + 2;
11573 }
11574 }
11575 if (start_reg != reg)
11576 vfp_output_fldmd (f, SP_REGNUM,
11577 (start_reg - FIRST_VFP_REGNUM) / 2,
11578 (reg - start_reg) / 2);
11579 }
11580 if (TARGET_IWMMXT)
11581 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11582 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11583 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11584
11585 /* If we can, restore the LR into the PC. */
11586 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11587 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11588 && !IS_STACKALIGN (func_type)
11589 && really_return
11590 && current_function_pretend_args_size == 0
11591 && saved_regs_mask & (1 << LR_REGNUM)
11592 && !current_function_calls_eh_return)
11593 {
11594 saved_regs_mask &= ~ (1 << LR_REGNUM);
11595 saved_regs_mask |= (1 << PC_REGNUM);
11596 rfe = IS_INTERRUPT (func_type);
11597 }
11598 else
11599 rfe = 0;
11600
11601 /* Load the registers off the stack. If we only have one register
11602 	 to load, use the LDR instruction - it is faster.  For Thumb-2
11603 	 always use pop and the assembler will pick the best instruction.  */
11604 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11605 && !IS_INTERRUPT(func_type))
11606 {
11607 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11608 }
11609 else if (saved_regs_mask)
11610 {
11611 if (saved_regs_mask & (1 << SP_REGNUM))
11612 /* Note - write back to the stack register is not enabled
11613 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11614 in the list of registers and if we add writeback the
11615 instruction becomes UNPREDICTABLE. */
11616 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11617 rfe);
11618 else if (TARGET_ARM)
11619 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11620 rfe);
11621 else
11622 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11623 }
11624
11625 if (current_function_pretend_args_size)
11626 {
11627 /* Unwind the pre-pushed regs. */
11628 operands[0] = operands[1] = stack_pointer_rtx;
11629 operands[2] = GEN_INT (current_function_pretend_args_size);
11630 output_add_immediate (operands);
11631 }
11632 }
11633
11634 /* We may have already restored PC directly from the stack. */
11635 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11636 return "";
11637
11638 /* Stack adjustment for exception handler. */
11639 if (current_function_calls_eh_return)
11640 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11641 ARM_EH_STACKADJ_REGNUM);
11642
11643 /* Generate the return instruction. */
11644 switch ((int) ARM_FUNC_TYPE (func_type))
11645 {
11646 case ARM_FT_ISR:
11647 case ARM_FT_FIQ:
11648 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11649 break;
11650
11651 case ARM_FT_EXCEPTION:
11652 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11653 break;
11654
11655 case ARM_FT_INTERWORKED:
11656 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11657 break;
11658
11659 default:
11660 if (IS_STACKALIGN (func_type))
11661 {
11662 /* See comment in arm_expand_prologue. */
11663 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11664 }
11665 if (arm_arch5 || arm_arch4t)
11666 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11667 else
11668 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11669 break;
11670 }
11671
11672 return "";
11673 }
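/* As an illustration (hypothetical frame: ARM mode, frame pointer in use,
   original save mask {r4, fp, ip, lr, pc}): the routine above drops IP and
   LR from the mask, adds SP and keeps PC, and emits approximately

	sub	sp, fp, #16
	ldmfd	sp, {r4, fp, sp, pc}

   which reloads r4 and fp, restores SP from the stacked IP (the old stack
   pointer) and returns by loading the stacked LR value into the PC.  */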
11674
11675 static void
11676 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11677 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11678 {
11679 arm_stack_offsets *offsets;
11680
11681 if (TARGET_THUMB1)
11682 {
11683 int regno;
11684
11685 /* Emit any call-via-reg trampolines that are needed for v4t support
11686 of call_reg and call_value_reg type insns. */
11687 for (regno = 0; regno < LR_REGNUM; regno++)
11688 {
11689 rtx label = cfun->machine->call_via[regno];
11690
11691 if (label != NULL)
11692 {
11693 switch_to_section (function_section (current_function_decl));
11694 targetm.asm_out.internal_label (asm_out_file, "L",
11695 CODE_LABEL_NUMBER (label));
11696 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11697 }
11698 }
11699
11700 /* ??? Probably not safe to set this here, since it assumes that a
11701 function will be emitted as assembly immediately after we generate
11702 RTL for it. This does not happen for inline functions. */
11703 return_used_this_function = 0;
11704 }
11705 else /* TARGET_32BIT */
11706 {
11707 /* We need to take into account any stack-frame rounding. */
11708 offsets = arm_get_frame_offsets ();
11709
11710 gcc_assert (!use_return_insn (FALSE, NULL)
11711 || !return_used_this_function
11712 || offsets->saved_regs == offsets->outgoing_args
11713 || frame_pointer_needed);
11714
11715 /* Reset the ARM-specific per-function variables. */
11716 after_arm_reorg = 0;
11717 }
11718 }
11719
11720 /* Generate and emit an insn that we will recognize as a push_multi.
11721 Unfortunately, since this insn does not reflect very well the actual
11722 semantics of the operation, we need to annotate the insn for the benefit
11723 of DWARF2 frame unwind information. */
11724 static rtx
11725 emit_multi_reg_push (unsigned long mask)
11726 {
11727 int num_regs = 0;
11728 int num_dwarf_regs;
11729 int i, j;
11730 rtx par;
11731 rtx dwarf;
11732 int dwarf_par_index;
11733 rtx tmp, reg;
11734
11735 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11736 if (mask & (1 << i))
11737 num_regs++;
11738
11739 gcc_assert (num_regs && num_regs <= 16);
11740
11741 /* We don't record the PC in the dwarf frame information. */
11742 num_dwarf_regs = num_regs;
11743 if (mask & (1 << PC_REGNUM))
11744 num_dwarf_regs--;
11745
11746 /* For the body of the insn we are going to generate an UNSPEC in
11747 parallel with several USEs. This allows the insn to be recognized
11748 by the push_multi pattern in the arm.md file. The insn looks
11749 something like this:
11750
11751 (parallel [
11752 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11753 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11754 (use (reg:SI 11 fp))
11755 (use (reg:SI 12 ip))
11756 (use (reg:SI 14 lr))
11757 (use (reg:SI 15 pc))
11758 ])
11759
11760 For the frame note however, we try to be more explicit and actually
11761 show each register being stored into the stack frame, plus a (single)
11762 decrement of the stack pointer. We do it this way in order to be
11763 friendly to the stack unwinding code, which only wants to see a single
11764 stack decrement per instruction. The RTL we generate for the note looks
11765 something like this:
11766
11767 (sequence [
11768 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11769 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11770 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11771 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11772 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11773 ])
11774
11775 This sequence is used both by the code to support stack unwinding for
11776      exception handlers and the code to generate dwarf2 frame debugging.  */
11777
11778 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11779 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11780 dwarf_par_index = 1;
11781
11782 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11783 {
11784 if (mask & (1 << i))
11785 {
11786 reg = gen_rtx_REG (SImode, i);
11787
11788 XVECEXP (par, 0, 0)
11789 = gen_rtx_SET (VOIDmode,
11790 gen_frame_mem (BLKmode,
11791 gen_rtx_PRE_DEC (BLKmode,
11792 stack_pointer_rtx)),
11793 gen_rtx_UNSPEC (BLKmode,
11794 gen_rtvec (1, reg),
11795 UNSPEC_PUSH_MULT));
11796
11797 if (i != PC_REGNUM)
11798 {
11799 tmp = gen_rtx_SET (VOIDmode,
11800 gen_frame_mem (SImode, stack_pointer_rtx),
11801 reg);
11802 RTX_FRAME_RELATED_P (tmp) = 1;
11803 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11804 dwarf_par_index++;
11805 }
11806
11807 break;
11808 }
11809 }
11810
11811 for (j = 1, i++; j < num_regs; i++)
11812 {
11813 if (mask & (1 << i))
11814 {
11815 reg = gen_rtx_REG (SImode, i);
11816
11817 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11818
11819 if (i != PC_REGNUM)
11820 {
11821 tmp
11822 = gen_rtx_SET (VOIDmode,
11823 gen_frame_mem (SImode,
11824 plus_constant (stack_pointer_rtx,
11825 4 * j)),
11826 reg);
11827 RTX_FRAME_RELATED_P (tmp) = 1;
11828 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11829 }
11830
11831 j++;
11832 }
11833 }
11834
11835 par = emit_insn (par);
11836
11837 tmp = gen_rtx_SET (VOIDmode,
11838 stack_pointer_rtx,
11839 plus_constant (stack_pointer_rtx, -4 * num_regs));
11840 RTX_FRAME_RELATED_P (tmp) = 1;
11841 XVECEXP (dwarf, 0, 0) = tmp;
11842
11843 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11844 REG_NOTES (par));
11845 return par;
11846 }
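/* For example, with a hypothetical mask of {r4, fp, ip, lr, pc} the insn
   built above assembles to

	stmfd	sp!, {r4, fp, ip, lr, pc}

   while the attached REG_FRAME_RELATED_EXPR note records a single
   sp := sp - 20 adjustment plus one store per register except the PC,
   exactly in the SEQUENCE form shown in the comment above.  */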
11847
11848 /* Calculate the size of the return value that is passed in registers. */
11849 static int
11850 arm_size_return_regs (void)
11851 {
11852 enum machine_mode mode;
11853
11854 if (current_function_return_rtx != 0)
11855 mode = GET_MODE (current_function_return_rtx);
11856 else
11857 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11858
11859 return GET_MODE_SIZE (mode);
11860 }
11861
11862 static rtx
11863 emit_sfm (int base_reg, int count)
11864 {
11865 rtx par;
11866 rtx dwarf;
11867 rtx tmp, reg;
11868 int i;
11869
11870 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11871 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11872
11873 reg = gen_rtx_REG (XFmode, base_reg++);
11874
11875 XVECEXP (par, 0, 0)
11876 = gen_rtx_SET (VOIDmode,
11877 gen_frame_mem (BLKmode,
11878 gen_rtx_PRE_DEC (BLKmode,
11879 stack_pointer_rtx)),
11880 gen_rtx_UNSPEC (BLKmode,
11881 gen_rtvec (1, reg),
11882 UNSPEC_PUSH_MULT));
11883 tmp = gen_rtx_SET (VOIDmode,
11884 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11885 RTX_FRAME_RELATED_P (tmp) = 1;
11886 XVECEXP (dwarf, 0, 1) = tmp;
11887
11888 for (i = 1; i < count; i++)
11889 {
11890 reg = gen_rtx_REG (XFmode, base_reg++);
11891 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11892
11893 tmp = gen_rtx_SET (VOIDmode,
11894 gen_frame_mem (XFmode,
11895 plus_constant (stack_pointer_rtx,
11896 i * 12)),
11897 reg);
11898 RTX_FRAME_RELATED_P (tmp) = 1;
11899 XVECEXP (dwarf, 0, i + 1) = tmp;
11900 }
11901
11902 tmp = gen_rtx_SET (VOIDmode,
11903 stack_pointer_rtx,
11904 plus_constant (stack_pointer_rtx, -12 * count));
11905
11906 RTX_FRAME_RELATED_P (tmp) = 1;
11907 XVECEXP (dwarf, 0, 0) = tmp;
11908
11909 par = emit_insn (par);
11910 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11911 REG_NOTES (par));
11912 return par;
11913 }
11914
11915
11916 /* Return true if the current function needs to save/restore LR. */
11917
11918 static bool
11919 thumb_force_lr_save (void)
11920 {
11921 return !cfun->machine->lr_save_eliminated
11922 && (!leaf_function_p ()
11923 || thumb_far_jump_used_p ()
11924 || df_regs_ever_live_p (LR_REGNUM));
11925 }
11926
11927
11928 /* Compute the distance from register FROM to register TO.
11929 These can be the arg pointer (26), the soft frame pointer (25),
11930 the stack pointer (13) or the hard frame pointer (11).
11931 In thumb mode r7 is used as the soft frame pointer, if needed.
11932 Typical stack layout looks like this:
11933
11934 old stack pointer -> | |
11935 ----
11936 | | \
11937 | | saved arguments for
11938 | | vararg functions
11939 | | /
11940 --
11941 hard FP & arg pointer -> | | \
11942 | | stack
11943 | | frame
11944 | | /
11945 --
11946 | | \
11947 | | call saved
11948 | | registers
11949 soft frame pointer -> | | /
11950 --
11951 | | \
11952 | | local
11953 | | variables
11954 locals base pointer -> | | /
11955 --
11956 | | \
11957 | | outgoing
11958 | | arguments
11959 current stack pointer -> | | /
11960 --
11961
11962 For a given function some or all of these stack components
11963 may not be needed, giving rise to the possibility of
11964 eliminating some of the registers.
11965
11966 The values returned by this function must reflect the behavior
11967 of arm_expand_prologue() and arm_compute_save_reg_mask().
11968
11969 The sign of the number returned reflects the direction of stack
11970 growth, so the values are positive for all eliminations except
11971 from the soft frame pointer to the hard frame pointer.
11972
11973 SFP may point just inside the local variables block to ensure correct
11974 alignment. */
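/* A rough numeric example (hypothetical, ARM mode, assuming no caller
   interworking slot): a non-variadic function that saves {r4, fp, ip, lr,
   pc}, has 8 bytes of locals and needs no outgoing argument space gets,
   per arm_get_frame_offsets below,

     saved_args    =  0   (no pretend args)
     frame         =  4   (saved_args + 4, frame pointer needed)
     saved_regs    = 20   (5 registers * 4 bytes)
     soft_frame    = 24   (20 rounded up to keep doubleword alignment)
     locals_base   = 32   (soft_frame + 8 bytes of locals)
     outgoing_args = 32   (already doubleword aligned)  */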
11975
11976
11977 /* Calculate stack offsets. These are used to calculate register elimination
11978 offsets and in prologue/epilogue code. */
11979
11980 static arm_stack_offsets *
11981 arm_get_frame_offsets (void)
11982 {
11983 struct arm_stack_offsets *offsets;
11984 unsigned long func_type;
11985 int leaf;
11986 int saved;
11987 HOST_WIDE_INT frame_size;
11988
11989 offsets = &cfun->machine->stack_offsets;
11990
11991 /* We need to know if we are a leaf function. Unfortunately, it
11992 is possible to be called after start_sequence has been called,
11993 which causes get_insns to return the insns for the sequence,
11994 not the function, which will cause leaf_function_p to return
11995 the incorrect result.
11996
11997      However, we only need to know about leaf functions once reload
11998      has completed, and the frame size cannot be changed after that
11999      time, so we can safely use the cached value.  */
12000
12001 if (reload_completed)
12002 return offsets;
12003
12004   /* Initially this is the size of the local variables.  It will be translated
12005 into an offset once we have determined the size of preceding data. */
12006 frame_size = ROUND_UP_WORD (get_frame_size ());
12007
12008 leaf = leaf_function_p ();
12009
12010 /* Space for variadic functions. */
12011 offsets->saved_args = current_function_pretend_args_size;
12012
12013 /* In Thumb mode this is incorrect, but never used. */
12014 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
12015
12016 if (TARGET_32BIT)
12017 {
12018 unsigned int regno;
12019
12020 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
12021
12022 /* We know that SP will be doubleword aligned on entry, and we must
12023 preserve that condition at any subroutine call. We also require the
12024 soft frame pointer to be doubleword aligned. */
12025
12026 if (TARGET_REALLY_IWMMXT)
12027 {
12028 /* Check for the call-saved iWMMXt registers. */
12029 for (regno = FIRST_IWMMXT_REGNUM;
12030 regno <= LAST_IWMMXT_REGNUM;
12031 regno++)
12032 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12033 saved += 8;
12034 }
12035
12036 func_type = arm_current_func_type ();
12037 if (! IS_VOLATILE (func_type))
12038 {
12039 /* Space for saved FPA registers. */
12040 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12041 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12042 saved += 12;
12043
12044 /* Space for saved VFP registers. */
12045 if (TARGET_HARD_FLOAT && TARGET_VFP)
12046 saved += arm_get_vfp_saved_size ();
12047 }
12048 }
12049 else /* TARGET_THUMB1 */
12050 {
12051 saved = bit_count (thumb1_compute_save_reg_mask ()) * 4;
12052 if (TARGET_BACKTRACE)
12053 saved += 16;
12054 }
12055
12056 /* Saved registers include the stack frame. */
12057 offsets->saved_regs = offsets->saved_args + saved;
12058 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12059 /* A leaf function does not need any stack alignment if it has nothing
12060 on the stack. */
12061 if (leaf && frame_size == 0)
12062 {
12063 offsets->outgoing_args = offsets->soft_frame;
12064 offsets->locals_base = offsets->soft_frame;
12065 return offsets;
12066 }
12067
12068 /* Ensure SFP has the correct alignment. */
12069 if (ARM_DOUBLEWORD_ALIGN
12070 && (offsets->soft_frame & 7))
12071 offsets->soft_frame += 4;
12072
12073 offsets->locals_base = offsets->soft_frame + frame_size;
12074 offsets->outgoing_args = (offsets->locals_base
12075 + current_function_outgoing_args_size);
12076
12077 if (ARM_DOUBLEWORD_ALIGN)
12078 {
12079 /* Ensure SP remains doubleword aligned. */
12080 if (offsets->outgoing_args & 7)
12081 offsets->outgoing_args += 4;
12082 gcc_assert (!(offsets->outgoing_args & 7));
12083 }
12084
12085 return offsets;
12086 }
12087
12088
12089 /* Calculate the relative offsets for the different stack pointers. Positive
12090 offsets are in the direction of stack growth. */
12091
12092 HOST_WIDE_INT
12093 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12094 {
12095 arm_stack_offsets *offsets;
12096
12097 offsets = arm_get_frame_offsets ();
12098
12099 /* OK, now we have enough information to compute the distances.
12100 There must be an entry in these switch tables for each pair
12101 of registers in ELIMINABLE_REGS, even if some of the entries
12102 seem to be redundant or useless. */
12103 switch (from)
12104 {
12105 case ARG_POINTER_REGNUM:
12106 switch (to)
12107 {
12108 case THUMB_HARD_FRAME_POINTER_REGNUM:
12109 return 0;
12110
12111 case FRAME_POINTER_REGNUM:
12112 /* This is the reverse of the soft frame pointer
12113 to hard frame pointer elimination below. */
12114 return offsets->soft_frame - offsets->saved_args;
12115
12116 case ARM_HARD_FRAME_POINTER_REGNUM:
12117 /* If there is no stack frame then the hard
12118 frame pointer and the arg pointer coincide. */
12119 if (offsets->frame == offsets->saved_regs)
12120 return 0;
12121 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12122 return (frame_pointer_needed
12123 && cfun->static_chain_decl != NULL
12124 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12125
12126 case STACK_POINTER_REGNUM:
12127 /* If nothing has been pushed on the stack at all
12128 then this will return -4. This *is* correct! */
12129 return offsets->outgoing_args - (offsets->saved_args + 4);
12130
12131 default:
12132 gcc_unreachable ();
12133 }
12134 gcc_unreachable ();
12135
12136 case FRAME_POINTER_REGNUM:
12137 switch (to)
12138 {
12139 case THUMB_HARD_FRAME_POINTER_REGNUM:
12140 return 0;
12141
12142 case ARM_HARD_FRAME_POINTER_REGNUM:
12143 /* The hard frame pointer points to the top entry in the
12144 	     stack frame.  The soft frame pointer points to the bottom entry
12145 in the stack frame. If there is no stack frame at all,
12146 then they are identical. */
12147
12148 return offsets->frame - offsets->soft_frame;
12149
12150 case STACK_POINTER_REGNUM:
12151 return offsets->outgoing_args - offsets->soft_frame;
12152
12153 default:
12154 gcc_unreachable ();
12155 }
12156 gcc_unreachable ();
12157
12158 default:
12159 /* You cannot eliminate from the stack pointer.
12160 In theory you could eliminate from the hard frame
12161 pointer to the stack pointer, but this will never
12162 happen, since if a stack frame is not needed the
12163 hard frame pointer will never be used. */
12164 gcc_unreachable ();
12165 }
12166 }
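/* Continuing the hypothetical example above (saved_args = 0, frame = 4,
   soft_frame = 24, outgoing_args = 32), the eliminations work out as

     ARG_POINTER   -> FRAME_POINTER          : 24 - 0       =  24
     ARG_POINTER   -> STACK_POINTER          : 32 - (0 + 4) =  28
     FRAME_POINTER -> STACK_POINTER          : 32 - 24      =   8
     FRAME_POINTER -> ARM_HARD_FRAME_POINTER :  4 - 24      = -20

   with the last one negative, matching the sign convention described in
   the comment before arm_get_frame_offsets.  */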
12167
12168
12169 /* Emit RTL to save coprocessor registers on function entry. Returns the
12170 number of bytes pushed. */
12171
12172 static int
12173 arm_save_coproc_regs(void)
12174 {
12175 int saved_size = 0;
12176 unsigned reg;
12177 unsigned start_reg;
12178 rtx insn;
12179
12180 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12181 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12182 {
12183 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12184 insn = gen_rtx_MEM (V2SImode, insn);
12185 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12186 RTX_FRAME_RELATED_P (insn) = 1;
12187 saved_size += 8;
12188 }
12189
12190 /* Save any floating point call-saved registers used by this
12191 function. */
12192 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12193 {
12194 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12195 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12196 {
12197 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12198 insn = gen_rtx_MEM (XFmode, insn);
12199 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12200 RTX_FRAME_RELATED_P (insn) = 1;
12201 saved_size += 12;
12202 }
12203 }
12204 else
12205 {
12206 start_reg = LAST_FPA_REGNUM;
12207
12208 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12209 {
12210 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12211 {
12212 if (start_reg - reg == 3)
12213 {
12214 insn = emit_sfm (reg, 4);
12215 RTX_FRAME_RELATED_P (insn) = 1;
12216 saved_size += 48;
12217 start_reg = reg - 1;
12218 }
12219 }
12220 else
12221 {
12222 if (start_reg != reg)
12223 {
12224 insn = emit_sfm (reg + 1, start_reg - reg);
12225 RTX_FRAME_RELATED_P (insn) = 1;
12226 saved_size += (start_reg - reg) * 12;
12227 }
12228 start_reg = reg - 1;
12229 }
12230 }
12231
12232 if (start_reg != reg)
12233 {
12234 insn = emit_sfm (reg + 1, start_reg - reg);
12235 saved_size += (start_reg - reg) * 12;
12236 RTX_FRAME_RELATED_P (insn) = 1;
12237 }
12238 }
12239 if (TARGET_HARD_FLOAT && TARGET_VFP)
12240 {
12241 start_reg = FIRST_VFP_REGNUM;
12242
12243 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12244 {
12245 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12246 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12247 {
12248 if (start_reg != reg)
12249 saved_size += vfp_emit_fstmd (start_reg,
12250 (reg - start_reg) / 2);
12251 start_reg = reg + 2;
12252 }
12253 }
12254 if (start_reg != reg)
12255 saved_size += vfp_emit_fstmd (start_reg,
12256 (reg - start_reg) / 2);
12257 }
12258 return saved_size;
12259 }
12260
12261
12262 /* Set the Thumb frame pointer from the stack pointer. */
12263
12264 static void
12265 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12266 {
12267 HOST_WIDE_INT amount;
12268 rtx insn, dwarf;
12269
12270 amount = offsets->outgoing_args - offsets->locals_base;
12271 if (amount < 1024)
12272 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12273 stack_pointer_rtx, GEN_INT (amount)));
12274 else
12275 {
12276 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12277 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12278 hard_frame_pointer_rtx,
12279 stack_pointer_rtx));
12280 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12281 plus_constant (stack_pointer_rtx, amount));
12282 RTX_FRAME_RELATED_P (dwarf) = 1;
12283 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12284 REG_NOTES (insn));
12285 }
12286
12287 RTX_FRAME_RELATED_P (insn) = 1;
12288 }
12289
12290 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12291 function. */
12292 void
12293 arm_expand_prologue (void)
12294 {
12295 rtx amount;
12296 rtx insn;
12297 rtx ip_rtx;
12298 unsigned long live_regs_mask;
12299 unsigned long func_type;
12300 int fp_offset = 0;
12301 int saved_pretend_args = 0;
12302 int saved_regs = 0;
12303 unsigned HOST_WIDE_INT args_to_push;
12304 arm_stack_offsets *offsets;
12305
12306 func_type = arm_current_func_type ();
12307
12308 /* Naked functions don't have prologues. */
12309 if (IS_NAKED (func_type))
12310 return;
12311
12312 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12313 args_to_push = current_function_pretend_args_size;
12314
12315   /* Compute which registers we will have to save onto the stack.  */
12316 live_regs_mask = arm_compute_save_reg_mask ();
12317
12318 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12319
12320 if (IS_STACKALIGN (func_type))
12321 {
12322 rtx dwarf;
12323 rtx r0;
12324 rtx r1;
12325 /* Handle a word-aligned stack pointer. We generate the following:
12326
12327 mov r0, sp
12328 bic r1, r0, #7
12329 mov sp, r1
12330 <save and restore r0 in normal prologue/epilogue>
12331 mov sp, r0
12332 bx lr
12333
12334 The unwinder doesn't need to know about the stack realignment.
12335 Just tell it we saved SP in r0. */
12336 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12337
12338 r0 = gen_rtx_REG (SImode, 0);
12339 r1 = gen_rtx_REG (SImode, 1);
12340 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12341 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12342 insn = gen_movsi (r0, stack_pointer_rtx);
12343 RTX_FRAME_RELATED_P (insn) = 1;
12344 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12345 dwarf, REG_NOTES (insn));
12346 emit_insn (insn);
12347 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12348 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12349 }
12350
12351 if (frame_pointer_needed && TARGET_ARM)
12352 {
12353 if (IS_INTERRUPT (func_type))
12354 {
12355 /* Interrupt functions must not corrupt any registers.
12356 Creating a frame pointer however, corrupts the IP
12357 register, so we must push it first. */
12358 insn = emit_multi_reg_push (1 << IP_REGNUM);
12359
12360 /* Do not set RTX_FRAME_RELATED_P on this insn.
12361 The dwarf stack unwinding code only wants to see one
12362 stack decrement per function, and this is not it. If
12363 this instruction is labeled as being part of the frame
12364 creation sequence then dwarf2out_frame_debug_expr will
12365 die when it encounters the assignment of IP to FP
12366 later on, since the use of SP here establishes SP as
12367 the CFA register and not IP.
12368
12369 Anyway this instruction is not really part of the stack
12370 frame creation although it is part of the prologue. */
12371 }
12372 else if (IS_NESTED (func_type))
12373 {
12374 /* The Static chain register is the same as the IP register
12375 used as a scratch register during stack frame creation.
12376 	     To get around this we need to find somewhere to store IP
12377 whilst the frame is being created. We try the following
12378 places in order:
12379
12380 1. The last argument register.
12381 2. A slot on the stack above the frame. (This only
12382 works if the function is not a varargs function).
12383 3. Register r3, after pushing the argument registers
12384 onto the stack.
12385
12386 Note - we only need to tell the dwarf2 backend about the SP
12387 adjustment in the second variant; the static chain register
12388 doesn't need to be unwound, as it doesn't contain a value
12389 inherited from the caller. */
12390
12391 if (df_regs_ever_live_p (3) == false)
12392 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12393 else if (args_to_push == 0)
12394 {
12395 rtx dwarf;
12396
12397 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12398 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12399 fp_offset = 4;
12400
12401 /* Just tell the dwarf backend that we adjusted SP. */
12402 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12403 plus_constant (stack_pointer_rtx,
12404 -fp_offset));
12405 RTX_FRAME_RELATED_P (insn) = 1;
12406 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12407 dwarf, REG_NOTES (insn));
12408 }
12409 else
12410 {
12411 /* Store the args on the stack. */
12412 if (cfun->machine->uses_anonymous_args)
12413 insn = emit_multi_reg_push
12414 ((0xf0 >> (args_to_push / 4)) & 0xf);
12415 else
12416 insn = emit_insn
12417 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12418 GEN_INT (- args_to_push)));
12419
12420 RTX_FRAME_RELATED_P (insn) = 1;
12421
12422 saved_pretend_args = 1;
12423 fp_offset = args_to_push;
12424 args_to_push = 0;
12425
12426 /* Now reuse r3 to preserve IP. */
12427 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12428 }
12429 }
12430
12431 insn = emit_set_insn (ip_rtx,
12432 plus_constant (stack_pointer_rtx, fp_offset));
12433 RTX_FRAME_RELATED_P (insn) = 1;
12434 }
12435
12436 if (args_to_push)
12437 {
12438 /* Push the argument registers, or reserve space for them. */
12439 if (cfun->machine->uses_anonymous_args)
12440 insn = emit_multi_reg_push
12441 ((0xf0 >> (args_to_push / 4)) & 0xf);
12442 else
12443 insn = emit_insn
12444 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12445 GEN_INT (- args_to_push)));
12446 RTX_FRAME_RELATED_P (insn) = 1;
12447 }
12448
12449 /* If this is an interrupt service routine, and the link register
12450 is going to be pushed, and we are not creating a stack frame,
12451 (which would involve an extra push of IP and a pop in the epilogue)
12452 subtracting four from LR now will mean that the function return
12453 can be done with a single instruction. */
12454 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12455 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12456 && ! frame_pointer_needed
12457 && TARGET_ARM)
12458 {
12459 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12460
12461 emit_set_insn (lr, plus_constant (lr, -4));
12462 }
12463
12464 if (live_regs_mask)
12465 {
12466 insn = emit_multi_reg_push (live_regs_mask);
12467 saved_regs += bit_count (live_regs_mask) * 4;
12468 RTX_FRAME_RELATED_P (insn) = 1;
12469 }
12470
12471 if (! IS_VOLATILE (func_type))
12472 saved_regs += arm_save_coproc_regs ();
12473
12474 if (frame_pointer_needed && TARGET_ARM)
12475 {
12476 /* Create the new frame pointer. */
12477 {
12478 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12479 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12480 RTX_FRAME_RELATED_P (insn) = 1;
12481
12482 if (IS_NESTED (func_type))
12483 {
12484 /* Recover the static chain register. */
12485 if (!df_regs_ever_live_p (3)
12486 || saved_pretend_args)
12487 insn = gen_rtx_REG (SImode, 3);
12488 else /* if (current_function_pretend_args_size == 0) */
12489 {
12490 insn = plus_constant (hard_frame_pointer_rtx, 4);
12491 insn = gen_frame_mem (SImode, insn);
12492 }
12493 emit_set_insn (ip_rtx, insn);
12494 /* Add a USE to stop propagate_one_insn() from barfing. */
12495 emit_insn (gen_prologue_use (ip_rtx));
12496 }
12497 }
12498 }
12499
12500 offsets = arm_get_frame_offsets ();
12501 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12502 {
12503 /* This add can produce multiple insns for a large constant, so we
12504 need to get tricky. */
12505 rtx last = get_last_insn ();
12506
12507 amount = GEN_INT (offsets->saved_args + saved_regs
12508 - offsets->outgoing_args);
12509
12510 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12511 amount));
12512 do
12513 {
12514 last = last ? NEXT_INSN (last) : get_insns ();
12515 RTX_FRAME_RELATED_P (last) = 1;
12516 }
12517 while (last != insn);
12518
12519 /* If the frame pointer is needed, emit a special barrier that
12520 will prevent the scheduler from moving stores to the frame
12521 before the stack adjustment. */
12522 if (frame_pointer_needed)
12523 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12524 hard_frame_pointer_rtx));
12525 }
12526
12527
12528 if (frame_pointer_needed && TARGET_THUMB2)
12529 thumb_set_frame_pointer (offsets);
12530
12531 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12532 {
12533 unsigned long mask;
12534
12535 mask = live_regs_mask;
12536 mask &= THUMB2_WORK_REGS;
12537 if (!IS_NESTED (func_type))
12538 mask |= (1 << IP_REGNUM);
12539 arm_load_pic_register (mask);
12540 }
12541
12542 /* If we are profiling, make sure no instructions are scheduled before
12543 the call to mcount. Similarly if the user has requested no
12544 scheduling in the prolog. Similarly if we want non-call exceptions
12545 using the EABI unwinder, to prevent faulting instructions from being
12546 swapped with a stack adjustment. */
12547 if (current_function_profile || !TARGET_SCHED_PROLOG
12548 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12549 emit_insn (gen_blockage ());
12550
12551 /* If the link register is being kept alive, with the return address in it,
12552 then make sure that it does not get reused by the ce2 pass. */
12553 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12554 cfun->machine->lr_save_eliminated = 1;
12555 }
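/* Putting the pieces together, the prologue emitted for the hypothetical
   frame used in the examples above (ARM mode, frame pointer needed,
   non-nested, save mask {r4, fp, ip, lr, pc}, 12 further bytes of stack)
   is roughly

	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #12

   which is the same shape as the sequence quoted in the comment above
   arm_poke_function_name, plus the final stack adjustment.  */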
12556 \f
12557 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12558 static void
12559 arm_print_condition (FILE *stream)
12560 {
12561 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12562 {
12563 /* Branch conversion is not implemented for Thumb-2. */
12564 if (TARGET_THUMB)
12565 {
12566 output_operand_lossage ("predicated Thumb instruction");
12567 return;
12568 }
12569 if (current_insn_predicate != NULL)
12570 {
12571 output_operand_lossage
12572 ("predicated instruction in conditional sequence");
12573 return;
12574 }
12575
12576 fputs (arm_condition_codes[arm_current_cc], stream);
12577 }
12578 else if (current_insn_predicate)
12579 {
12580 enum arm_cond_code code;
12581
12582 if (TARGET_THUMB1)
12583 {
12584 output_operand_lossage ("predicated Thumb instruction");
12585 return;
12586 }
12587
12588 code = get_arm_condition_code (current_insn_predicate);
12589 fputs (arm_condition_codes[code], stream);
12590 }
12591 }
12592
12593
12594 /* If CODE is 'd', then the X is a condition operand and the instruction
12595 should only be executed if the condition is true.
12596 if CODE is 'D', then the X is a condition operand and the instruction
12597 should only be executed if the condition is false: however, if the mode
12598 of the comparison is CCFPEmode, then always execute the instruction -- we
12599 do this because in these circumstances !GE does not necessarily imply LT;
12600 in these cases the instruction pattern will take care to make sure that
12601 an instruction containing %d will follow, thereby undoing the effects of
12602 doing this instruction unconditionally.
12603 If CODE is 'N' then X is a floating point operand that must be negated
12604 before output.
12605 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12606 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12607 void
12608 arm_print_operand (FILE *stream, rtx x, int code)
12609 {
12610 switch (code)
12611 {
12612 case '@':
12613 fputs (ASM_COMMENT_START, stream);
12614 return;
12615
12616 case '_':
12617 fputs (user_label_prefix, stream);
12618 return;
12619
12620 case '|':
12621 fputs (REGISTER_PREFIX, stream);
12622 return;
12623
12624 case '?':
12625 arm_print_condition (stream);
12626 return;
12627
12628 case '(':
12629 /* Nothing in unified syntax, otherwise the current condition code. */
12630 if (!TARGET_UNIFIED_ASM)
12631 arm_print_condition (stream);
12632 break;
12633
12634 case ')':
12635 /* The current condition code in unified syntax, otherwise nothing. */
12636 if (TARGET_UNIFIED_ASM)
12637 arm_print_condition (stream);
12638 break;
12639
12640 case '.':
12641 /* The current condition code for a condition code setting instruction.
12642 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12643 if (TARGET_UNIFIED_ASM)
12644 {
12645 fputc('s', stream);
12646 arm_print_condition (stream);
12647 }
12648 else
12649 {
12650 arm_print_condition (stream);
12651 fputc('s', stream);
12652 }
12653 return;
12654
12655 case '!':
12656 /* If the instruction is conditionally executed then print
12657 the current condition code, otherwise print 's'. */
12658 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12659 if (current_insn_predicate)
12660 arm_print_condition (stream);
12661 else
12662 fputc('s', stream);
12663 break;
12664
12665 /* %# is a "break" sequence. It doesn't output anything, but is used to
12666 separate e.g. operand numbers from following text, if that text consists
12667 of further digits which we don't want to be part of the operand
12668 number. */
12669 case '#':
12670 return;
12671
12672 case 'N':
12673 {
12674 REAL_VALUE_TYPE r;
12675 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12676 r = REAL_VALUE_NEGATE (r);
12677 fprintf (stream, "%s", fp_const_from_val (&r));
12678 }
12679 return;
12680
12681 /* An integer without a preceding # sign. */
12682 case 'c':
12683 gcc_assert (GET_CODE (x) == CONST_INT);
12684 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12685 return;
12686
12687 case 'B':
12688 if (GET_CODE (x) == CONST_INT)
12689 {
12690 HOST_WIDE_INT val;
12691 val = ARM_SIGN_EXTEND (~INTVAL (x));
12692 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12693 }
12694 else
12695 {
12696 putc ('~', stream);
12697 output_addr_const (stream, x);
12698 }
12699 return;
12700
12701 case 'L':
12702 /* The low 16 bits of an immediate constant. */
12703 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12704 return;
12705
12706 case 'i':
12707 fprintf (stream, "%s", arithmetic_instr (x, 1));
12708 return;
12709
12710 /* Truncate Cirrus shift counts. */
12711 case 's':
12712 if (GET_CODE (x) == CONST_INT)
12713 {
12714 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12715 return;
12716 }
12717 arm_print_operand (stream, x, 0);
12718 return;
12719
12720 case 'I':
12721 fprintf (stream, "%s", arithmetic_instr (x, 0));
12722 return;
12723
12724 case 'S':
12725 {
12726 HOST_WIDE_INT val;
12727 const char *shift;
12728
12729 if (!shift_operator (x, SImode))
12730 {
12731 output_operand_lossage ("invalid shift operand");
12732 break;
12733 }
12734
12735 shift = shift_op (x, &val);
12736
12737 if (shift)
12738 {
12739 fprintf (stream, ", %s ", shift);
12740 if (val == -1)
12741 arm_print_operand (stream, XEXP (x, 1), 0);
12742 else
12743 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12744 }
12745 }
12746 return;
12747
12748 /* An explanation of the 'Q', 'R' and 'H' register operands:
12749
12750 In a pair of registers containing a DI or DF value the 'Q'
12751 operand returns the register number of the register containing
12752 the least significant part of the value. The 'R' operand returns
12753 the register number of the register containing the most
12754 significant part of the value.
12755
12756 The 'H' operand returns the higher of the two register numbers.
12757 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12758 same as the 'Q' operand, since the most significant part of the
12759 value is held in the lower number register. The reverse is true
12760 on systems where WORDS_BIG_ENDIAN is false.
12761
12762 The purpose of these operands is to distinguish between cases
12763 where the endian-ness of the values is important (for example
12764 when they are added together), and cases where the endian-ness
12765 is irrelevant, but the order of register operations is important.
12766 For example when loading a value from memory into a register
12767 pair, the endian-ness does not matter. Provided that the value
12768 from the lower memory address is put into the lower numbered
12769 register, and the value from the higher address is put into the
12770 higher numbered register, the load will work regardless of whether
12771 the value being loaded is big-wordian or little-wordian. The
12772 order of the two register loads can matter however, if the address
12773 of the memory location is actually held in one of the registers
12774 being overwritten by the load. */
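    /* For instance, a DImode value held in the pair r2/r3 (hypothetical
       operand, purely illustrative): on a little-endian target %Q prints
       r2, %R prints r3 and %H prints r3; when WORDS_BIG_ENDIAN, %Q prints
       r3 and %R prints r2, while %H still prints r3.  */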
12775 case 'Q':
12776 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12777 {
12778 output_operand_lossage ("invalid operand for code '%c'", code);
12779 return;
12780 }
12781
12782 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12783 return;
12784
12785 case 'R':
12786 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12787 {
12788 output_operand_lossage ("invalid operand for code '%c'", code);
12789 return;
12790 }
12791
12792 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12793 return;
12794
12795 case 'H':
12796 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12797 {
12798 output_operand_lossage ("invalid operand for code '%c'", code);
12799 return;
12800 }
12801
12802 asm_fprintf (stream, "%r", REGNO (x) + 1);
12803 return;
12804
12805 case 'J':
12806 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12807 {
12808 output_operand_lossage ("invalid operand for code '%c'", code);
12809 return;
12810 }
12811
12812 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12813 return;
12814
12815 case 'K':
12816 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12817 {
12818 output_operand_lossage ("invalid operand for code '%c'", code);
12819 return;
12820 }
12821
12822 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12823 return;
12824
12825 case 'm':
12826 asm_fprintf (stream, "%r",
12827 GET_CODE (XEXP (x, 0)) == REG
12828 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12829 return;
12830
12831 case 'M':
12832 asm_fprintf (stream, "{%r-%r}",
12833 REGNO (x),
12834 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12835 return;
12836
12837 /* Like 'M', but writing doubleword vector registers, for use by Neon
12838 insns. */
12839 case 'h':
12840 {
12841 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12842 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12843 if (numregs == 1)
12844 asm_fprintf (stream, "{d%d}", regno);
12845 else
12846 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12847 }
12848 return;
12849
12850 case 'd':
12851 /* CONST_TRUE_RTX means always -- that's the default. */
12852 if (x == const_true_rtx)
12853 return;
12854
12855 if (!COMPARISON_P (x))
12856 {
12857 output_operand_lossage ("invalid operand for code '%c'", code);
12858 return;
12859 }
12860
12861 fputs (arm_condition_codes[get_arm_condition_code (x)],
12862 stream);
12863 return;
12864
12865 case 'D':
12866 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12867 want to do that. */
12868 if (x == const_true_rtx)
12869 {
12870 output_operand_lossage ("instruction never executed");
12871 return;
12872 }
12873 if (!COMPARISON_P (x))
12874 {
12875 output_operand_lossage ("invalid operand for code '%c'", code);
12876 return;
12877 }
12878
12879 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12880 (get_arm_condition_code (x))],
12881 stream);
12882 return;
12883
12884 /* Cirrus registers can be accessed in a variety of ways:
12885 single floating point (f)
12886 double floating point (d)
12887 32bit integer (fx)
12888 64bit integer (dx). */
12889 case 'W': /* Cirrus register in F mode. */
12890 case 'X': /* Cirrus register in D mode. */
12891 case 'Y': /* Cirrus register in FX mode. */
12892 case 'Z': /* Cirrus register in DX mode. */
12893 gcc_assert (GET_CODE (x) == REG
12894 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12895
12896 fprintf (stream, "mv%s%s",
12897 code == 'W' ? "f"
12898 : code == 'X' ? "d"
12899 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12900
12901 return;
12902
12903 /* Print cirrus register in the mode specified by the register's mode. */
12904 case 'V':
12905 {
12906 int mode = GET_MODE (x);
12907
12908 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12909 {
12910 output_operand_lossage ("invalid operand for code '%c'", code);
12911 return;
12912 }
12913
12914 fprintf (stream, "mv%s%s",
12915 mode == DFmode ? "d"
12916 : mode == SImode ? "fx"
12917 : mode == DImode ? "dx"
12918 : "f", reg_names[REGNO (x)] + 2);
12919
12920 return;
12921 }
12922
12923 case 'U':
12924 if (GET_CODE (x) != REG
12925 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
12926 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
12927 /* Bad value for wCG register number. */
12928 {
12929 output_operand_lossage ("invalid operand for code '%c'", code);
12930 return;
12931 }
12932
12933 else
12934 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
12935 return;
12936
12937 /* Print an iWMMXt control register name. */
12938 case 'w':
12939 if (GET_CODE (x) != CONST_INT
12940 || INTVAL (x) < 0
12941 || INTVAL (x) >= 16)
12942 /* Bad value for wC register number. */
12943 {
12944 output_operand_lossage ("invalid operand for code '%c'", code);
12945 return;
12946 }
12947
12948 else
12949 {
12950 static const char * wc_reg_names [16] =
12951 {
12952 "wCID", "wCon", "wCSSF", "wCASF",
12953 "wC4", "wC5", "wC6", "wC7",
12954 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
12955 "wC12", "wC13", "wC14", "wC15"
12956 };
12957
12958 fprintf (stream, wc_reg_names [INTVAL (x)]);
12959 }
12960 return;
12961
12962 /* Print a VFP/Neon double precision or quad precision register name. */
12963 case 'P':
12964 case 'q':
12965 {
12966 int mode = GET_MODE (x);
12967 int is_quad = (code == 'q');
12968 int regno;
12969
12970 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
12971 {
12972 output_operand_lossage ("invalid operand for code '%c'", code);
12973 return;
12974 }
12975
12976 if (GET_CODE (x) != REG
12977 || !IS_VFP_REGNUM (REGNO (x)))
12978 {
12979 output_operand_lossage ("invalid operand for code '%c'", code);
12980 return;
12981 }
12982
12983 regno = REGNO (x);
12984 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
12985 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
12986 {
12987 output_operand_lossage ("invalid operand for code '%c'", code);
12988 return;
12989 }
12990
12991 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
12992 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
12993 }
12994 return;
12995
12996 /* These two codes print the low/high doubleword register of a Neon quad
12997        register, respectively.  For pair-structure types, they can also print
12998 low/high quadword registers. */
12999 case 'e':
13000 case 'f':
13001 {
13002 int mode = GET_MODE (x);
13003 int regno;
13004
13005 if ((GET_MODE_SIZE (mode) != 16
13006 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13007 {
13008 output_operand_lossage ("invalid operand for code '%c'", code);
13009 return;
13010 }
13011
13012 regno = REGNO (x);
13013 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13014 {
13015 output_operand_lossage ("invalid operand for code '%c'", code);
13016 return;
13017 }
13018
13019 if (GET_MODE_SIZE (mode) == 16)
13020 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13021 + (code == 'f' ? 1 : 0));
13022 else
13023 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13024 + (code == 'f' ? 1 : 0));
13025 }
13026 return;
13027
13028 /* Print a VFPv3 floating-point constant, represented as an integer
13029 index. */
13030 case 'G':
13031 {
13032 int index = vfp3_const_double_index (x);
13033 gcc_assert (index != -1);
13034 fprintf (stream, "%d", index);
13035 }
13036 return;
13037
13038 /* Print bits representing opcode features for Neon.
13039
13040 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13041 and polynomials as unsigned.
13042
13043 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13044
13045 Bit 2 is 1 for rounding functions, 0 otherwise. */
13046
13047 /* Identify the type as 's', 'u', 'p' or 'f'. */
13048 case 'T':
13049 {
13050 HOST_WIDE_INT bits = INTVAL (x);
13051 fputc ("uspf"[bits & 3], stream);
13052 }
13053 return;
13054
13055 /* Likewise, but signed and unsigned integers are both 'i'. */
13056 case 'F':
13057 {
13058 HOST_WIDE_INT bits = INTVAL (x);
13059 fputc ("iipf"[bits & 3], stream);
13060 }
13061 return;
13062
13063 /* As for 'T', but emit 'u' instead of 'p'. */
13064 case 't':
13065 {
13066 HOST_WIDE_INT bits = INTVAL (x);
13067 fputc ("usuf"[bits & 3], stream);
13068 }
13069 return;
13070
13071 /* Bit 2: rounding (vs none). */
13072 case 'O':
13073 {
13074 HOST_WIDE_INT bits = INTVAL (x);
13075 fputs ((bits & 4) != 0 ? "r" : "", stream);
13076 }
13077 return;
13078
13079 default:
13080 if (x == 0)
13081 {
13082 output_operand_lossage ("missing operand");
13083 return;
13084 }
13085
13086 switch (GET_CODE (x))
13087 {
13088 case REG:
13089 asm_fprintf (stream, "%r", REGNO (x));
13090 break;
13091
13092 case MEM:
13093 output_memory_reference_mode = GET_MODE (x);
13094 output_address (XEXP (x, 0));
13095 break;
13096
13097 case CONST_DOUBLE:
13098 if (TARGET_NEON)
13099 {
13100 char fpstr[20];
13101 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13102 sizeof (fpstr), 0, 1);
13103 fprintf (stream, "#%s", fpstr);
13104 }
13105 else
13106 fprintf (stream, "#%s", fp_immediate_constant (x));
13107 break;
13108
13109 default:
13110 gcc_assert (GET_CODE (x) != NEG);
13111 fputc ('#', stream);
13112 output_addr_const (stream, x);
13113 break;
13114 }
13115 }
13116 }
13117 \f
13118 #ifndef AOF_ASSEMBLER
13119 /* Target hook for assembling integer objects. The ARM version needs to
13120 handle word-sized values specially. */
13121 static bool
13122 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13123 {
13124 enum machine_mode mode;
13125
13126 if (size == UNITS_PER_WORD && aligned_p)
13127 {
13128 fputs ("\t.word\t", asm_out_file);
13129 output_addr_const (asm_out_file, x);
13130
13131 /* Mark symbols as position independent. We only do this in the
13132 .text segment, not in the .data segment. */
13133 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13134 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13135 {
13136 /* See legitimize_pic_address for an explanation of the
13137 TARGET_VXWORKS_RTP check. */
13138 if (TARGET_VXWORKS_RTP
13139 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13140 fputs ("(GOT)", asm_out_file);
13141 else
13142 fputs ("(GOTOFF)", asm_out_file);
13143 }
13144 fputc ('\n', asm_out_file);
13145 return true;
13146 }
13147
13148 mode = GET_MODE (x);
13149
13150 if (arm_vector_mode_supported_p (mode))
13151 {
13152 int i, units;
13153 unsigned int invmask = 0, parts_per_word;
13154
13155 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13156
13157 units = CONST_VECTOR_NUNITS (x);
13158 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13159
13160 /* For big-endian Neon vectors, we must permute the vector to the form
13161 which, when loaded by a VLDR or VLDM instruction, will give a vector
13162 with the elements in the right order. */
13163 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13164 {
13165 parts_per_word = UNITS_PER_WORD / size;
13166 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13167 support those anywhere yet. */
13168 	  invmask = (parts_per_word == 0) ? 0 : (parts_per_word - 1);
13169 }
13170
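      /* For illustration (added comment): on a big-endian Neon target with
	 a V4HI vector and UNITS_PER_WORD == 4, parts_per_word is 2 and
	 invmask is 1, so the integer loop below reads the elements in the
	 order 1, 0, 3, 2 -- swapped within each 32-bit word.  */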
13171 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13172 for (i = 0; i < units; i++)
13173 {
13174 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13175 assemble_integer
13176 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13177 }
13178 else
13179 for (i = 0; i < units; i++)
13180 {
13181 rtx elt = CONST_VECTOR_ELT (x, i);
13182 REAL_VALUE_TYPE rval;
13183
13184 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13185
13186 assemble_real
13187 (rval, GET_MODE_INNER (mode),
13188 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13189 }
13190
13191 return true;
13192 }
13193
13194 return default_assemble_integer (x, size, aligned_p);
13195 }
13196
13197 static void
13198 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13199 {
13200 section *s;
13201
13202 if (!TARGET_AAPCS_BASED)
13203 {
13204 (is_ctor ?
13205 default_named_section_asm_out_constructor
13206 : default_named_section_asm_out_destructor) (symbol, priority);
13207 return;
13208 }
13209
13210 /* Put these in the .init_array section, using a special relocation. */
13211 if (priority != DEFAULT_INIT_PRIORITY)
13212 {
13213 char buf[18];
13214 sprintf (buf, "%s.%.5u",
13215 is_ctor ? ".init_array" : ".fini_array",
13216 priority);
13217 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13218 }
13219 else if (is_ctor)
13220 s = ctors_section;
13221 else
13222 s = dtors_section;
13223
13224 switch_to_section (s);
13225 assemble_align (POINTER_SIZE);
13226 fputs ("\t.word\t", asm_out_file);
13227 output_addr_const (asm_out_file, symbol);
13228 fputs ("(target1)\n", asm_out_file);
13229 }
13230
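/* For illustration (added comment; "my_ctor" is a hypothetical symbol): on
   an AAPCS-based target a constructor registered with priority 101 is
   emitted by the function above roughly as

	.section .init_array.00101
	.word	my_ctor(target1)

   where the (target1) relocation lets the platform decide whether the
   entry is encoded as an absolute or a PC-relative address.  */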
13231 /* Add a function to the list of static constructors. */
13232
13233 static void
13234 arm_elf_asm_constructor (rtx symbol, int priority)
13235 {
13236 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13237 }
13238
13239 /* Add a function to the list of static destructors. */
13240
13241 static void
13242 arm_elf_asm_destructor (rtx symbol, int priority)
13243 {
13244 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13245 }
13246 #endif
13247 \f
13248 /* A finite state machine takes care of noticing whether or not instructions
13249    can be conditionally executed, thus decreasing execution time and code
13250 size by deleting branch instructions. The fsm is controlled by
13251 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13252
13253 /* The states of the fsm controlling condition codes are:
13254 0: normal, do nothing special
13255 1: make ASM_OUTPUT_OPCODE not output this instruction
13256 2: make ASM_OUTPUT_OPCODE not output this instruction
13257 3: make instructions conditional
13258 4: make instructions conditional
13259
13260 State transitions (state->state by whom under condition):
13261 0 -> 1 final_prescan_insn if the `target' is a label
13262 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13263 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13264 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13265 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13266 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13267 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13268 (the target insn is arm_target_insn).
13269
13270 If the jump clobbers the conditions then we use states 2 and 4.
13271
13272 A similar thing can be done with conditional return insns.
13273
13274 XXX In case the `target' is an unconditional branch, this conditionalising
13275 of the instructions always reduces code size, but not always execution
13276 time. But then, I want to reduce the code size to somewhere near what
13277 /bin/cc produces. */
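/* For illustration (added example, not part of the original comment): the
   effect of this state machine on ARM code is to turn a short forward
   branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into a conditionally executed instruction, deleting the branch:

	cmp	r0, #0
	addne	r1, r1, #1

   provided the skipped instructions neither use nor clobber the condition
   codes.  */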
13278
13279 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13280 instructions. When a COND_EXEC instruction is seen the subsequent
13281 instructions are scanned so that multiple conditional instructions can be
13282 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13283 specify the length and true/false mask for the IT block. These will be
13284 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
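/* For illustration (added example): if arm_current_cc is "eq",
   arm_condexec_masklen is 3 and the mask bits select true, true, false,
   then thumb2_asm_output_opcode emits a block such as

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #2
	movne	r0, #0

   An IT instruction can cover at most four conditional insns, matching the
   limit checked in thumb2_final_prescan_insn below.  */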
13285
13286 /* Returns the index of the ARM condition code string in
13287 `arm_condition_codes'. COMPARISON should be an rtx like
13288 `(eq (...) (...))'. */
13289 static enum arm_cond_code
13290 get_arm_condition_code (rtx comparison)
13291 {
13292 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13293 int code;
13294 enum rtx_code comp_code = GET_CODE (comparison);
13295
13296 if (GET_MODE_CLASS (mode) != MODE_CC)
13297 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13298 XEXP (comparison, 1));
13299
13300 switch (mode)
13301 {
13302 case CC_DNEmode: code = ARM_NE; goto dominance;
13303 case CC_DEQmode: code = ARM_EQ; goto dominance;
13304 case CC_DGEmode: code = ARM_GE; goto dominance;
13305 case CC_DGTmode: code = ARM_GT; goto dominance;
13306 case CC_DLEmode: code = ARM_LE; goto dominance;
13307 case CC_DLTmode: code = ARM_LT; goto dominance;
13308 case CC_DGEUmode: code = ARM_CS; goto dominance;
13309 case CC_DGTUmode: code = ARM_HI; goto dominance;
13310 case CC_DLEUmode: code = ARM_LS; goto dominance;
13311 case CC_DLTUmode: code = ARM_CC;
13312
13313 dominance:
13314 gcc_assert (comp_code == EQ || comp_code == NE);
13315
13316 if (comp_code == EQ)
13317 return ARM_INVERSE_CONDITION_CODE (code);
13318 return code;
13319
13320 case CC_NOOVmode:
13321 switch (comp_code)
13322 {
13323 case NE: return ARM_NE;
13324 case EQ: return ARM_EQ;
13325 case GE: return ARM_PL;
13326 case LT: return ARM_MI;
13327 default: gcc_unreachable ();
13328 }
13329
13330 case CC_Zmode:
13331 switch (comp_code)
13332 {
13333 case NE: return ARM_NE;
13334 case EQ: return ARM_EQ;
13335 default: gcc_unreachable ();
13336 }
13337
13338 case CC_Nmode:
13339 switch (comp_code)
13340 {
13341 case NE: return ARM_MI;
13342 case EQ: return ARM_PL;
13343 default: gcc_unreachable ();
13344 }
13345
13346 case CCFPEmode:
13347 case CCFPmode:
13348 /* These encodings assume that AC=1 in the FPA system control
13349 byte. This allows us to handle all cases except UNEQ and
13350 LTGT. */
13351 switch (comp_code)
13352 {
13353 case GE: return ARM_GE;
13354 case GT: return ARM_GT;
13355 case LE: return ARM_LS;
13356 case LT: return ARM_MI;
13357 case NE: return ARM_NE;
13358 case EQ: return ARM_EQ;
13359 case ORDERED: return ARM_VC;
13360 case UNORDERED: return ARM_VS;
13361 case UNLT: return ARM_LT;
13362 case UNLE: return ARM_LE;
13363 case UNGT: return ARM_HI;
13364 case UNGE: return ARM_PL;
13365 /* UNEQ and LTGT do not have a representation. */
13366 case UNEQ: /* Fall through. */
13367 case LTGT: /* Fall through. */
13368 default: gcc_unreachable ();
13369 }
13370
13371 case CC_SWPmode:
13372 switch (comp_code)
13373 {
13374 case NE: return ARM_NE;
13375 case EQ: return ARM_EQ;
13376 case GE: return ARM_LE;
13377 case GT: return ARM_LT;
13378 case LE: return ARM_GE;
13379 case LT: return ARM_GT;
13380 case GEU: return ARM_LS;
13381 case GTU: return ARM_CC;
13382 case LEU: return ARM_CS;
13383 case LTU: return ARM_HI;
13384 default: gcc_unreachable ();
13385 }
13386
13387 case CC_Cmode:
13388 switch (comp_code)
13389 {
13390 case LTU: return ARM_CS;
13391 case GEU: return ARM_CC;
13392 default: gcc_unreachable ();
13393 }
13394
13395 case CCmode:
13396 switch (comp_code)
13397 {
13398 case NE: return ARM_NE;
13399 case EQ: return ARM_EQ;
13400 case GE: return ARM_GE;
13401 case GT: return ARM_GT;
13402 case LE: return ARM_LE;
13403 case LT: return ARM_LT;
13404 case GEU: return ARM_CS;
13405 case GTU: return ARM_HI;
13406 case LEU: return ARM_LS;
13407 case LTU: return ARM_CC;
13408 default: gcc_unreachable ();
13409 }
13410
13411 default: gcc_unreachable ();
13412 }
13413 }
13414
13415 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13416 instructions. */
13417 void
13418 thumb2_final_prescan_insn (rtx insn)
13419 {
13420 rtx first_insn = insn;
13421 rtx body = PATTERN (insn);
13422 rtx predicate;
13423 enum arm_cond_code code;
13424 int n;
13425 int mask;
13426
13427 /* Remove the previous insn from the count of insns to be output. */
13428 if (arm_condexec_count)
13429 arm_condexec_count--;
13430
13431 /* Nothing to do if we are already inside a conditional block. */
13432 if (arm_condexec_count)
13433 return;
13434
13435 if (GET_CODE (body) != COND_EXEC)
13436 return;
13437
13438 /* Conditional jumps are implemented directly. */
13439 if (GET_CODE (insn) == JUMP_INSN)
13440 return;
13441
13442 predicate = COND_EXEC_TEST (body);
13443 arm_current_cc = get_arm_condition_code (predicate);
13444
13445 n = get_attr_ce_count (insn);
13446 arm_condexec_count = 1;
13447 arm_condexec_mask = (1 << n) - 1;
13448 arm_condexec_masklen = n;
13449 /* See if subsequent instructions can be combined into the same block. */
13450 for (;;)
13451 {
13452 insn = next_nonnote_insn (insn);
13453
13454 /* Jumping into the middle of an IT block is illegal, so a label or
13455 barrier terminates the block. */
13456 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13457 break;
13458
13459 body = PATTERN (insn);
13460 /* USE and CLOBBER aren't really insns, so just skip them. */
13461 if (GET_CODE (body) == USE
13462 || GET_CODE (body) == CLOBBER)
13463 continue;
13464
13465 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13466 if (GET_CODE (body) != COND_EXEC)
13467 break;
13468 /* Allow up to 4 conditionally executed instructions in a block. */
13469 n = get_attr_ce_count (insn);
13470 if (arm_condexec_masklen + n > 4)
13471 break;
13472
13473 predicate = COND_EXEC_TEST (body);
13474 code = get_arm_condition_code (predicate);
13475 mask = (1 << n) - 1;
13476 if (arm_current_cc == code)
13477 arm_condexec_mask |= (mask << arm_condexec_masklen);
13478 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13479 break;
13480
13481 arm_condexec_count++;
13482 arm_condexec_masklen += n;
13483
13484 /* A jump must be the last instruction in a conditional block. */
13485 if (GET_CODE(insn) == JUMP_INSN)
13486 break;
13487 }
13488 /* Restore recog_data (getting the attributes of other insns can
13489 destroy this array, but final.c assumes that it remains intact
13490 across this call). */
13491 extract_constrain_insn_cached (first_insn);
13492 }
13493
13494 void
13495 arm_final_prescan_insn (rtx insn)
13496 {
13497 /* BODY will hold the body of INSN. */
13498 rtx body = PATTERN (insn);
13499
13500 /* This will be 1 if trying to repeat the trick, and things need to be
13501 reversed if it appears to fail. */
13502 int reverse = 0;
13503
13504   /* JUMP_CLOBBERS will be one if the condition codes are clobbered when the
13505      branch is taken, even if the rtl suggests otherwise.  It also
13506 means that we have to grub around within the jump expression to find
13507 out what the conditions are when the jump isn't taken. */
13508 int jump_clobbers = 0;
13509
13510 /* If we start with a return insn, we only succeed if we find another one. */
13511 int seeking_return = 0;
13512
13513 /* START_INSN will hold the insn from where we start looking. This is the
13514 first insn after the following code_label if REVERSE is true. */
13515 rtx start_insn = insn;
13516
13517 /* If in state 4, check if the target branch is reached, in order to
13518 change back to state 0. */
13519 if (arm_ccfsm_state == 4)
13520 {
13521 if (insn == arm_target_insn)
13522 {
13523 arm_target_insn = NULL;
13524 arm_ccfsm_state = 0;
13525 }
13526 return;
13527 }
13528
13529 /* If in state 3, it is possible to repeat the trick, if this insn is an
13530 unconditional branch to a label, and immediately following this branch
13531 is the previous target label which is only used once, and the label this
13532 branch jumps to is not too far off. */
13533 if (arm_ccfsm_state == 3)
13534 {
13535 if (simplejump_p (insn))
13536 {
13537 start_insn = next_nonnote_insn (start_insn);
13538 if (GET_CODE (start_insn) == BARRIER)
13539 {
13540 /* XXX Isn't this always a barrier? */
13541 start_insn = next_nonnote_insn (start_insn);
13542 }
13543 if (GET_CODE (start_insn) == CODE_LABEL
13544 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13545 && LABEL_NUSES (start_insn) == 1)
13546 reverse = TRUE;
13547 else
13548 return;
13549 }
13550 else if (GET_CODE (body) == RETURN)
13551 {
13552 start_insn = next_nonnote_insn (start_insn);
13553 if (GET_CODE (start_insn) == BARRIER)
13554 start_insn = next_nonnote_insn (start_insn);
13555 if (GET_CODE (start_insn) == CODE_LABEL
13556 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13557 && LABEL_NUSES (start_insn) == 1)
13558 {
13559 reverse = TRUE;
13560 seeking_return = 1;
13561 }
13562 else
13563 return;
13564 }
13565 else
13566 return;
13567 }
13568
13569 gcc_assert (!arm_ccfsm_state || reverse);
13570 if (GET_CODE (insn) != JUMP_INSN)
13571 return;
13572
13573   /* This jump might be paralleled with a clobber of the condition codes;
13574      the jump should always come first.  */
13575 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13576 body = XVECEXP (body, 0, 0);
13577
13578 if (reverse
13579 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13580 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13581 {
13582 int insns_skipped;
13583 int fail = FALSE, succeed = FALSE;
13584 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13585 int then_not_else = TRUE;
13586 rtx this_insn = start_insn, label = 0;
13587
13588 /* If the jump cannot be done with one instruction, we cannot
13589 conditionally execute the instruction in the inverse case. */
13590 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13591 {
13592 jump_clobbers = 1;
13593 return;
13594 }
13595
13596 /* Register the insn jumped to. */
13597 if (reverse)
13598 {
13599 if (!seeking_return)
13600 label = XEXP (SET_SRC (body), 0);
13601 }
13602 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13603 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13604 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13605 {
13606 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13607 then_not_else = FALSE;
13608 }
13609 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13610 seeking_return = 1;
13611 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13612 {
13613 seeking_return = 1;
13614 then_not_else = FALSE;
13615 }
13616 else
13617 gcc_unreachable ();
13618
13619 /* See how many insns this branch skips, and what kind of insns. If all
13620 insns are okay, and the label or unconditional branch to the same
13621 label is not too far away, succeed. */
13622 for (insns_skipped = 0;
13623 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13624 {
13625 rtx scanbody;
13626
13627 this_insn = next_nonnote_insn (this_insn);
13628 if (!this_insn)
13629 break;
13630
13631 switch (GET_CODE (this_insn))
13632 {
13633 case CODE_LABEL:
13634 /* Succeed if it is the target label, otherwise fail since
13635 control falls in from somewhere else. */
13636 if (this_insn == label)
13637 {
13638 if (jump_clobbers)
13639 {
13640 arm_ccfsm_state = 2;
13641 this_insn = next_nonnote_insn (this_insn);
13642 }
13643 else
13644 arm_ccfsm_state = 1;
13645 succeed = TRUE;
13646 }
13647 else
13648 fail = TRUE;
13649 break;
13650
13651 case BARRIER:
13652 /* Succeed if the following insn is the target label.
13653 Otherwise fail.
13654 If return insns are used then the last insn in a function
13655 will be a barrier. */
13656 this_insn = next_nonnote_insn (this_insn);
13657 if (this_insn && this_insn == label)
13658 {
13659 if (jump_clobbers)
13660 {
13661 arm_ccfsm_state = 2;
13662 this_insn = next_nonnote_insn (this_insn);
13663 }
13664 else
13665 arm_ccfsm_state = 1;
13666 succeed = TRUE;
13667 }
13668 else
13669 fail = TRUE;
13670 break;
13671
13672 case CALL_INSN:
13673 /* The AAPCS says that conditional calls should not be
13674 used since they make interworking inefficient (the
13675 linker can't transform BL<cond> into BLX). That's
13676 only a problem if the machine has BLX. */
13677 if (arm_arch5)
13678 {
13679 fail = TRUE;
13680 break;
13681 }
13682
13683 /* Succeed if the following insn is the target label, or
13684 if the following two insns are a barrier and the
13685 target label. */
13686 this_insn = next_nonnote_insn (this_insn);
13687 if (this_insn && GET_CODE (this_insn) == BARRIER)
13688 this_insn = next_nonnote_insn (this_insn);
13689
13690 if (this_insn && this_insn == label
13691 && insns_skipped < max_insns_skipped)
13692 {
13693 if (jump_clobbers)
13694 {
13695 arm_ccfsm_state = 2;
13696 this_insn = next_nonnote_insn (this_insn);
13697 }
13698 else
13699 arm_ccfsm_state = 1;
13700 succeed = TRUE;
13701 }
13702 else
13703 fail = TRUE;
13704 break;
13705
13706 case JUMP_INSN:
13707 /* If this is an unconditional branch to the same label, succeed.
13708 If it is to another label, do nothing. If it is conditional,
13709 fail. */
13710 /* XXX Probably, the tests for SET and the PC are
13711 unnecessary. */
13712
13713 scanbody = PATTERN (this_insn);
13714 if (GET_CODE (scanbody) == SET
13715 && GET_CODE (SET_DEST (scanbody)) == PC)
13716 {
13717 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13718 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13719 {
13720 arm_ccfsm_state = 2;
13721 succeed = TRUE;
13722 }
13723 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13724 fail = TRUE;
13725 }
13726 /* Fail if a conditional return is undesirable (e.g. on a
13727 StrongARM), but still allow this if optimizing for size. */
13728 else if (GET_CODE (scanbody) == RETURN
13729 && !use_return_insn (TRUE, NULL)
13730 && !optimize_size)
13731 fail = TRUE;
13732 else if (GET_CODE (scanbody) == RETURN
13733 && seeking_return)
13734 {
13735 arm_ccfsm_state = 2;
13736 succeed = TRUE;
13737 }
13738 else if (GET_CODE (scanbody) == PARALLEL)
13739 {
13740 switch (get_attr_conds (this_insn))
13741 {
13742 case CONDS_NOCOND:
13743 break;
13744 default:
13745 fail = TRUE;
13746 break;
13747 }
13748 }
13749 else
13750 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13751
13752 break;
13753
13754 case INSN:
13755 /* Instructions using or affecting the condition codes make it
13756 fail. */
13757 scanbody = PATTERN (this_insn);
13758 if (!(GET_CODE (scanbody) == SET
13759 || GET_CODE (scanbody) == PARALLEL)
13760 || get_attr_conds (this_insn) != CONDS_NOCOND)
13761 fail = TRUE;
13762
13763 	  /* A conditional Cirrus instruction must be followed by
13764 	     a non-Cirrus instruction.  However, since we
13765 	     conditionalize instructions in this function, and
13766 	     since by the time we get here we cannot add
13767 	     instructions (nops) because shorten_branches() has
13768 	     already been called, we disable conditionalizing
13769 	     Cirrus instructions to be safe.  */
13770 if (GET_CODE (scanbody) != USE
13771 && GET_CODE (scanbody) != CLOBBER
13772 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13773 fail = TRUE;
13774 break;
13775
13776 default:
13777 break;
13778 }
13779 }
13780 if (succeed)
13781 {
13782 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13783 arm_target_label = CODE_LABEL_NUMBER (label);
13784 else
13785 {
13786 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13787
13788 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13789 {
13790 this_insn = next_nonnote_insn (this_insn);
13791 gcc_assert (!this_insn
13792 || (GET_CODE (this_insn) != BARRIER
13793 && GET_CODE (this_insn) != CODE_LABEL));
13794 }
13795 if (!this_insn)
13796 {
13797 	      /* Oh, dear!  We ran off the end; give up.  */
13798 extract_constrain_insn_cached (insn);
13799 arm_ccfsm_state = 0;
13800 arm_target_insn = NULL;
13801 return;
13802 }
13803 arm_target_insn = this_insn;
13804 }
13805 if (jump_clobbers)
13806 {
13807 gcc_assert (!reverse);
13808 arm_current_cc =
13809 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13810 0), 0), 1));
13811 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13812 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13813 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13814 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13815 }
13816 else
13817 {
13818 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13819 what it was. */
13820 if (!reverse)
13821 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13822 0));
13823 }
13824
13825 if (reverse || then_not_else)
13826 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13827 }
13828
13829 /* Restore recog_data (getting the attributes of other insns can
13830 destroy this array, but final.c assumes that it remains intact
13831      across this call).  */
13832 extract_constrain_insn_cached (insn);
13833 }
13834 }
13835
13836 /* Output IT instructions. */
13837 void
13838 thumb2_asm_output_opcode (FILE * stream)
13839 {
13840 char buff[5];
13841 int n;
13842
13843 if (arm_condexec_mask)
13844 {
13845 for (n = 0; n < arm_condexec_masklen; n++)
13846 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13847 buff[n] = 0;
13848 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13849 arm_condition_codes[arm_current_cc]);
13850 arm_condexec_mask = 0;
13851 }
13852 }
13853
13854 /* Returns true if REGNO is a valid register
13855 for holding a quantity of type MODE. */
13856 int
13857 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13858 {
13859 if (GET_MODE_CLASS (mode) == MODE_CC)
13860 return (regno == CC_REGNUM
13861 || (TARGET_HARD_FLOAT && TARGET_VFP
13862 && regno == VFPCC_REGNUM));
13863
13864 if (TARGET_THUMB1)
13865 /* For the Thumb we only allow values bigger than SImode in
13866 registers 0 - 6, so that there is always a second low
13867 register available to hold the upper part of the value.
13868        We probably ought to ensure that the register is the
13869 start of an even numbered register pair. */
13870 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13871
13872 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13873 && IS_CIRRUS_REGNUM (regno))
13874 /* We have outlawed SI values in Cirrus registers because they
13875 reside in the lower 32 bits, but SF values reside in the
13876 upper 32 bits. This causes gcc all sorts of grief. We can't
13877 even split the registers into pairs because Cirrus SI values
13878        get sign extended to 64 bits -- aldyh.  */
13879 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13880
13881 if (TARGET_HARD_FLOAT && TARGET_VFP
13882 && IS_VFP_REGNUM (regno))
13883 {
13884 if (mode == SFmode || mode == SImode)
13885 return VFP_REGNO_OK_FOR_SINGLE (regno);
13886
13887 if (mode == DFmode)
13888 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13889
13890 if (TARGET_NEON)
13891 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13892 || (VALID_NEON_QREG_MODE (mode)
13893 && NEON_REGNO_OK_FOR_QUAD (regno))
13894 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13895 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13896 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13897 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13898 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13899
13900 return FALSE;
13901 }
13902
13903 if (TARGET_REALLY_IWMMXT)
13904 {
13905 if (IS_IWMMXT_GR_REGNUM (regno))
13906 return mode == SImode;
13907
13908 if (IS_IWMMXT_REGNUM (regno))
13909 return VALID_IWMMXT_REG_MODE (mode);
13910 }
13911
13912 /* We allow any value to be stored in the general registers.
13913 Restrict doubleword quantities to even register pairs so that we can
13914 use ldrd. Do not allow Neon structure opaque modes in general registers;
13915 they would use too many. */
13916 if (regno <= LAST_ARM_REGNUM)
13917 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
13918 && !VALID_NEON_STRUCT_MODE (mode);
13919
13920 if (regno == FRAME_POINTER_REGNUM
13921 || regno == ARG_POINTER_REGNUM)
13922 /* We only allow integers in the fake hard registers. */
13923 return GET_MODE_CLASS (mode) == MODE_INT;
13924
13925 /* The only registers left are the FPA registers
13926 which we only allow to hold FP values. */
13927 return (TARGET_HARD_FLOAT && TARGET_FPA
13928 && GET_MODE_CLASS (mode) == MODE_FLOAT
13929 && regno >= FIRST_FPA_REGNUM
13930 && regno <= LAST_FPA_REGNUM);
13931 }
13932
13933 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
13934 not used in arm mode. */
13935 int
13936 arm_regno_class (int regno)
13937 {
13938 if (TARGET_THUMB1)
13939 {
13940 if (regno == STACK_POINTER_REGNUM)
13941 return STACK_REG;
13942 if (regno == CC_REGNUM)
13943 return CC_REG;
13944 if (regno < 8)
13945 return LO_REGS;
13946 return HI_REGS;
13947 }
13948
13949 if (TARGET_THUMB2 && regno < 8)
13950 return LO_REGS;
13951
13952 if ( regno <= LAST_ARM_REGNUM
13953 || regno == FRAME_POINTER_REGNUM
13954 || regno == ARG_POINTER_REGNUM)
13955 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
13956
13957 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
13958 return TARGET_THUMB2 ? CC_REG : NO_REGS;
13959
13960 if (IS_CIRRUS_REGNUM (regno))
13961 return CIRRUS_REGS;
13962
13963 if (IS_VFP_REGNUM (regno))
13964 {
13965 if (regno <= D7_VFP_REGNUM)
13966 return VFP_D0_D7_REGS;
13967 else if (regno <= LAST_LO_VFP_REGNUM)
13968 return VFP_LO_REGS;
13969 else
13970 return VFP_HI_REGS;
13971 }
13972
13973 if (IS_IWMMXT_REGNUM (regno))
13974 return IWMMXT_REGS;
13975
13976 if (IS_IWMMXT_GR_REGNUM (regno))
13977 return IWMMXT_GR_REGS;
13978
13979 return FPA_REGS;
13980 }
13981
13982 /* Handle a special case when computing the offset
13983 of an argument from the frame pointer. */
13984 int
13985 arm_debugger_arg_offset (int value, rtx addr)
13986 {
13987 rtx insn;
13988
13989 /* We are only interested if dbxout_parms() failed to compute the offset. */
13990 if (value != 0)
13991 return 0;
13992
13993 /* We can only cope with the case where the address is held in a register. */
13994 if (GET_CODE (addr) != REG)
13995 return 0;
13996
13997 /* If we are using the frame pointer to point at the argument, then
13998 an offset of 0 is correct. */
13999 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14000 return 0;
14001
14002 /* If we are using the stack pointer to point at the
14003 argument, then an offset of 0 is correct. */
14004 /* ??? Check this is consistent with thumb2 frame layout. */
14005 if ((TARGET_THUMB || !frame_pointer_needed)
14006 && REGNO (addr) == SP_REGNUM)
14007 return 0;
14008
14009 /* Oh dear. The argument is pointed to by a register rather
14010 than being held in a register, or being stored at a known
14011 offset from the frame pointer. Since GDB only understands
14012 those two kinds of argument we must translate the address
14013 held in the register into an offset from the frame pointer.
14014 We do this by searching through the insns for the function
14015 looking to see where this register gets its value. If the
14016 register is initialized from the frame pointer plus an offset
14017 then we are in luck and we can continue, otherwise we give up.
14018
14019 This code is exercised by producing debugging information
14020 for a function with arguments like this:
14021
14022 double func (double a, double b, int c, double d) {return d;}
14023
14024 Without this code the stab for parameter 'd' will be set to
14025 an offset of 0 from the frame pointer, rather than 8. */
14026
14027 /* The if() statement says:
14028
14029 If the insn is a normal instruction
14030 and if the insn is setting the value in a register
14031 and if the register being set is the register holding the address of the argument
14032        and if the address is computed by an addition
14033 that involves adding to a register
14034 which is the frame pointer
14035 a constant integer
14036
14037 then... */
14038
14039 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14040 {
14041 if ( GET_CODE (insn) == INSN
14042 && GET_CODE (PATTERN (insn)) == SET
14043 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14044 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14045 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14046 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14047 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14048 )
14049 {
14050 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14051
14052 break;
14053 }
14054 }
14055
14056 if (value == 0)
14057 {
14058 debug_rtx (addr);
14059 warning (0, "unable to compute real location of stacked parameter");
14060 value = 8; /* XXX magic hack */
14061 }
14062
14063 return value;
14064 }
14065 \f
14066 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14067 do \
14068 { \
14069 if ((MASK) & insn_flags) \
14070 add_builtin_function ((NAME), (TYPE), (CODE), \
14071 BUILT_IN_MD, NULL, NULL_TREE); \
14072 } \
14073 while (0)
14074
14075 struct builtin_description
14076 {
14077 const unsigned int mask;
14078 const enum insn_code icode;
14079 const char * const name;
14080 const enum arm_builtins code;
14081 const enum rtx_code comparison;
14082 const unsigned int flag;
14083 };
14084
14085 static const struct builtin_description bdesc_2arg[] =
14086 {
14087 #define IWMMXT_BUILTIN(code, string, builtin) \
14088 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14089 ARM_BUILTIN_##builtin, 0, 0 },
14090
14091 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14092 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14093 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14094 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14095 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14096 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14097 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14098 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14099 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14100 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14101 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14102 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14103 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14104 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14105 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14106 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14107 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14108 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14109 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14110 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14111 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14112 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14113 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14114 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14115 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14116 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14117 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14118 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14119 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14120 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14121 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14122 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14123 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14124 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14125 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14126 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14127 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14128 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14129 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14130 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14131 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14132 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14133 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14134 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14135 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14136 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14137 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14138 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14139 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14140 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14141 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14142 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14143 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14144 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14145 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14146 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14147 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14148 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14149
14150 #define IWMMXT_BUILTIN2(code, builtin) \
14151 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14152
14153 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14154 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14155 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14156 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14157 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14158 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14159 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14160 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14161 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14162 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14163 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14164 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14165 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14166 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14167 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14168 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14169 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14170 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14171 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14172 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14173 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14174 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14175 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14176 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14177 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14178 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14179 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14180 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14181 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14182 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14183 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14184 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14185 };
14186
14187 static const struct builtin_description bdesc_1arg[] =
14188 {
14189 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14190 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14191 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14192 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14193 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14194 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14195 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14196 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14197 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14198 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14199 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14200 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14201 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14202 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14203 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14204 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14205 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14206 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14207 };
14208
14209 /* Set up all the iWMMXt builtins. This is
14210 not called if TARGET_IWMMXT is zero. */
14211
14212 static void
14213 arm_init_iwmmxt_builtins (void)
14214 {
14215 const struct builtin_description * d;
14216 size_t i;
14217 tree endlink = void_list_node;
14218
14219 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14220 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14221 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14222
14223 tree int_ftype_int
14224 = build_function_type (integer_type_node,
14225 tree_cons (NULL_TREE, integer_type_node, endlink));
14226 tree v8qi_ftype_v8qi_v8qi_int
14227 = build_function_type (V8QI_type_node,
14228 tree_cons (NULL_TREE, V8QI_type_node,
14229 tree_cons (NULL_TREE, V8QI_type_node,
14230 tree_cons (NULL_TREE,
14231 integer_type_node,
14232 endlink))));
14233 tree v4hi_ftype_v4hi_int
14234 = build_function_type (V4HI_type_node,
14235 tree_cons (NULL_TREE, V4HI_type_node,
14236 tree_cons (NULL_TREE, integer_type_node,
14237 endlink)));
14238 tree v2si_ftype_v2si_int
14239 = build_function_type (V2SI_type_node,
14240 tree_cons (NULL_TREE, V2SI_type_node,
14241 tree_cons (NULL_TREE, integer_type_node,
14242 endlink)));
14243 tree v2si_ftype_di_di
14244 = build_function_type (V2SI_type_node,
14245 tree_cons (NULL_TREE, long_long_integer_type_node,
14246 tree_cons (NULL_TREE, long_long_integer_type_node,
14247 endlink)));
14248 tree di_ftype_di_int
14249 = build_function_type (long_long_integer_type_node,
14250 tree_cons (NULL_TREE, long_long_integer_type_node,
14251 tree_cons (NULL_TREE, integer_type_node,
14252 endlink)));
14253 tree di_ftype_di_int_int
14254 = build_function_type (long_long_integer_type_node,
14255 tree_cons (NULL_TREE, long_long_integer_type_node,
14256 tree_cons (NULL_TREE, integer_type_node,
14257 tree_cons (NULL_TREE,
14258 integer_type_node,
14259 endlink))));
14260 tree int_ftype_v8qi
14261 = build_function_type (integer_type_node,
14262 tree_cons (NULL_TREE, V8QI_type_node,
14263 endlink));
14264 tree int_ftype_v4hi
14265 = build_function_type (integer_type_node,
14266 tree_cons (NULL_TREE, V4HI_type_node,
14267 endlink));
14268 tree int_ftype_v2si
14269 = build_function_type (integer_type_node,
14270 tree_cons (NULL_TREE, V2SI_type_node,
14271 endlink));
14272 tree int_ftype_v8qi_int
14273 = build_function_type (integer_type_node,
14274 tree_cons (NULL_TREE, V8QI_type_node,
14275 tree_cons (NULL_TREE, integer_type_node,
14276 endlink)));
14277 tree int_ftype_v4hi_int
14278 = build_function_type (integer_type_node,
14279 tree_cons (NULL_TREE, V4HI_type_node,
14280 tree_cons (NULL_TREE, integer_type_node,
14281 endlink)));
14282 tree int_ftype_v2si_int
14283 = build_function_type (integer_type_node,
14284 tree_cons (NULL_TREE, V2SI_type_node,
14285 tree_cons (NULL_TREE, integer_type_node,
14286 endlink)));
14287 tree v8qi_ftype_v8qi_int_int
14288 = build_function_type (V8QI_type_node,
14289 tree_cons (NULL_TREE, V8QI_type_node,
14290 tree_cons (NULL_TREE, integer_type_node,
14291 tree_cons (NULL_TREE,
14292 integer_type_node,
14293 endlink))));
14294 tree v4hi_ftype_v4hi_int_int
14295 = build_function_type (V4HI_type_node,
14296 tree_cons (NULL_TREE, V4HI_type_node,
14297 tree_cons (NULL_TREE, integer_type_node,
14298 tree_cons (NULL_TREE,
14299 integer_type_node,
14300 endlink))));
14301 tree v2si_ftype_v2si_int_int
14302 = build_function_type (V2SI_type_node,
14303 tree_cons (NULL_TREE, V2SI_type_node,
14304 tree_cons (NULL_TREE, integer_type_node,
14305 tree_cons (NULL_TREE,
14306 integer_type_node,
14307 endlink))));
14308 /* Miscellaneous. */
14309 tree v8qi_ftype_v4hi_v4hi
14310 = build_function_type (V8QI_type_node,
14311 tree_cons (NULL_TREE, V4HI_type_node,
14312 tree_cons (NULL_TREE, V4HI_type_node,
14313 endlink)));
14314 tree v4hi_ftype_v2si_v2si
14315 = build_function_type (V4HI_type_node,
14316 tree_cons (NULL_TREE, V2SI_type_node,
14317 tree_cons (NULL_TREE, V2SI_type_node,
14318 endlink)));
14319 tree v2si_ftype_v4hi_v4hi
14320 = build_function_type (V2SI_type_node,
14321 tree_cons (NULL_TREE, V4HI_type_node,
14322 tree_cons (NULL_TREE, V4HI_type_node,
14323 endlink)));
14324 tree v2si_ftype_v8qi_v8qi
14325 = build_function_type (V2SI_type_node,
14326 tree_cons (NULL_TREE, V8QI_type_node,
14327 tree_cons (NULL_TREE, V8QI_type_node,
14328 endlink)));
14329 tree v4hi_ftype_v4hi_di
14330 = build_function_type (V4HI_type_node,
14331 tree_cons (NULL_TREE, V4HI_type_node,
14332 tree_cons (NULL_TREE,
14333 long_long_integer_type_node,
14334 endlink)));
14335 tree v2si_ftype_v2si_di
14336 = build_function_type (V2SI_type_node,
14337 tree_cons (NULL_TREE, V2SI_type_node,
14338 tree_cons (NULL_TREE,
14339 long_long_integer_type_node,
14340 endlink)));
14341 tree void_ftype_int_int
14342 = build_function_type (void_type_node,
14343 tree_cons (NULL_TREE, integer_type_node,
14344 tree_cons (NULL_TREE, integer_type_node,
14345 endlink)));
14346 tree di_ftype_void
14347 = build_function_type (long_long_unsigned_type_node, endlink);
14348 tree di_ftype_v8qi
14349 = build_function_type (long_long_integer_type_node,
14350 tree_cons (NULL_TREE, V8QI_type_node,
14351 endlink));
14352 tree di_ftype_v4hi
14353 = build_function_type (long_long_integer_type_node,
14354 tree_cons (NULL_TREE, V4HI_type_node,
14355 endlink));
14356 tree di_ftype_v2si
14357 = build_function_type (long_long_integer_type_node,
14358 tree_cons (NULL_TREE, V2SI_type_node,
14359 endlink));
14360 tree v2si_ftype_v4hi
14361 = build_function_type (V2SI_type_node,
14362 tree_cons (NULL_TREE, V4HI_type_node,
14363 endlink));
14364 tree v4hi_ftype_v8qi
14365 = build_function_type (V4HI_type_node,
14366 tree_cons (NULL_TREE, V8QI_type_node,
14367 endlink));
14368
14369 tree di_ftype_di_v4hi_v4hi
14370 = build_function_type (long_long_unsigned_type_node,
14371 tree_cons (NULL_TREE,
14372 long_long_unsigned_type_node,
14373 tree_cons (NULL_TREE, V4HI_type_node,
14374 tree_cons (NULL_TREE,
14375 V4HI_type_node,
14376 endlink))));
14377
14378 tree di_ftype_v4hi_v4hi
14379 = build_function_type (long_long_unsigned_type_node,
14380 tree_cons (NULL_TREE, V4HI_type_node,
14381 tree_cons (NULL_TREE, V4HI_type_node,
14382 endlink)));
14383
14384 /* Normal vector binops. */
14385 tree v8qi_ftype_v8qi_v8qi
14386 = build_function_type (V8QI_type_node,
14387 tree_cons (NULL_TREE, V8QI_type_node,
14388 tree_cons (NULL_TREE, V8QI_type_node,
14389 endlink)));
14390 tree v4hi_ftype_v4hi_v4hi
14391 = build_function_type (V4HI_type_node,
14392 tree_cons (NULL_TREE, V4HI_type_node,
14393 tree_cons (NULL_TREE, V4HI_type_node,
14394 endlink)));
14395 tree v2si_ftype_v2si_v2si
14396 = build_function_type (V2SI_type_node,
14397 tree_cons (NULL_TREE, V2SI_type_node,
14398 tree_cons (NULL_TREE, V2SI_type_node,
14399 endlink)));
14400 tree di_ftype_di_di
14401 = build_function_type (long_long_unsigned_type_node,
14402 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14403 tree_cons (NULL_TREE,
14404 long_long_unsigned_type_node,
14405 endlink)));
14406
14407 /* Add all builtins that are more or less simple operations on two
14408 operands. */
14409 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14410 {
14411 /* Use one of the operands; the target can have a different mode for
14412 mask-generating compares. */
14413 enum machine_mode mode;
14414 tree type;
14415
14416 if (d->name == 0)
14417 continue;
14418
14419 mode = insn_data[d->icode].operand[1].mode;
14420
14421 switch (mode)
14422 {
14423 case V8QImode:
14424 type = v8qi_ftype_v8qi_v8qi;
14425 break;
14426 case V4HImode:
14427 type = v4hi_ftype_v4hi_v4hi;
14428 break;
14429 case V2SImode:
14430 type = v2si_ftype_v2si_v2si;
14431 break;
14432 case DImode:
14433 type = di_ftype_di_di;
14434 break;
14435
14436 default:
14437 gcc_unreachable ();
14438 }
14439
14440 def_mbuiltin (d->mask, d->name, type, d->code);
14441 }
14442
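  /* For illustration (added comment; the typedef and function are
     hypothetical user-level code, not part of GCC): the loop above makes
     builtins such as __builtin_arm_waddw available on iWMMXt targets, e.g.

	 typedef int v2si __attribute__ ((vector_size (8)));

	 v2si add2 (v2si a, v2si b)
	 {
	   return __builtin_arm_waddw (a, b);
	 }

     where the v2si_ftype_v2si_v2si signature is selected because operand 1
     of the addv2si3 pattern has mode V2SImode.  */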
14443 /* Add the remaining MMX insns with somewhat more complicated types. */
14444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14447
14448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14454
14455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14461
14462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14468
14469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14475
14476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14477
14478 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14482
14483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14487 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14488 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14489 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14490 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14491 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14492
14493 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14494 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14495 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14496
14497 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14498 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14499 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14500
14501 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14502 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14503 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14504 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14505 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14506 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14507
14508 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14509 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14510 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14511 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14512 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14513 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14514 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14515 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14516 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14517 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14518 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14519 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14520
14521 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14522 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14523 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14524 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14525
14526 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14527 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14528 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14529 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14532 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14533 }
14534
14535 static void
14536 arm_init_tls_builtins (void)
14537 {
14538 tree ftype;
14539 tree nothrow = tree_cons (get_identifier ("nothrow"), NULL, NULL);
14540 tree const_nothrow = tree_cons (get_identifier ("const"), NULL, nothrow);
14541
14542 ftype = build_function_type (ptr_type_node, void_list_node);
14543 add_builtin_function ("__builtin_thread_pointer", ftype,
14544 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14545 NULL, const_nothrow);
14546 }
14547
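/* For illustration (added comment; hypothetical user code): the builtin
   registered above takes no arguments and returns the thread pointer, e.g.

       void *tp = __builtin_thread_pointer ();

   Its "const" and "nothrow" attributes allow the compiler to CSE repeated
   calls.  */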
14548 typedef enum {
14549 T_V8QI = 0x0001,
14550 T_V4HI = 0x0002,
14551 T_V2SI = 0x0004,
14552 T_V2SF = 0x0008,
14553 T_DI = 0x0010,
14554 T_V16QI = 0x0020,
14555 T_V8HI = 0x0040,
14556 T_V4SI = 0x0080,
14557 T_V4SF = 0x0100,
14558 T_V2DI = 0x0200,
14559 T_TI = 0x0400,
14560 T_EI = 0x0800,
14561 T_OI = 0x1000
14562 } neon_builtin_type_bits;
14563
14564 #define v8qi_UP T_V8QI
14565 #define v4hi_UP T_V4HI
14566 #define v2si_UP T_V2SI
14567 #define v2sf_UP T_V2SF
14568 #define di_UP T_DI
14569 #define v16qi_UP T_V16QI
14570 #define v8hi_UP T_V8HI
14571 #define v4si_UP T_V4SI
14572 #define v4sf_UP T_V4SF
14573 #define v2di_UP T_V2DI
14574 #define ti_UP T_TI
14575 #define ei_UP T_EI
14576 #define oi_UP T_OI
14577
14578 #define UP(X) X##_UP
14579
14580 #define T_MAX 13
14581
14582 typedef enum {
14583 NEON_BINOP,
14584 NEON_TERNOP,
14585 NEON_UNOP,
14586 NEON_GETLANE,
14587 NEON_SETLANE,
14588 NEON_CREATE,
14589 NEON_DUP,
14590 NEON_DUPLANE,
14591 NEON_COMBINE,
14592 NEON_SPLIT,
14593 NEON_LANEMUL,
14594 NEON_LANEMULL,
14595 NEON_LANEMULH,
14596 NEON_LANEMAC,
14597 NEON_SCALARMUL,
14598 NEON_SCALARMULL,
14599 NEON_SCALARMULH,
14600 NEON_SCALARMAC,
14601 NEON_CONVERT,
14602 NEON_FIXCONV,
14603 NEON_SELECT,
14604 NEON_RESULTPAIR,
14605 NEON_REINTERP,
14606 NEON_VTBL,
14607 NEON_VTBX,
14608 NEON_LOAD1,
14609 NEON_LOAD1LANE,
14610 NEON_STORE1,
14611 NEON_STORE1LANE,
14612 NEON_LOADSTRUCT,
14613 NEON_LOADSTRUCTLANE,
14614 NEON_STORESTRUCT,
14615 NEON_STORESTRUCTLANE,
14616 NEON_LOGICBINOP,
14617 NEON_SHIFTINSERT,
14618 NEON_SHIFTIMM,
14619 NEON_SHIFTACC
14620 } neon_itype;
14621
14622 typedef struct {
14623 const char *name;
14624 const neon_itype itype;
14625 const neon_builtin_type_bits bits;
14626 const enum insn_code codes[T_MAX];
14627 const unsigned int num_vars;
14628 unsigned int base_fcode;
14629 } neon_builtin_datum;
14630
14631 #define CF(N,X) CODE_FOR_neon_##N##X
14632
14633 #define VAR1(T, N, A) \
14634 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14635 #define VAR2(T, N, A, B) \
14636 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14637 #define VAR3(T, N, A, B, C) \
14638 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14639 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14640 #define VAR4(T, N, A, B, C, D) \
14641 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14642 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14643 #define VAR5(T, N, A, B, C, D, E) \
14644 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14645 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14646 #define VAR6(T, N, A, B, C, D, E, F) \
14647 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14648 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14649 #define VAR7(T, N, A, B, C, D, E, F, G) \
14650 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14651 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14652 CF (N, G) }, 7, 0
14653 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14654 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14655 | UP (H), \
14656 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14657 CF (N, G), CF (N, H) }, 8, 0
14658 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14659 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14660 | UP (H) | UP (I), \
14661 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14662 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14663 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14664 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14665 | UP (H) | UP (I) | UP (J), \
14666 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14667 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
14668
14669 /* The mode entries in the following table correspond to the "key" type of the
14670    instruction variant, i.e. the type that would be specified after the
14671    assembler mnemonic, which usually refers to the last vector operand.
14672    (Signed, unsigned and polynomial types are not distinguished here; they
14673    are all mapped onto the same mode for a given element size.)  The modes
14674    listed per instruction should be the same as those defined for that
14675    instruction's pattern in neon.md.
14676    WARNING: Variants should be listed in the same increasing order as
14677    neon_builtin_type_bits.  */
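/* For example, the entry

     { VAR2 (TERNOP, vqdmlal, v4hi, v2si) }

   expands, via the VAR2, UP and CF macros above, to

     { "vqdmlal", NEON_TERNOP, T_V4HI | T_V2SI,
       { CODE_FOR_neon_vqdmlalv4hi, CODE_FOR_neon_vqdmlalv2si }, 2, 0 }

   i.e. a single datum describing both the v4hi and v2si variants of
   vqdmlal.  */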
14678
14679 static neon_builtin_datum neon_builtin_data[] =
14680 {
14681 { VAR10 (BINOP, vadd,
14682 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14683 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14684 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14685 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14686 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14687 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14688 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14689 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14690 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14691 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14692 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14693 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14694 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14695 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14696 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14697 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14698 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14699 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14700 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14701 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14702 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14703 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14704 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14705 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14706 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14707 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14708 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14709 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14710 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14711 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14712 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14713 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14714 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14715 { VAR10 (BINOP, vsub,
14716 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14717 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14718 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14719 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14720 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14721 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14722 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14723 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14724 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14725 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14726 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14727 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14728 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14729 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14730 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14731 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14732 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14733 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14734 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14735 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14736 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14737 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14738 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14739 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14740 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14741 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14742 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14743 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14744 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14745 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14746 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14747 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14748 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14749 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14750 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14751 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14752 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14753 /* FIXME: vget_lane supports more variants than this! */
14754 { VAR10 (GETLANE, vget_lane,
14755 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14756 { VAR10 (SETLANE, vset_lane,
14757 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14758 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14759 { VAR10 (DUP, vdup_n,
14760 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14761 { VAR10 (DUPLANE, vdup_lane,
14762 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14763 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14764 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14765 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14766 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14767 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14768 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14769 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14770 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14771 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14772 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14773 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14774 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14775 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14776 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14777 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14778 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14779 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14780 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14781 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14782 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14783 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14784 { VAR10 (BINOP, vext,
14785 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14786 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14787 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14788 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14789 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14790 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14791 { VAR10 (SELECT, vbsl,
14792 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14793 { VAR1 (VTBL, vtbl1, v8qi) },
14794 { VAR1 (VTBL, vtbl2, v8qi) },
14795 { VAR1 (VTBL, vtbl3, v8qi) },
14796 { VAR1 (VTBL, vtbl4, v8qi) },
14797 { VAR1 (VTBX, vtbx1, v8qi) },
14798 { VAR1 (VTBX, vtbx2, v8qi) },
14799 { VAR1 (VTBX, vtbx3, v8qi) },
14800 { VAR1 (VTBX, vtbx4, v8qi) },
14801 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14802 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14803 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14804 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14805 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14806 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14807 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14808 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14809 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14810 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14811 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14812 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14813 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14814 { VAR10 (LOAD1, vld1,
14815 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14816 { VAR10 (LOAD1LANE, vld1_lane,
14817 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14818 { VAR10 (LOAD1, vld1_dup,
14819 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14820 { VAR10 (STORE1, vst1,
14821 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14822 { VAR10 (STORE1LANE, vst1_lane,
14823 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14824 { VAR9 (LOADSTRUCT,
14825 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14826 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14827 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14828 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14829 { VAR9 (STORESTRUCT, vst2,
14830 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14831 { VAR7 (STORESTRUCTLANE, vst2_lane,
14832 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14833 { VAR9 (LOADSTRUCT,
14834 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14835 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14836 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14837 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14838 { VAR9 (STORESTRUCT, vst3,
14839 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14840 { VAR7 (STORESTRUCTLANE, vst3_lane,
14841 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14842 { VAR9 (LOADSTRUCT, vld4,
14843 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14844 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14845 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14846 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14847 { VAR9 (STORESTRUCT, vst4,
14848 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14849 { VAR7 (STORESTRUCTLANE, vst4_lane,
14850 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14851 { VAR10 (LOGICBINOP, vand,
14852 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14853 { VAR10 (LOGICBINOP, vorr,
14854 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14855 { VAR10 (BINOP, veor,
14856 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14857 { VAR10 (LOGICBINOP, vbic,
14858 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14859 { VAR10 (LOGICBINOP, vorn,
14860 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14861 };
14862
14863 #undef CF
14864 #undef VAR1
14865 #undef VAR2
14866 #undef VAR3
14867 #undef VAR4
14868 #undef VAR5
14869 #undef VAR6
14870 #undef VAR7
14871 #undef VAR8
14872 #undef VAR9
14873 #undef VAR10
14874
14875 static void
14876 arm_init_neon_builtins (void)
14877 {
14878 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14879
14880 /* Create distinguished type nodes for NEON vector element types,
14881 and pointers to values of such types, so we can detect them later. */
14882 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14883 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14884 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14885 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14886 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14887 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14888 tree neon_float_type_node = make_node (REAL_TYPE);
14889 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
14890 layout_type (neon_float_type_node);
14891
14892 /* Define typedefs which exactly correspond to the modes we are basing vector
14893 types on. If you change these names you'll need to change
14894 the table used by arm_mangle_type too. */
14895 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
14896 "__builtin_neon_qi");
14897 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
14898 "__builtin_neon_hi");
14899 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
14900 "__builtin_neon_si");
14901 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
14902 "__builtin_neon_sf");
14903 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
14904 "__builtin_neon_di");
14905
14906 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
14907 "__builtin_neon_poly8");
14908 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
14909 "__builtin_neon_poly16");
14910
14911 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14912 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14913 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14914 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14915 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14916
14917 /* Next create constant-qualified versions of the above types. */
14918 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14919 TYPE_QUAL_CONST);
14920 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14921 TYPE_QUAL_CONST);
14922 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14923 TYPE_QUAL_CONST);
14924 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14925 TYPE_QUAL_CONST);
14926 tree const_float_node = build_qualified_type (neon_float_type_node,
14927 TYPE_QUAL_CONST);
14928
14929 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14930 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14931 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14932 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14933 tree const_float_pointer_node = build_pointer_type (const_float_node);
14934
14935 /* Now create vector types based on our NEON element types. */
14936 /* 64-bit vectors. */
14937 tree V8QI_type_node =
14938 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
14939 tree V4HI_type_node =
14940 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
14941 tree V2SI_type_node =
14942 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
14943 tree V2SF_type_node =
14944 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
14945 /* 128-bit vectors. */
14946 tree V16QI_type_node =
14947 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
14948 tree V8HI_type_node =
14949 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
14950 tree V4SI_type_node =
14951 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
14952 tree V4SF_type_node =
14953 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
14954 tree V2DI_type_node =
14955 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
14956
14957 /* Unsigned integer types for various mode sizes. */
14958 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
14959 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
14960 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
14961 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
14962
14963 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
14964 "__builtin_neon_uqi");
14965 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
14966 "__builtin_neon_uhi");
14967 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
14968 "__builtin_neon_usi");
14969 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
14970 "__builtin_neon_udi");
14971
14972 /* Opaque integer types for structures of vectors. */
14973 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
14974 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
14975 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
14976 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
14977
14978 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
14979 "__builtin_neon_ti");
14980 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
14981 "__builtin_neon_ei");
14982 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
14983 "__builtin_neon_oi");
14984 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
14985 "__builtin_neon_ci");
14986 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
14987 "__builtin_neon_xi");
14988
14989 /* Pointers to vector types. */
14990 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
14991 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
14992 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
14993 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
14994 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
14995 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
14996 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
14997 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
14998 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
14999
15000 /* Operations which return results as pairs. */
15001 tree void_ftype_pv8qi_v8qi_v8qi =
15002 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15003 V8QI_type_node, NULL);
15004 tree void_ftype_pv4hi_v4hi_v4hi =
15005 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15006 V4HI_type_node, NULL);
15007 tree void_ftype_pv2si_v2si_v2si =
15008 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15009 V2SI_type_node, NULL);
15010 tree void_ftype_pv2sf_v2sf_v2sf =
15011 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15012 V2SF_type_node, NULL);
15013 tree void_ftype_pdi_di_di =
15014 build_function_type_list (void_type_node, intDI_pointer_node,
15015 neon_intDI_type_node, neon_intDI_type_node, NULL);
15016 tree void_ftype_pv16qi_v16qi_v16qi =
15017 build_function_type_list (void_type_node, V16QI_pointer_node,
15018 V16QI_type_node, V16QI_type_node, NULL);
15019 tree void_ftype_pv8hi_v8hi_v8hi =
15020 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15021 V8HI_type_node, NULL);
15022 tree void_ftype_pv4si_v4si_v4si =
15023 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15024 V4SI_type_node, NULL);
15025 tree void_ftype_pv4sf_v4sf_v4sf =
15026 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15027 V4SF_type_node, NULL);
15028 tree void_ftype_pv2di_v2di_v2di =
15029 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15030 V2DI_type_node, NULL);
15031
15032 tree reinterp_ftype_dreg[5][5];
15033 tree reinterp_ftype_qreg[5][5];
15034 tree dreg_types[5], qreg_types[5];
15035
15036 dreg_types[0] = V8QI_type_node;
15037 dreg_types[1] = V4HI_type_node;
15038 dreg_types[2] = V2SI_type_node;
15039 dreg_types[3] = V2SF_type_node;
15040 dreg_types[4] = neon_intDI_type_node;
15041
15042 qreg_types[0] = V16QI_type_node;
15043 qreg_types[1] = V8HI_type_node;
15044 qreg_types[2] = V4SI_type_node;
15045 qreg_types[3] = V4SF_type_node;
15046 qreg_types[4] = V2DI_type_node;
15047
15048 for (i = 0; i < 5; i++)
15049 {
15050 int j;
15051 for (j = 0; j < 5; j++)
15052 {
15053 reinterp_ftype_dreg[i][j]
15054 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15055 reinterp_ftype_qreg[i][j]
15056 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15057 }
15058 }
15059
15060 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15061 {
15062 neon_builtin_datum *d = &neon_builtin_data[i];
15063 unsigned int j, codeidx = 0;
15064
15065 d->base_fcode = fcode;
15066
15067 for (j = 0; j < T_MAX; j++)
15068 {
15069 const char* const modenames[] = {
15070 "v8qi", "v4hi", "v2si", "v2sf", "di",
15071 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15072 };
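/* The index J into modenames must line up with the bit positions of
   neon_builtin_type_bits: bit J of d->bits selects modenames[J], while
   d->codes lists the matching insn codes in the same increasing order.  */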
15073 char namebuf[60];
15074 tree ftype = NULL;
15075 enum insn_code icode;
15076 int is_load = 0, is_store = 0;
15077
15078 if ((d->bits & (1 << j)) == 0)
15079 continue;
15080
15081 icode = d->codes[codeidx++];
15082
15083 switch (d->itype)
15084 {
15085 case NEON_LOAD1:
15086 case NEON_LOAD1LANE:
15087 case NEON_LOADSTRUCT:
15088 case NEON_LOADSTRUCTLANE:
15089 is_load = 1;
15090 /* Fall through. */
15091 case NEON_STORE1:
15092 case NEON_STORE1LANE:
15093 case NEON_STORESTRUCT:
15094 case NEON_STORESTRUCTLANE:
15095 if (!is_load)
15096 is_store = 1;
15097 /* Fall through. */
15098 case NEON_UNOP:
15099 case NEON_BINOP:
15100 case NEON_LOGICBINOP:
15101 case NEON_SHIFTINSERT:
15102 case NEON_TERNOP:
15103 case NEON_GETLANE:
15104 case NEON_SETLANE:
15105 case NEON_CREATE:
15106 case NEON_DUP:
15107 case NEON_DUPLANE:
15108 case NEON_SHIFTIMM:
15109 case NEON_SHIFTACC:
15110 case NEON_COMBINE:
15111 case NEON_SPLIT:
15112 case NEON_CONVERT:
15113 case NEON_FIXCONV:
15114 case NEON_LANEMUL:
15115 case NEON_LANEMULL:
15116 case NEON_LANEMULH:
15117 case NEON_LANEMAC:
15118 case NEON_SCALARMUL:
15119 case NEON_SCALARMULL:
15120 case NEON_SCALARMULH:
15121 case NEON_SCALARMAC:
15122 case NEON_SELECT:
15123 case NEON_VTBL:
15124 case NEON_VTBX:
15125 {
15126 int k;
15127 tree return_type = void_type_node, args = void_list_node;
15128
15129 /* Build a function type directly from the insn_data for this
15130 builtin. The build_function_type() function takes care of
15131 removing duplicates for us. */
15132 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15133 {
15134 tree eltype;
15135
15136 if (is_load && k == 1)
15137 {
15138 /* Neon load patterns always have the memory operand
15139 (a SImode pointer) in the operand 1 position. We
15140 want a const pointer to the element type in that
15141 position. */
15142 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15143
15144 switch (1 << j)
15145 {
15146 case T_V8QI:
15147 case T_V16QI:
15148 eltype = const_intQI_pointer_node;
15149 break;
15150
15151 case T_V4HI:
15152 case T_V8HI:
15153 eltype = const_intHI_pointer_node;
15154 break;
15155
15156 case T_V2SI:
15157 case T_V4SI:
15158 eltype = const_intSI_pointer_node;
15159 break;
15160
15161 case T_V2SF:
15162 case T_V4SF:
15163 eltype = const_float_pointer_node;
15164 break;
15165
15166 case T_DI:
15167 case T_V2DI:
15168 eltype = const_intDI_pointer_node;
15169 break;
15170
15171 default: gcc_unreachable ();
15172 }
15173 }
15174 else if (is_store && k == 0)
15175 {
15176 /* Similarly, Neon store patterns use operand 0 as
15177 the memory location to store to (a SImode pointer).
15178 Use a pointer to the element type of the store in
15179 that position. */
15180 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15181
15182 switch (1 << j)
15183 {
15184 case T_V8QI:
15185 case T_V16QI:
15186 eltype = intQI_pointer_node;
15187 break;
15188
15189 case T_V4HI:
15190 case T_V8HI:
15191 eltype = intHI_pointer_node;
15192 break;
15193
15194 case T_V2SI:
15195 case T_V4SI:
15196 eltype = intSI_pointer_node;
15197 break;
15198
15199 case T_V2SF:
15200 case T_V4SF:
15201 eltype = float_pointer_node;
15202 break;
15203
15204 case T_DI:
15205 case T_V2DI:
15206 eltype = intDI_pointer_node;
15207 break;
15208
15209 default: gcc_unreachable ();
15210 }
15211 }
15212 else
15213 {
15214 switch (insn_data[icode].operand[k].mode)
15215 {
15216 case VOIDmode: eltype = void_type_node; break;
15217 /* Scalars. */
15218 case QImode: eltype = neon_intQI_type_node; break;
15219 case HImode: eltype = neon_intHI_type_node; break;
15220 case SImode: eltype = neon_intSI_type_node; break;
15221 case SFmode: eltype = neon_float_type_node; break;
15222 case DImode: eltype = neon_intDI_type_node; break;
15223 case TImode: eltype = intTI_type_node; break;
15224 case EImode: eltype = intEI_type_node; break;
15225 case OImode: eltype = intOI_type_node; break;
15226 case CImode: eltype = intCI_type_node; break;
15227 case XImode: eltype = intXI_type_node; break;
15228 /* 64-bit vectors. */
15229 case V8QImode: eltype = V8QI_type_node; break;
15230 case V4HImode: eltype = V4HI_type_node; break;
15231 case V2SImode: eltype = V2SI_type_node; break;
15232 case V2SFmode: eltype = V2SF_type_node; break;
15233 /* 128-bit vectors. */
15234 case V16QImode: eltype = V16QI_type_node; break;
15235 case V8HImode: eltype = V8HI_type_node; break;
15236 case V4SImode: eltype = V4SI_type_node; break;
15237 case V4SFmode: eltype = V4SF_type_node; break;
15238 case V2DImode: eltype = V2DI_type_node; break;
15239 default: gcc_unreachable ();
15240 }
15241 }
15242
15243 if (k == 0 && !is_store)
15244 return_type = eltype;
15245 else
15246 args = tree_cons (NULL_TREE, eltype, args);
15247 }
15248
15249 ftype = build_function_type (return_type, args);
15250 }
15251 break;
15252
15253 case NEON_RESULTPAIR:
15254 {
15255 switch (insn_data[icode].operand[1].mode)
15256 {
15257 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15258 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15259 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15260 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15261 case DImode: ftype = void_ftype_pdi_di_di; break;
15262 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15263 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15264 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15265 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15266 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15267 default: gcc_unreachable ();
15268 }
15269 }
15270 break;
15271
15272 case NEON_REINTERP:
15273 {
15274 /* We iterate over 5 doubleword types, then 5 quadword
15275 types. */
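/* E.g. for __builtin_neon_vreinterpretv4hiv2sf (j == 3, key type v2sf),
   and assuming, as in neon.md, that operand 0 of that pattern has mode
   V4HImode, this selects reinterp_ftype_dreg[1][3], i.e. a function
   returning a V4HI given a V2SF argument.  */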
15276 int rhs = j % 5;
15277 switch (insn_data[icode].operand[0].mode)
15278 {
15279 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15280 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15281 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15282 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15283 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15284 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15285 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15286 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15287 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15288 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15289 default: gcc_unreachable ();
15290 }
15291 }
15292 break;
15293
15294 default:
15295 gcc_unreachable ();
15296 }
15297
15298 gcc_assert (ftype != NULL);
15299
15300 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15301
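/* For the vadd entry above, for instance, this registers
   __builtin_neon_vaddv8qi, __builtin_neon_vaddv4hi, ...,
   __builtin_neon_vaddv2di, with consecutive function codes starting
   at d->base_fcode.  */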
15302 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15303 NULL_TREE);
15304 }
15305 }
15306 }
15307
15308 static void
15309 arm_init_builtins (void)
15310 {
15311 arm_init_tls_builtins ();
15312
15313 if (TARGET_REALLY_IWMMXT)
15314 arm_init_iwmmxt_builtins ();
15315
15316 if (TARGET_NEON)
15317 arm_init_neon_builtins ();
15318 }
15319
15320 /* Errors in the source file can cause expand_expr to return const0_rtx
15321 where we expect a vector. To avoid crashing, use one of the vector
15322 clear instructions. */
15323
15324 static rtx
15325 safe_vector_operand (rtx x, enum machine_mode mode)
15326 {
15327 if (x != const0_rtx)
15328 return x;
15329 x = gen_reg_rtx (mode);
15330
15331 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15332 : gen_rtx_SUBREG (DImode, x, 0)));
15333 return x;
15334 }
15335
15336 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15337
15338 static rtx
15339 arm_expand_binop_builtin (enum insn_code icode,
15340 tree exp, rtx target)
15341 {
15342 rtx pat;
15343 tree arg0 = CALL_EXPR_ARG (exp, 0);
15344 tree arg1 = CALL_EXPR_ARG (exp, 1);
15345 rtx op0 = expand_normal (arg0);
15346 rtx op1 = expand_normal (arg1);
15347 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15348 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15349 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15350
15351 if (VECTOR_MODE_P (mode0))
15352 op0 = safe_vector_operand (op0, mode0);
15353 if (VECTOR_MODE_P (mode1))
15354 op1 = safe_vector_operand (op1, mode1);
15355
15356 if (! target
15357 || GET_MODE (target) != tmode
15358 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15359 target = gen_reg_rtx (tmode);
15360
15361 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15362
15363 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15364 op0 = copy_to_mode_reg (mode0, op0);
15365 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15366 op1 = copy_to_mode_reg (mode1, op1);
15367
15368 pat = GEN_FCN (icode) (target, op0, op1);
15369 if (! pat)
15370 return 0;
15371 emit_insn (pat);
15372 return target;
15373 }
15374
15375 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15376
15377 static rtx
15378 arm_expand_unop_builtin (enum insn_code icode,
15379 tree exp, rtx target, int do_load)
15380 {
15381 rtx pat;
15382 tree arg0 = CALL_EXPR_ARG (exp, 0);
15383 rtx op0 = expand_normal (arg0);
15384 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15385 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15386
15387 if (! target
15388 || GET_MODE (target) != tmode
15389 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15390 target = gen_reg_rtx (tmode);
15391 if (do_load)
15392 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15393 else
15394 {
15395 if (VECTOR_MODE_P (mode0))
15396 op0 = safe_vector_operand (op0, mode0);
15397
15398 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15399 op0 = copy_to_mode_reg (mode0, op0);
15400 }
15401
15402 pat = GEN_FCN (icode) (target, op0);
15403 if (! pat)
15404 return 0;
15405 emit_insn (pat);
15406 return target;
15407 }
15408
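/* Comparison callback for the bsearch in locate_neon_builtin_icode: A is
   the key holding the function code being looked up, B a table entry; the
   entry matches when the code falls in the half-open range
   [base_fcode, base_fcode + num_vars).  */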
15409 static int
15410 neon_builtin_compare (const void *a, const void *b)
15411 {
15412 const neon_builtin_datum *key = a;
15413 const neon_builtin_datum *memb = b;
15414 unsigned int soughtcode = key->base_fcode;
15415
15416 if (soughtcode >= memb->base_fcode
15417 && soughtcode < memb->base_fcode + memb->num_vars)
15418 return 0;
15419 else if (soughtcode < memb->base_fcode)
15420 return -1;
15421 else
15422 return 1;
15423 }
15424
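/* Given the function code FCODE of a Neon builtin, return the insn code of
   the variant that implements it by binary-searching neon_builtin_data
   (whose entries are laid out in increasing base_fcode order).  If ITYPE is
   non-null, also return the builtin's neon_itype through it.  */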
15425 static enum insn_code
15426 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15427 {
15428 neon_builtin_datum key, *found;
15429 int idx;
15430
15431 key.base_fcode = fcode;
15432 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15433 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15434 gcc_assert (found);
15435 idx = fcode - (int) found->base_fcode;
15436 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15437
15438 if (itype)
15439 *itype = found->itype;
15440
15441 return found->codes[idx];
15442 }
15443
15444 typedef enum {
15445 NEON_ARG_COPY_TO_REG,
15446 NEON_ARG_CONSTANT,
15447 NEON_ARG_STOP
15448 } builtin_arg;
15449
15450 #define NEON_MAX_BUILTIN_ARGS 5
15451
15452 /* Expand a Neon builtin.  ICODE is the insn code of the instruction to
   emit and HAVE_RETVAL is nonzero if operand 0 of that insn is a result.
   The variable arguments after EXP form a NEON_ARG_STOP-terminated list of
   builtin_arg codes, one per argument of EXP, saying how each operand is to
   be prepared (copied to a register, or required to be a constant).  */
15453 static rtx
15454 arm_expand_neon_args (rtx target, int icode, int have_retval,
15455 tree exp, ...)
15456 {
15457 va_list ap;
15458 rtx pat;
15459 tree arg[NEON_MAX_BUILTIN_ARGS];
15460 rtx op[NEON_MAX_BUILTIN_ARGS];
15461 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15462 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15463 int argc = 0;
15464
15465 if (have_retval
15466 && (!target
15467 || GET_MODE (target) != tmode
15468 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15469 target = gen_reg_rtx (tmode);
15470
15471 va_start (ap, exp);
15472
15473 for (;;)
15474 {
15475 builtin_arg thisarg = va_arg (ap, int);
15476
15477 if (thisarg == NEON_ARG_STOP)
15478 break;
15479 else
15480 {
15481 arg[argc] = CALL_EXPR_ARG (exp, argc);
15482 op[argc] = expand_normal (arg[argc]);
15483 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15484
15485 switch (thisarg)
15486 {
15487 case NEON_ARG_COPY_TO_REG:
15488 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15489 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15490 (op[argc], mode[argc]))
15491 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15492 break;
15493
15494 case NEON_ARG_CONSTANT:
15495 /* FIXME: This error message is somewhat unhelpful. */
15496 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15497 (op[argc], mode[argc]))
15498 error ("argument must be a constant");
15499 break;
15500
15501 case NEON_ARG_STOP:
15502 gcc_unreachable ();
15503 }
15504
15505 argc++;
15506 }
15507 }
15508
15509 va_end (ap);
15510
15511 if (have_retval)
15512 switch (argc)
15513 {
15514 case 1:
15515 pat = GEN_FCN (icode) (target, op[0]);
15516 break;
15517
15518 case 2:
15519 pat = GEN_FCN (icode) (target, op[0], op[1]);
15520 break;
15521
15522 case 3:
15523 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15524 break;
15525
15526 case 4:
15527 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15528 break;
15529
15530 case 5:
15531 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15532 break;
15533
15534 default:
15535 gcc_unreachable ();
15536 }
15537 else
15538 switch (argc)
15539 {
15540 case 1:
15541 pat = GEN_FCN (icode) (op[0]);
15542 break;
15543
15544 case 2:
15545 pat = GEN_FCN (icode) (op[0], op[1]);
15546 break;
15547
15548 case 3:
15549 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15550 break;
15551
15552 case 4:
15553 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15554 break;
15555
15556 case 5:
15557 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15558 break;
15559
15560 default:
15561 gcc_unreachable ();
15562 }
15563
15564 if (!pat)
15565 return 0;
15566
15567 emit_insn (pat);
15568
15569 return target;
15570 }
15571
15572 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15573 constants defined per-instruction or per instruction-variant. Instead, the
15574 required info is looked up in the table neon_builtin_data. */
15575 static rtx
15576 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15577 {
15578 neon_itype itype;
15579 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15580
15581 switch (itype)
15582 {
15583 case NEON_UNOP:
15584 case NEON_CONVERT:
15585 case NEON_DUPLANE:
15586 return arm_expand_neon_args (target, icode, 1, exp,
15587 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15588
15589 case NEON_BINOP:
15590 case NEON_SETLANE:
15591 case NEON_SCALARMUL:
15592 case NEON_SCALARMULL:
15593 case NEON_SCALARMULH:
15594 case NEON_SHIFTINSERT:
15595 case NEON_LOGICBINOP:
15596 return arm_expand_neon_args (target, icode, 1, exp,
15597 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15598 NEON_ARG_STOP);
15599
15600 case NEON_TERNOP:
15601 return arm_expand_neon_args (target, icode, 1, exp,
15602 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15603 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15604
15605 case NEON_GETLANE:
15606 case NEON_FIXCONV:
15607 case NEON_SHIFTIMM:
15608 return arm_expand_neon_args (target, icode, 1, exp,
15609 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15610 NEON_ARG_STOP);
15611
15612 case NEON_CREATE:
15613 return arm_expand_neon_args (target, icode, 1, exp,
15614 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15615
15616 case NEON_DUP:
15617 case NEON_SPLIT:
15618 case NEON_REINTERP:
15619 return arm_expand_neon_args (target, icode, 1, exp,
15620 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15621
15622 case NEON_COMBINE:
15623 case NEON_VTBL:
15624 return arm_expand_neon_args (target, icode, 1, exp,
15625 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15626
15627 case NEON_RESULTPAIR:
15628 return arm_expand_neon_args (target, icode, 0, exp,
15629 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15630 NEON_ARG_STOP);
15631
15632 case NEON_LANEMUL:
15633 case NEON_LANEMULL:
15634 case NEON_LANEMULH:
15635 return arm_expand_neon_args (target, icode, 1, exp,
15636 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15637 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15638
15639 case NEON_LANEMAC:
15640 return arm_expand_neon_args (target, icode, 1, exp,
15641 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15642 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15643
15644 case NEON_SHIFTACC:
15645 return arm_expand_neon_args (target, icode, 1, exp,
15646 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15647 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15648
15649 case NEON_SCALARMAC:
15650 return arm_expand_neon_args (target, icode, 1, exp,
15651 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15652 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15653
15654 case NEON_SELECT:
15655 case NEON_VTBX:
15656 return arm_expand_neon_args (target, icode, 1, exp,
15657 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15658 NEON_ARG_STOP);
15659
15660 case NEON_LOAD1:
15661 case NEON_LOADSTRUCT:
15662 return arm_expand_neon_args (target, icode, 1, exp,
15663 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15664
15665 case NEON_LOAD1LANE:
15666 case NEON_LOADSTRUCTLANE:
15667 return arm_expand_neon_args (target, icode, 1, exp,
15668 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15669 NEON_ARG_STOP);
15670
15671 case NEON_STORE1:
15672 case NEON_STORESTRUCT:
15673 return arm_expand_neon_args (target, icode, 0, exp,
15674 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15675
15676 case NEON_STORE1LANE:
15677 case NEON_STORESTRUCTLANE:
15678 return arm_expand_neon_args (target, icode, 0, exp,
15679 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15680 NEON_ARG_STOP);
15681 }
15682
15683 gcc_unreachable ();
15684 }
15685
15686 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15687 void
15688 neon_reinterpret (rtx dest, rtx src)
15689 {
15690 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15691 }
15692
15693 /* Emit code to place a Neon pair result in memory locations (with equal
15694 registers). */
15695 void
15696 neon_emit_pair_result_insn (enum machine_mode mode,
15697 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15698 rtx op1, rtx op2)
15699 {
15700 rtx mem = gen_rtx_MEM (mode, destaddr);
15701 rtx tmp1 = gen_reg_rtx (mode);
15702 rtx tmp2 = gen_reg_rtx (mode);
15703
15704 emit_insn (intfn (tmp1, op1, tmp2, op2));
15705
15706 emit_move_insn (mem, tmp1);
15707 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15708 emit_move_insn (mem, tmp2);
15709 }
15710
15711 /* Set up operands for a register copy from src to dest, taking care not to
15712 clobber registers in the process.
15713 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15714 be called with a large N, so that should be OK. */
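/* For instance, with COUNT == 2, DEST == { d1, d2 } and SRC == { d0, d1 },
   emitting d1 <- d0 first would clobber the source of the second copy, so
   the loop below orders the operands as d2 <- d1 followed by d1 <- d0
   (register names here are purely illustrative).  */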
15715
15716 void
15717 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15718 {
15719 unsigned int copied = 0, opctr = 0;
15720 unsigned int done = (1 << count) - 1;
15721 unsigned int i, j;
15722
15723 while (copied != done)
15724 {
15725 for (i = 0; i < count; i++)
15726 {
15727 int good = 1;
15728
15729 for (j = 0; good && j < count; j++)
15730 if (i != j && (copied & (1 << j)) == 0
15731 && reg_overlap_mentioned_p (src[j], dest[i]))
15732 good = 0;
15733
15734 if (good)
15735 {
15736 operands[opctr++] = dest[i];
15737 operands[opctr++] = src[i];
15738 copied |= 1 << i;
15739 }
15740 }
15741 }
15742
15743 gcc_assert (opctr == count * 2);
15744 }
15745
15746 /* Expand an expression EXP that calls a built-in function,
15747 with result going to TARGET if that's convenient
15748 (and in mode MODE if that's convenient).
15749 SUBTARGET may be used as the target for computing one of EXP's operands.
15750 IGNORE is nonzero if the value is to be ignored. */
15751
15752 static rtx
15753 arm_expand_builtin (tree exp,
15754 rtx target,
15755 rtx subtarget ATTRIBUTE_UNUSED,
15756 enum machine_mode mode ATTRIBUTE_UNUSED,
15757 int ignore ATTRIBUTE_UNUSED)
15758 {
15759 const struct builtin_description * d;
15760 enum insn_code icode;
15761 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15762 tree arg0;
15763 tree arg1;
15764 tree arg2;
15765 rtx op0;
15766 rtx op1;
15767 rtx op2;
15768 rtx pat;
15769 int fcode = DECL_FUNCTION_CODE (fndecl);
15770 size_t i;
15771 enum machine_mode tmode;
15772 enum machine_mode mode0;
15773 enum machine_mode mode1;
15774 enum machine_mode mode2;
15775
15776 if (fcode >= ARM_BUILTIN_NEON_BASE)
15777 return arm_expand_neon_builtin (fcode, exp, target);
15778
15779 switch (fcode)
15780 {
15781 case ARM_BUILTIN_TEXTRMSB:
15782 case ARM_BUILTIN_TEXTRMUB:
15783 case ARM_BUILTIN_TEXTRMSH:
15784 case ARM_BUILTIN_TEXTRMUH:
15785 case ARM_BUILTIN_TEXTRMSW:
15786 case ARM_BUILTIN_TEXTRMUW:
15787 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15788 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15789 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15790 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15791 : CODE_FOR_iwmmxt_textrmw);
15792
15793 arg0 = CALL_EXPR_ARG (exp, 0);
15794 arg1 = CALL_EXPR_ARG (exp, 1);
15795 op0 = expand_normal (arg0);
15796 op1 = expand_normal (arg1);
15797 tmode = insn_data[icode].operand[0].mode;
15798 mode0 = insn_data[icode].operand[1].mode;
15799 mode1 = insn_data[icode].operand[2].mode;
15800
15801 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15802 op0 = copy_to_mode_reg (mode0, op0);
15803 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15804 {
15805 /* @@@ better error message */
15806 error ("selector must be an immediate");
15807 return gen_reg_rtx (tmode);
15808 }
15809 if (target == 0
15810 || GET_MODE (target) != tmode
15811 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15812 target = gen_reg_rtx (tmode);
15813 pat = GEN_FCN (icode) (target, op0, op1);
15814 if (! pat)
15815 return 0;
15816 emit_insn (pat);
15817 return target;
15818
15819 case ARM_BUILTIN_TINSRB:
15820 case ARM_BUILTIN_TINSRH:
15821 case ARM_BUILTIN_TINSRW:
15822 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15823 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15824 : CODE_FOR_iwmmxt_tinsrw);
15825 arg0 = CALL_EXPR_ARG (exp, 0);
15826 arg1 = CALL_EXPR_ARG (exp, 1);
15827 arg2 = CALL_EXPR_ARG (exp, 2);
15828 op0 = expand_normal (arg0);
15829 op1 = expand_normal (arg1);
15830 op2 = expand_normal (arg2);
15831 tmode = insn_data[icode].operand[0].mode;
15832 mode0 = insn_data[icode].operand[1].mode;
15833 mode1 = insn_data[icode].operand[2].mode;
15834 mode2 = insn_data[icode].operand[3].mode;
15835
15836 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15837 op0 = copy_to_mode_reg (mode0, op0);
15838 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15839 op1 = copy_to_mode_reg (mode1, op1);
15840 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15841 {
15842 /* @@@ better error message */
15843 error ("selector must be an immediate");
15844 return const0_rtx;
15845 }
15846 if (target == 0
15847 || GET_MODE (target) != tmode
15848 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15849 target = gen_reg_rtx (tmode);
15850 pat = GEN_FCN (icode) (target, op0, op1, op2);
15851 if (! pat)
15852 return 0;
15853 emit_insn (pat);
15854 return target;
15855
15856 case ARM_BUILTIN_SETWCX:
15857 arg0 = CALL_EXPR_ARG (exp, 0);
15858 arg1 = CALL_EXPR_ARG (exp, 1);
15859 op0 = force_reg (SImode, expand_normal (arg0));
15860 op1 = expand_normal (arg1);
15861 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15862 return 0;
15863
15864 case ARM_BUILTIN_GETWCX:
15865 arg0 = CALL_EXPR_ARG (exp, 0);
15866 op0 = expand_normal (arg0);
15867 target = gen_reg_rtx (SImode);
15868 emit_insn (gen_iwmmxt_tmrc (target, op0));
15869 return target;
15870
15871 case ARM_BUILTIN_WSHUFH:
15872 icode = CODE_FOR_iwmmxt_wshufh;
15873 arg0 = CALL_EXPR_ARG (exp, 0);
15874 arg1 = CALL_EXPR_ARG (exp, 1);
15875 op0 = expand_normal (arg0);
15876 op1 = expand_normal (arg1);
15877 tmode = insn_data[icode].operand[0].mode;
15878 mode1 = insn_data[icode].operand[1].mode;
15879 mode2 = insn_data[icode].operand[2].mode;
15880
15881 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15882 op0 = copy_to_mode_reg (mode1, op0);
15883 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15884 {
15885 /* @@@ better error message */
15886 error ("mask must be an immediate");
15887 return const0_rtx;
15888 }
15889 if (target == 0
15890 || GET_MODE (target) != tmode
15891 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15892 target = gen_reg_rtx (tmode);
15893 pat = GEN_FCN (icode) (target, op0, op1);
15894 if (! pat)
15895 return 0;
15896 emit_insn (pat);
15897 return target;
15898
15899 case ARM_BUILTIN_WSADB:
15900 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15901 case ARM_BUILTIN_WSADH:
15902 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15903 case ARM_BUILTIN_WSADBZ:
15904 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15905 case ARM_BUILTIN_WSADHZ:
15906 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15907
15908 /* Several three-argument builtins. */
15909 case ARM_BUILTIN_WMACS:
15910 case ARM_BUILTIN_WMACU:
15911 case ARM_BUILTIN_WALIGN:
15912 case ARM_BUILTIN_TMIA:
15913 case ARM_BUILTIN_TMIAPH:
15914 case ARM_BUILTIN_TMIATT:
15915 case ARM_BUILTIN_TMIATB:
15916 case ARM_BUILTIN_TMIABT:
15917 case ARM_BUILTIN_TMIABB:
15918 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
15919 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
15920 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
15921 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
15922 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
15923 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
15924 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
15925 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
15926 : CODE_FOR_iwmmxt_walign);
15927 arg0 = CALL_EXPR_ARG (exp, 0);
15928 arg1 = CALL_EXPR_ARG (exp, 1);
15929 arg2 = CALL_EXPR_ARG (exp, 2);
15930 op0 = expand_normal (arg0);
15931 op1 = expand_normal (arg1);
15932 op2 = expand_normal (arg2);
15933 tmode = insn_data[icode].operand[0].mode;
15934 mode0 = insn_data[icode].operand[1].mode;
15935 mode1 = insn_data[icode].operand[2].mode;
15936 mode2 = insn_data[icode].operand[3].mode;
15937
15938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15939 op0 = copy_to_mode_reg (mode0, op0);
15940 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15941 op1 = copy_to_mode_reg (mode1, op1);
15942 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15943 op2 = copy_to_mode_reg (mode2, op2);
15944 if (target == 0
15945 || GET_MODE (target) != tmode
15946 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15947 target = gen_reg_rtx (tmode);
15948 pat = GEN_FCN (icode) (target, op0, op1, op2);
15949 if (! pat)
15950 return 0;
15951 emit_insn (pat);
15952 return target;
15953
15954 case ARM_BUILTIN_WZERO:
15955 target = gen_reg_rtx (DImode);
15956 emit_insn (gen_iwmmxt_clrdi (target));
15957 return target;
15958
15959 case ARM_BUILTIN_THREAD_POINTER:
15960 return arm_load_tp (target);
15961
15962 default:
15963 break;
15964 }
15965
15966 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15967 if (d->code == (const enum arm_builtins) fcode)
15968 return arm_expand_binop_builtin (d->icode, exp, target);
15969
15970 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15971 if (d->code == (const enum arm_builtins) fcode)
15972 return arm_expand_unop_builtin (d->icode, exp, target, 0);
15973
15974 /* @@@ Should really do something sensible here. */
15975 return NULL_RTX;
15976 }
15977 \f
15978 /* Return the number (counting from 0) of
15979 the least significant set bit in MASK. */
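/* For example, number_of_first_bit_set (0x28) is 3, since 0x28 is
   binary 101000.  */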
15980
15981 inline static int
15982 number_of_first_bit_set (unsigned mask)
15983 {
15984 int bit;
15985
15986 for (bit = 0;
15987 (mask & (1 << bit)) == 0;
15988 ++bit)
15989 continue;
15990
15991 return bit;
15992 }
15993
15994 /* Emit code to push or pop registers to or from the stack. F is the
15995 assembly file. MASK is the registers to push or pop. PUSH is
15996 nonzero if we should push, and zero if we should pop. For debugging
15997 output, if pushing, adjust CFA_OFFSET by the amount of space added
15998 to the stack. REAL_REGS should have the same number of bits set as
15999 MASK, and will be used instead (in the same order) to describe which
16000 registers were saved - this is used to mark the save slots when we
16001 push high registers after moving them to low registers. */
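/* For instance (purely illustrative), a call with PUSH nonzero and both MASK
   and REAL_REGS naming r4, r5 and lr would emit, on an EABI unwind-table
   target:

	.save	{r4, r5, lr}
	push	{r4, r5, lr}

   and, when DWARF frame output is enabled, advance *CFA_OFFSET by 12.  */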
16002 static void
16003 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16004 unsigned long real_regs)
16005 {
16006 int regno;
16007 int lo_mask = mask & 0xFF;
16008 int pushed_words = 0;
16009
16010 gcc_assert (mask);
16011
16012 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16013 {
16014 /* Special case.  Do not generate a POP PC statement here; do it in
16015 thumb_exit().  */
16016 thumb_exit (f, -1);
16017 return;
16018 }
16019
16020 if (ARM_EABI_UNWIND_TABLES && push)
16021 {
16022 fprintf (f, "\t.save\t{");
16023 for (regno = 0; regno < 15; regno++)
16024 {
16025 if (real_regs & (1 << regno))
16026 {
16027 if (real_regs & ((1 << regno) -1))
16028 fprintf (f, ", ");
16029 asm_fprintf (f, "%r", regno);
16030 }
16031 }
16032 fprintf (f, "}\n");
16033 }
16034
16035 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16036
16037 /* Look at the low registers first. */
16038 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16039 {
16040 if (lo_mask & 1)
16041 {
16042 asm_fprintf (f, "%r", regno);
16043
16044 if ((lo_mask & ~1) != 0)
16045 fprintf (f, ", ");
16046
16047 pushed_words++;
16048 }
16049 }
16050
16051 if (push && (mask & (1 << LR_REGNUM)))
16052 {
16053 /* Catch pushing the LR. */
16054 if (mask & 0xFF)
16055 fprintf (f, ", ");
16056
16057 asm_fprintf (f, "%r", LR_REGNUM);
16058
16059 pushed_words++;
16060 }
16061 else if (!push && (mask & (1 << PC_REGNUM)))
16062 {
16063 /* Catch popping the PC. */
16064 if (TARGET_INTERWORK || TARGET_BACKTRACE
16065 || current_function_calls_eh_return)
16066 {
16067 /* The PC is never popped directly; instead
16068 it is popped into r3 and then BX is used.  */
16069 fprintf (f, "}\n");
16070
16071 thumb_exit (f, -1);
16072
16073 return;
16074 }
16075 else
16076 {
16077 if (mask & 0xFF)
16078 fprintf (f, ", ");
16079
16080 asm_fprintf (f, "%r", PC_REGNUM);
16081 }
16082 }
16083
16084 fprintf (f, "}\n");
16085
16086 if (push && pushed_words && dwarf2out_do_frame ())
16087 {
16088 char *l = dwarf2out_cfi_label ();
16089 int pushed_mask = real_regs;
16090
16091 *cfa_offset += pushed_words * 4;
16092 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16093
16094 pushed_words = 0;
16095 pushed_mask = real_regs;
16096 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16097 {
16098 if (pushed_mask & 1)
16099 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16100 }
16101 }
16102 }
16103
16104 /* Generate code to return from a thumb function.
16105 If 'reg_containing_return_addr' is -1, then the return address is
16106 actually on the stack, at the stack pointer. */
16107 static void
16108 thumb_exit (FILE *f, int reg_containing_return_addr)
16109 {
16110 unsigned regs_available_for_popping;
16111 unsigned regs_to_pop;
16112 int pops_needed;
16113 unsigned available;
16114 unsigned required;
16115 int mode;
16116 int size;
16117 int restore_a4 = FALSE;
16118
16119 /* Compute the registers we need to pop. */
16120 regs_to_pop = 0;
16121 pops_needed = 0;
16122
16123 if (reg_containing_return_addr == -1)
16124 {
16125 regs_to_pop |= 1 << LR_REGNUM;
16126 ++pops_needed;
16127 }
16128
16129 if (TARGET_BACKTRACE)
16130 {
16131 /* Restore the (ARM) frame pointer and stack pointer. */
16132 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16133 pops_needed += 2;
16134 }
16135
16136 /* If there is nothing to pop then just emit the BX instruction and
16137 return. */
16138 if (pops_needed == 0)
16139 {
16140 if (current_function_calls_eh_return)
16141 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16142
16143 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16144 return;
16145 }
16146 /* Otherwise, if we are not supporting interworking, have not created
16147 a backtrace structure, and the function was not entered in ARM mode,
16148 then just pop the return address straight into the PC.  */
16149 else if (!TARGET_INTERWORK
16150 && !TARGET_BACKTRACE
16151 && !is_called_in_ARM_mode (current_function_decl)
16152 && !current_function_calls_eh_return)
16153 {
16154 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16155 return;
16156 }
16157
16158 /* Find out how many of the (return) argument registers we can corrupt. */
16159 regs_available_for_popping = 0;
16160
16161 /* If returning via __builtin_eh_return, the bottom three registers
16162 all contain information needed for the return. */
16163 if (current_function_calls_eh_return)
16164 size = 12;
16165 else
16166 {
16167 /* We can deduce the registers used from the function's
16168 return value.  This is more reliable than examining
16169 df_regs_ever_live_p () because that will be set if the register is
16170 ever used in the function, not just if the register is used
16171 to hold a return value.  */
16172
16173 if (current_function_return_rtx != 0)
16174 mode = GET_MODE (current_function_return_rtx);
16175 else
16176 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16177
16178 size = GET_MODE_SIZE (mode);
16179
16180 if (size == 0)
16181 {
16182 /* In a void function we can use any argument register.
16183 In a function that returns a structure on the stack
16184 we can use the second and third argument registers. */
16185 if (mode == VOIDmode)
16186 regs_available_for_popping =
16187 (1 << ARG_REGISTER (1))
16188 | (1 << ARG_REGISTER (2))
16189 | (1 << ARG_REGISTER (3));
16190 else
16191 regs_available_for_popping =
16192 (1 << ARG_REGISTER (2))
16193 | (1 << ARG_REGISTER (3));
16194 }
16195 else if (size <= 4)
16196 regs_available_for_popping =
16197 (1 << ARG_REGISTER (2))
16198 | (1 << ARG_REGISTER (3));
16199 else if (size <= 8)
16200 regs_available_for_popping =
16201 (1 << ARG_REGISTER (3));
16202 }
16203
16204 /* Match registers to be popped with registers into which we pop them. */
16205 for (available = regs_available_for_popping,
16206 required = regs_to_pop;
16207 required != 0 && available != 0;
16208 available &= ~(available & - available),
16209 required &= ~(required & - required))
16210 -- pops_needed;
16211
16212 /* If we have any popping registers left over, remove them. */
16213 if (available > 0)
16214 regs_available_for_popping &= ~available;
16215
16216 /* Otherwise if we need another popping register we can use
16217 the fourth argument register. */
16218 else if (pops_needed)
16219 {
16220 /* If we have not found any free argument registers and
16221 reg a4 contains the return address, we must move it. */
16222 if (regs_available_for_popping == 0
16223 && reg_containing_return_addr == LAST_ARG_REGNUM)
16224 {
16225 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16226 reg_containing_return_addr = LR_REGNUM;
16227 }
16228 else if (size > 12)
16229 {
16230 /* Register a4 is being used to hold part of the return value,
16231 but we have dire need of a free, low register. */
16232 restore_a4 = TRUE;
16233
16234 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16235 }
16236
16237 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16238 {
16239 /* The fourth argument register is available. */
16240 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16241
16242 --pops_needed;
16243 }
16244 }
16245
16246 /* Pop as many registers as we can. */
16247 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16248 regs_available_for_popping);
16249
16250 /* Process the registers we popped. */
16251 if (reg_containing_return_addr == -1)
16252 {
16253 /* The return address was popped into the lowest numbered register. */
16254 regs_to_pop &= ~(1 << LR_REGNUM);
16255
16256 reg_containing_return_addr =
16257 number_of_first_bit_set (regs_available_for_popping);
16258
16259 /* Remove this register from the mask of available registers, so that
16260 the return address will not be corrupted by further pops. */
16261 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16262 }
16263
16264 /* If we popped other registers then handle them here. */
16265 if (regs_available_for_popping)
16266 {
16267 int frame_pointer;
16268
16269 /* Work out which register currently contains the frame pointer. */
16270 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16271
16272 /* Move it into the correct place. */
16273 asm_fprintf (f, "\tmov\t%r, %r\n",
16274 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16275
16276 /* (Temporarily) remove it from the mask of popped registers. */
16277 regs_available_for_popping &= ~(1 << frame_pointer);
16278 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16279
16280 if (regs_available_for_popping)
16281 {
16282 int stack_pointer;
16283
16284 /* We popped the stack pointer as well;
16285 find the register that contains it. */
16286 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16287
16288 /* Move it into the stack register. */
16289 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16290
16291 /* At this point we have popped all necessary registers, so
16292 do not worry about restoring regs_available_for_popping
16293 to its correct value:
16294
16295 assert (pops_needed == 0)
16296 assert (regs_available_for_popping == (1 << frame_pointer))
16297 assert (regs_to_pop == (1 << STACK_POINTER)) */
16298 }
16299 else
16300 {
16301 /* Since we have just moved the popped value into the frame
16302 pointer, the popping register is available for reuse, and
16303 we know that we still have the stack pointer left to pop. */
16304 regs_available_for_popping |= (1 << frame_pointer);
16305 }
16306 }
16307
16308 /* If we still have registers left on the stack, but we no longer have
16309 any registers into which we can pop them, then we must move the return
16310 address into the link register and make available the register that
16311 contained it. */
16312 if (regs_available_for_popping == 0 && pops_needed > 0)
16313 {
16314 regs_available_for_popping |= 1 << reg_containing_return_addr;
16315
16316 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16317 reg_containing_return_addr);
16318
16319 reg_containing_return_addr = LR_REGNUM;
16320 }
16321
16322 /* If we have registers left on the stack then pop some more.
16323 We know that at most we will want to pop FP and SP. */
16324 if (pops_needed > 0)
16325 {
16326 int popped_into;
16327 int move_to;
16328
16329 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16330 regs_available_for_popping);
16331
16332 /* We have popped either FP or SP.
16333 Move whichever one it is into the correct register. */
16334 popped_into = number_of_first_bit_set (regs_available_for_popping);
16335 move_to = number_of_first_bit_set (regs_to_pop);
16336
16337 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16338
16339 regs_to_pop &= ~(1 << move_to);
16340
16341 --pops_needed;
16342 }
16343
16344 /* If we still have not popped everything then we must have only
16345 had one register available to us and we are now popping the SP. */
16346 if (pops_needed > 0)
16347 {
16348 int popped_into;
16349
16350 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16351 regs_available_for_popping);
16352
16353 popped_into = number_of_first_bit_set (regs_available_for_popping);
16354
16355 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16356 /*
16357 assert (regs_to_pop == (1 << STACK_POINTER))
16358 assert (pops_needed == 1)
16359 */
16360 }
16361
16362 /* If necessary restore the a4 register. */
16363 if (restore_a4)
16364 {
16365 if (reg_containing_return_addr != LR_REGNUM)
16366 {
16367 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16368 reg_containing_return_addr = LR_REGNUM;
16369 }
16370
16371 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16372 }
16373
16374 if (current_function_calls_eh_return)
16375 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16376
16377 /* Return to caller. */
16378 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16379 }
16380
16381 \f
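/* Annotate the assembly output with the address of INSN when printing
   of asm names has been requested.  */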
16382 void
16383 thumb1_final_prescan_insn (rtx insn)
16384 {
16385 if (flag_print_asm_name)
16386 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16387 INSN_ADDRESSES (INSN_UID (insn)));
16388 }
16389
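/* Return nonzero if VAL fits entirely within an 8-bit field shifted left
   by 0 to 24 bits, i.e. it can be built from an 8-bit constant and a
   left shift.  */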
16390 int
16391 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16392 {
16393 unsigned HOST_WIDE_INT mask = 0xff;
16394 int i;
16395
16396 if (val == 0) /* XXX */
16397 return 0;
16398
16399 for (i = 0; i < 25; i++)
16400 if ((val & (mask << i)) == val)
16401 return 1;
16402
16403 return 0;
16404 }
16405
16406 /* Returns nonzero if the current function contains,
16407 or might contain, a far jump.  */
16408 static int
16409 thumb_far_jump_used_p (void)
16410 {
16411 rtx insn;
16412
16413 /* This test is only important for leaf functions. */
16414 /* assert (!leaf_function_p ()); */
16415
16416 /* If we have already decided that far jumps may be used,
16417 do not bother checking again, and always return true even if
16418 it turns out that they are not being used. Once we have made
16419 the decision that far jumps are present (and that hence the link
16420 register will be pushed onto the stack) we cannot go back on it. */
16421 if (cfun->machine->far_jump_used)
16422 return 1;
16423
16424 /* If this function is not being called from the prologue/epilogue
16425 generation code then it must be being called from the
16426 INITIAL_ELIMINATION_OFFSET macro. */
16427 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16428 {
16429 /* In this case we know that we are being asked about the elimination
16430 of the arg pointer register. If that register is not being used,
16431 then there are no arguments on the stack, and we do not have to
16432 worry that a far jump might force the prologue to push the link
16433 register, changing the stack offsets. In this case we can just
16434 return false, since the presence of far jumps in the function will
16435 not affect stack offsets.
16436
16437 If the arg pointer is live (or if it was live, but has now been
16438 eliminated and so set to dead) then we do have to test to see if
16439 the function might contain a far jump. This test can lead to some
16440 false positives, since before reload is completed, the length of
16441 branch instructions is not known, so gcc defaults to returning their
16442 longest length, which in turn sets the far jump attribute to true.
16443
16444 A false positive will not result in bad code being generated, but it
16445 will result in a needless push and pop of the link register. We
16446 hope that this does not occur too often.
16447
16448 If we need doubleword stack alignment this could affect the other
16449 elimination offsets so we can't risk getting it wrong. */
16450 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16451 cfun->machine->arg_pointer_live = 1;
16452 else if (!cfun->machine->arg_pointer_live)
16453 return 0;
16454 }
16455
16456 /* Check to see if the function contains a branch
16457 insn with the far jump attribute set. */
16458 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16459 {
16460 if (GET_CODE (insn) == JUMP_INSN
16461 /* Ignore tablejump patterns. */
16462 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16463 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16464 && get_attr_far_jump (insn) == FAR_JUMP_YES
16465 )
16466 {
16467 /* Record the fact that we have decided that
16468 the function does use far jumps. */
16469 cfun->machine->far_jump_used = 1;
16470 return 1;
16471 }
16472 }
16473
16474 return 0;
16475 }
16476
16477 /* Return nonzero if FUNC must be entered in ARM mode. */
16478 int
16479 is_called_in_ARM_mode (tree func)
16480 {
16481 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16482
16483 /* Ignore the problem about functions whose address is taken. */
16484 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16485 return TRUE;
16486
16487 #ifdef ARM_PE
16488 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16489 #else
16490 return FALSE;
16491 #endif
16492 }
16493
16494 /* The bits which aren't usefully expanded as rtl. */
16495 const char *
16496 thumb_unexpanded_epilogue (void)
16497 {
16498 int regno;
16499 unsigned long live_regs_mask = 0;
16500 int high_regs_pushed = 0;
16501 int had_to_push_lr;
16502 int size;
16503
16504 if (return_used_this_function)
16505 return "";
16506
16507 if (IS_NAKED (arm_current_func_type ()))
16508 return "";
16509
16510 live_regs_mask = thumb1_compute_save_reg_mask ();
16511 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16512
16513 /* Deduce the registers used from the function's return value.
16514 This is more reliable than examining df_regs_ever_live_p () because that
16515 will be set if the register is ever used in the function, not just if
16516 the register is used to hold a return value. */
16517 size = arm_size_return_regs ();
16518
16519 /* The prolog may have pushed some high registers to use as
16520 work registers. e.g. the testsuite file:
16521 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16522 compiles to produce:
16523 push {r4, r5, r6, r7, lr}
16524 mov r7, r9
16525 mov r6, r8
16526 push {r6, r7}
16527 as part of the prolog. We have to undo that pushing here. */
16528
16529 if (high_regs_pushed)
16530 {
16531 unsigned long mask = live_regs_mask & 0xff;
16532 int next_hi_reg;
16533
16534 /* The available low registers depend on the size of the value we are
16535 returning. */
16536 if (size <= 12)
16537 mask |= 1 << 3;
16538 if (size <= 8)
16539 mask |= 1 << 2;
16540
16541 if (mask == 0)
16542 /* Oh dear! We have no low registers into which we can pop
16543 high registers! */
16544 internal_error
16545 ("no low registers available for popping high registers");
16546
16547 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16548 if (live_regs_mask & (1 << next_hi_reg))
16549 break;
16550
16551 while (high_regs_pushed)
16552 {
16553 /* Find lo register(s) into which the high register(s) can
16554 be popped. */
16555 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16556 {
16557 if (mask & (1 << regno))
16558 high_regs_pushed--;
16559 if (high_regs_pushed == 0)
16560 break;
16561 }
16562
16563 mask &= (2 << regno) - 1;	/* A noop if regno == 8.  */
16564
16565 /* Pop the values into the low register(s). */
16566 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16567
16568 /* Move the value(s) into the high registers. */
16569 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16570 {
16571 if (mask & (1 << regno))
16572 {
16573 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16574 regno);
16575
16576 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16577 if (live_regs_mask & (1 << next_hi_reg))
16578 break;
16579 }
16580 }
16581 }
16582 live_regs_mask &= ~0x0f00;
16583 }
16584
16585 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16586 live_regs_mask &= 0xff;
16587
16588 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16589 {
16590 /* Pop the return address into the PC. */
16591 if (had_to_push_lr)
16592 live_regs_mask |= 1 << PC_REGNUM;
16593
16594 /* Either no argument registers were pushed or a backtrace
16595 structure was created which includes an adjusted stack
16596 pointer, so just pop everything. */
16597 if (live_regs_mask)
16598 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16599 live_regs_mask);
16600
16601 /* We have either just popped the return address into the
16602 PC or it was kept in LR for the entire function.  */
16603 if (!had_to_push_lr)
16604 thumb_exit (asm_out_file, LR_REGNUM);
16605 }
16606 else
16607 {
16608 /* Pop everything but the return address. */
16609 if (live_regs_mask)
16610 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16611 live_regs_mask);
16612
16613 if (had_to_push_lr)
16614 {
16615 if (size > 12)
16616 {
16617 /* We have no free low regs, so save one. */
16618 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16619 LAST_ARG_REGNUM);
16620 }
16621
16622 /* Get the return address into a temporary register. */
16623 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16624 1 << LAST_ARG_REGNUM);
16625
16626 if (size > 12)
16627 {
16628 /* Move the return address to lr. */
16629 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16630 LAST_ARG_REGNUM);
16631 /* Restore the low register. */
16632 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16633 IP_REGNUM);
16634 regno = LR_REGNUM;
16635 }
16636 else
16637 regno = LAST_ARG_REGNUM;
16638 }
16639 else
16640 regno = LR_REGNUM;
16641
16642 /* Remove the argument registers that were pushed onto the stack. */
16643 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16644 SP_REGNUM, SP_REGNUM,
16645 current_function_pretend_args_size);
16646
16647 thumb_exit (asm_out_file, regno);
16648 }
16649
16650 return "";
16651 }
16652
16653 /* Functions to save and restore machine-specific function data. */
16654 static struct machine_function *
16655 arm_init_machine_status (void)
16656 {
16657 struct machine_function *machine;
16658 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16659
16660 #if ARM_FT_UNKNOWN != 0
16661 machine->func_type = ARM_FT_UNKNOWN;
16662 #endif
16663 return machine;
16664 }
16665
16666 /* Return an RTX indicating where the return address to the
16667 calling function can be found. */
16668 rtx
16669 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16670 {
16671 if (count != 0)
16672 return NULL_RTX;
16673
16674 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16675 }
16676
16677 /* Do anything needed before RTL is emitted for each function. */
16678 void
16679 arm_init_expanders (void)
16680 {
16681 /* Arrange to initialize and mark the machine per-function status. */
16682 init_machine_status = arm_init_machine_status;
16683
16684 /* This is to stop the combine pass optimizing away the alignment
16685 adjustment of va_arg. */
16686 /* ??? It is claimed that this should not be necessary. */
16687 if (cfun)
16688 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16689 }
16690
16691
16692 /* Like arm_compute_initial_elimination_offset.  Simpler because there
16693 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16694 to point at the base of the local variables after static stack
16695 space for a function has been allocated. */
16696
16697 HOST_WIDE_INT
16698 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16699 {
16700 arm_stack_offsets *offsets;
16701
16702 offsets = arm_get_frame_offsets ();
16703
16704 switch (from)
16705 {
16706 case ARG_POINTER_REGNUM:
16707 switch (to)
16708 {
16709 case STACK_POINTER_REGNUM:
16710 return offsets->outgoing_args - offsets->saved_args;
16711
16712 case FRAME_POINTER_REGNUM:
16713 return offsets->soft_frame - offsets->saved_args;
16714
16715 case ARM_HARD_FRAME_POINTER_REGNUM:
16716 return offsets->saved_regs - offsets->saved_args;
16717
16718 case THUMB_HARD_FRAME_POINTER_REGNUM:
16719 return offsets->locals_base - offsets->saved_args;
16720
16721 default:
16722 gcc_unreachable ();
16723 }
16724 break;
16725
16726 case FRAME_POINTER_REGNUM:
16727 switch (to)
16728 {
16729 case STACK_POINTER_REGNUM:
16730 return offsets->outgoing_args - offsets->soft_frame;
16731
16732 case ARM_HARD_FRAME_POINTER_REGNUM:
16733 return offsets->saved_regs - offsets->soft_frame;
16734
16735 case THUMB_HARD_FRAME_POINTER_REGNUM:
16736 return offsets->locals_base - offsets->soft_frame;
16737
16738 default:
16739 gcc_unreachable ();
16740 }
16741 break;
16742
16743 default:
16744 gcc_unreachable ();
16745 }
16746 }
16747
16748 /* Generate the rest of a function's prologue. */
16749 void
16750 thumb1_expand_prologue (void)
16751 {
16752 rtx insn, dwarf;
16753
16754 HOST_WIDE_INT amount;
16755 arm_stack_offsets *offsets;
16756 unsigned long func_type;
16757 int regno;
16758 unsigned long live_regs_mask;
16759
16760 func_type = arm_current_func_type ();
16761
16762 /* Naked functions don't have prologues. */
16763 if (IS_NAKED (func_type))
16764 return;
16765
16766 if (IS_INTERRUPT (func_type))
16767 {
16768 error ("interrupt Service Routines cannot be coded in Thumb mode");
16769 return;
16770 }
16771
16772 live_regs_mask = thumb1_compute_save_reg_mask ();
16773 /* Load the pic register before setting the frame pointer,
16774 so we can use r7 as a temporary work register. */
16775 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16776 arm_load_pic_register (live_regs_mask);
16777
16778 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16779 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16780 stack_pointer_rtx);
16781
16782 offsets = arm_get_frame_offsets ();
16783 amount = offsets->outgoing_args - offsets->saved_regs;
16784 if (amount)
16785 {
16786 if (amount < 512)
16787 {
16788 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16789 GEN_INT (- amount)));
16790 RTX_FRAME_RELATED_P (insn) = 1;
16791 }
16792 else
16793 {
16794 rtx reg;
16795
16796 /* The stack decrement is too big for an immediate value in a single
16797 insn. In theory we could issue multiple subtracts, but after
16798 three of them it becomes more space efficient to place the full
16799 value in the constant pool and load into a register. (Also the
16800 ARM debugger really likes to see only one stack decrement per
16801 function). So instead we look for a scratch register into which
16802 we can load the decrement, and then we subtract this from the
16803 stack pointer. Unfortunately on the thumb the only available
16804 scratch registers are the argument registers, and we cannot use
16805 these as they may hold arguments to the function. Instead we
16806 attempt to locate a call preserved register which is used by this
16807 function. If we can find one, then we know that it will have
16808 been pushed at the start of the prologue and so we can corrupt
16809 it now. */
16810 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16811 if (live_regs_mask & (1 << regno)
16812 && !(frame_pointer_needed
16813 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16814 break;
16815
16816 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16817 {
16818 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16819
16820 /* Choose an arbitrary, non-argument low register. */
16821 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16822
16823 /* Save it by copying it into a high, scratch register. */
16824 emit_insn (gen_movsi (spare, reg));
16825 /* Add a USE to stop propagate_one_insn() from barfing. */
16826 emit_insn (gen_prologue_use (spare));
16827
16828 /* Decrement the stack. */
16829 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16830 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16831 stack_pointer_rtx, reg));
16832 RTX_FRAME_RELATED_P (insn) = 1;
16833 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16834 plus_constant (stack_pointer_rtx,
16835 -amount));
16836 RTX_FRAME_RELATED_P (dwarf) = 1;
16837 REG_NOTES (insn)
16838 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16839 REG_NOTES (insn));
16840
16841 /* Restore the low register's original value. */
16842 emit_insn (gen_movsi (reg, spare));
16843
16844 /* Emit a USE of the restored scratch register, so that flow
16845 analysis will not consider the restore redundant. The
16846 register won't be used again in this function and isn't
16847 restored by the epilogue. */
16848 emit_insn (gen_prologue_use (reg));
16849 }
16850 else
16851 {
16852 reg = gen_rtx_REG (SImode, regno);
16853
16854 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16855
16856 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16857 stack_pointer_rtx, reg));
16858 RTX_FRAME_RELATED_P (insn) = 1;
16859 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16860 plus_constant (stack_pointer_rtx,
16861 -amount));
16862 RTX_FRAME_RELATED_P (dwarf) = 1;
16863 REG_NOTES (insn)
16864 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16865 REG_NOTES (insn));
16866 }
16867 }
16868 }
16869
16870 if (frame_pointer_needed)
16871 thumb_set_frame_pointer (offsets);
16872
16873 /* If we are profiling, make sure no instructions are scheduled before
16874 the call to mcount. Similarly if the user has requested no
16875 scheduling in the prolog. Similarly if we want non-call exceptions
16876 using the EABI unwinder, to prevent faulting instructions from being
16877 swapped with a stack adjustment. */
16878 if (current_function_profile || !TARGET_SCHED_PROLOG
16879 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16880 emit_insn (gen_blockage ());
16881
16882 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16883 if (live_regs_mask & 0xff)
16884 cfun->machine->lr_save_eliminated = 0;
16885 }
16886
16887
16888 void
16889 thumb1_expand_epilogue (void)
16890 {
16891 HOST_WIDE_INT amount;
16892 arm_stack_offsets *offsets;
16893 int regno;
16894
16895 /* Naked functions don't have epilogues.  */
16896 if (IS_NAKED (arm_current_func_type ()))
16897 return;
16898
16899 offsets = arm_get_frame_offsets ();
16900 amount = offsets->outgoing_args - offsets->saved_regs;
16901
16902 if (frame_pointer_needed)
16903 {
16904 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16905 amount = offsets->locals_base - offsets->saved_regs;
16906 }
16907
16908 gcc_assert (amount >= 0);
16909 if (amount)
16910 {
16911 if (amount < 512)
16912 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16913 GEN_INT (amount)));
16914 else
16915 {
16916 /* r3 is always free in the epilogue. */
16917 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
16918
16919 emit_insn (gen_movsi (reg, GEN_INT (amount)));
16920 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
16921 }
16922 }
16923
16924 /* Emit a USE (stack_pointer_rtx), so that
16925 the stack adjustment will not be deleted. */
16926 emit_insn (gen_prologue_use (stack_pointer_rtx));
16927
16928 if (current_function_profile || !TARGET_SCHED_PROLOG)
16929 emit_insn (gen_blockage ());
16930
16931 /* Emit a clobber for each insn that will be restored in the epilogue,
16932 so that flow2 will get register lifetimes correct. */
16933 for (regno = 0; regno < 13; regno++)
16934 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
16935 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
16936
16937 if (! df_regs_ever_live_p (LR_REGNUM))
16938 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
16939 }
16940
16941 static void
16942 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
16943 {
16944 unsigned long live_regs_mask = 0;
16945 unsigned long l_mask;
16946 unsigned high_regs_pushed = 0;
16947 int cfa_offset = 0;
16948 int regno;
16949
16950 if (IS_NAKED (arm_current_func_type ()))
16951 return;
16952
16953 if (is_called_in_ARM_mode (current_function_decl))
16954 {
16955 const char * name;
16956
16957 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
16958 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
16959 == SYMBOL_REF);
16960 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
16961
16962 /* Generate code sequence to switch us into Thumb mode. */
16963 /* The .code 32 directive has already been emitted by
16964 ASM_DECLARE_FUNCTION_NAME. */
16965 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
16966 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
16967
16968 /* Generate a label, so that the debugger will notice the
16969 change in instruction sets. This label is also used by
16970 the assembler to bypass the ARM code when this function
16971 is called from a Thumb encoded function elsewhere in the
16972 same file. Hence the definition of STUB_NAME here must
16973 agree with the definition in gas/config/tc-arm.c. */
16974
16975 #define STUB_NAME ".real_start_of"
16976
16977 fprintf (f, "\t.code\t16\n");
16978 #ifdef ARM_PE
16979 if (arm_dllexport_name_p (name))
16980 name = arm_strip_name_encoding (name);
16981 #endif
16982 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
16983 fprintf (f, "\t.thumb_func\n");
16984 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
16985 }
16986
16987 if (current_function_pretend_args_size)
16988 {
16989 /* Output unwind directive for the stack adjustment. */
16990 if (ARM_EABI_UNWIND_TABLES)
16991 fprintf (f, "\t.pad #%d\n",
16992 current_function_pretend_args_size);
16993
16994 if (cfun->machine->uses_anonymous_args)
16995 {
16996 int num_pushes;
16997
16998 fprintf (f, "\tpush\t{");
16999
17000 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
17001
17002 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17003 regno <= LAST_ARG_REGNUM;
17004 regno++)
17005 asm_fprintf (f, "%r%s", regno,
17006 regno == LAST_ARG_REGNUM ? "" : ", ");
17007
17008 fprintf (f, "}\n");
17009 }
17010 else
17011 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17012 SP_REGNUM, SP_REGNUM,
17013 current_function_pretend_args_size);
17014
17015 /* We don't need to record the stores for unwinding (would it
17016 help the debugger any if we did?), but record the change in
17017 the stack pointer. */
17018 if (dwarf2out_do_frame ())
17019 {
17020 char *l = dwarf2out_cfi_label ();
17021
17022 cfa_offset = cfa_offset + current_function_pretend_args_size;
17023 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17024 }
17025 }
17026
17027 /* Get the registers we are going to push. */
17028 live_regs_mask = thumb1_compute_save_reg_mask ();
17029 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17030 l_mask = live_regs_mask & 0x40ff;
17031 /* Then count how many other high registers will need to be pushed. */
17032 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17033
17034 if (TARGET_BACKTRACE)
17035 {
17036 unsigned offset;
17037 unsigned work_register;
17038
17039 /* We have been asked to create a stack backtrace structure.
17040 The code looks like this:
17041
17042 0 .align 2
17043 0 func:
17044 0 sub SP, #16 Reserve space for 4 registers.
17045 2 push {R7} Push low registers.
17046 4 add R7, SP, #20 Get the stack pointer before the push.
17047 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17048 8 mov R7, PC Get hold of the start of this code plus 12.
17049 10 str R7, [SP, #16] Store it.
17050 12 mov R7, FP Get hold of the current frame pointer.
17051 14 str R7, [SP, #4] Store it.
17052 16 mov R7, LR Get hold of the current return address.
17053 18 str R7, [SP, #12] Store it.
17054 20 add R7, SP, #16 Point at the start of the backtrace structure.
17055 22 mov FP, R7 Put this value into the frame pointer. */
17056
17057 work_register = thumb_find_work_register (live_regs_mask);
17058
17059 if (ARM_EABI_UNWIND_TABLES)
17060 asm_fprintf (f, "\t.pad #16\n");
17061
17062 asm_fprintf
17063 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17064 SP_REGNUM, SP_REGNUM);
17065
17066 if (dwarf2out_do_frame ())
17067 {
17068 char *l = dwarf2out_cfi_label ();
17069
17070 cfa_offset = cfa_offset + 16;
17071 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17072 }
17073
17074 if (l_mask)
17075 {
17076 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17077 offset = bit_count (l_mask) * UNITS_PER_WORD;
17078 }
17079 else
17080 offset = 0;
17081
17082 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17083 offset + 16 + current_function_pretend_args_size);
17084
17085 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17086 offset + 4);
17087
17088 /* Make sure that the instruction fetching the PC is in the right place
17089 to calculate "start of backtrace creation code + 12". */
17090 if (l_mask)
17091 {
17092 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17093 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17094 offset + 12);
17095 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17096 ARM_HARD_FRAME_POINTER_REGNUM);
17097 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17098 offset);
17099 }
17100 else
17101 {
17102 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17103 ARM_HARD_FRAME_POINTER_REGNUM);
17104 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17105 offset);
17106 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17107 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17108 offset + 12);
17109 }
17110
17111 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17112 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17113 offset + 8);
17114 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17115 offset + 12);
17116 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17117 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17118 }
17119 /* Optimization: If we are not pushing any low registers but we are going
17120 to push some high registers then delay our first push. This will just
17121 be a push of LR and we can combine it with the push of the first high
17122 register. */
17123 else if ((l_mask & 0xff) != 0
17124 || (high_regs_pushed == 0 && l_mask))
17125 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17126
17127 if (high_regs_pushed)
17128 {
17129 unsigned pushable_regs;
17130 unsigned next_hi_reg;
17131
17132 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17133 if (live_regs_mask & (1 << next_hi_reg))
17134 break;
17135
17136 pushable_regs = l_mask & 0xff;
17137
17138 if (pushable_regs == 0)
17139 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17140
17141 while (high_regs_pushed > 0)
17142 {
17143 unsigned long real_regs_mask = 0;
17144
17145 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17146 {
17147 if (pushable_regs & (1 << regno))
17148 {
17149 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17150
17151 high_regs_pushed --;
17152 real_regs_mask |= (1 << next_hi_reg);
17153
17154 if (high_regs_pushed)
17155 {
17156 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17157 next_hi_reg --)
17158 if (live_regs_mask & (1 << next_hi_reg))
17159 break;
17160 }
17161 else
17162 {
17163 pushable_regs &= ~((1 << regno) - 1);
17164 break;
17165 }
17166 }
17167 }
17168
17169 /* If we had to find a work register and we have not yet
17170 saved the LR then add it to the list of regs to push. */
17171 if (l_mask == (1 << LR_REGNUM))
17172 {
17173 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17174 1, &cfa_offset,
17175 real_regs_mask | (1 << LR_REGNUM));
17176 l_mask = 0;
17177 }
17178 else
17179 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17180 }
17181 }
17182 }
17183
17184 /* Handle the case of a double word load into a low register from
17185 a computed memory address. The computed address may involve a
17186 register which is overwritten by the load. */
17187 const char *
17188 thumb_load_double_from_address (rtx *operands)
17189 {
17190 rtx addr;
17191 rtx base;
17192 rtx offset;
17193 rtx arg1;
17194 rtx arg2;
17195
17196 gcc_assert (GET_CODE (operands[0]) == REG);
17197 gcc_assert (GET_CODE (operands[1]) == MEM);
17198
17199 /* Get the memory address. */
17200 addr = XEXP (operands[1], 0);
17201
17202 /* Work out how the memory address is computed. */
17203 switch (GET_CODE (addr))
17204 {
17205 case REG:
17206 operands[2] = adjust_address (operands[1], SImode, 4);
17207
17208 if (REGNO (operands[0]) == REGNO (addr))
17209 {
17210 output_asm_insn ("ldr\t%H0, %2", operands);
17211 output_asm_insn ("ldr\t%0, %1", operands);
17212 }
17213 else
17214 {
17215 output_asm_insn ("ldr\t%0, %1", operands);
17216 output_asm_insn ("ldr\t%H0, %2", operands);
17217 }
17218 break;
17219
17220 case CONST:
17221 /* Compute <address> + 4 for the high order load. */
17222 operands[2] = adjust_address (operands[1], SImode, 4);
17223
17224 output_asm_insn ("ldr\t%0, %1", operands);
17225 output_asm_insn ("ldr\t%H0, %2", operands);
17226 break;
17227
17228 case PLUS:
17229 arg1 = XEXP (addr, 0);
17230 arg2 = XEXP (addr, 1);
17231
17232 if (CONSTANT_P (arg1))
17233 base = arg2, offset = arg1;
17234 else
17235 base = arg1, offset = arg2;
17236
17237 gcc_assert (GET_CODE (base) == REG);
17238
17239 /* Catch the case of <address> = <reg> + <reg> */
17240 if (GET_CODE (offset) == REG)
17241 {
17242 int reg_offset = REGNO (offset);
17243 int reg_base = REGNO (base);
17244 int reg_dest = REGNO (operands[0]);
17245
17246 /* Add the base and offset registers together into the
17247 higher destination register. */
17248 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17249 reg_dest + 1, reg_base, reg_offset);
17250
17251 /* Load the lower destination register from the address in
17252 the higher destination register. */
17253 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17254 reg_dest, reg_dest + 1);
17255
17256 /* Load the higher destination register from its own address
17257 plus 4. */
17258 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17259 reg_dest + 1, reg_dest + 1);
17260 }
17261 else
17262 {
17263 /* Compute <address> + 4 for the high order load. */
17264 operands[2] = adjust_address (operands[1], SImode, 4);
17265
17266 /* If the computed address is held in the low order register
17267 then load the high order register first, otherwise always
17268 load the low order register first. */
17269 if (REGNO (operands[0]) == REGNO (base))
17270 {
17271 output_asm_insn ("ldr\t%H0, %2", operands);
17272 output_asm_insn ("ldr\t%0, %1", operands);
17273 }
17274 else
17275 {
17276 output_asm_insn ("ldr\t%0, %1", operands);
17277 output_asm_insn ("ldr\t%H0, %2", operands);
17278 }
17279 }
17280 break;
17281
17282 case LABEL_REF:
17283 /* With no registers to worry about we can just load the value
17284 directly. */
17285 operands[2] = adjust_address (operands[1], SImode, 4);
17286
17287 output_asm_insn ("ldr\t%H0, %2", operands);
17288 output_asm_insn ("ldr\t%0, %1", operands);
17289 break;
17290
17291 default:
17292 gcc_unreachable ();
17293 }
17294
17295 return "";
17296 }
17297
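/* Output an ldmia/stmia pair that copies N (2 or 3) words, first sorting
   the scratch registers in OPERANDS into ascending order so that the
   register lists of the multiple load/store instructions are well-formed.  */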
17298 const char *
17299 thumb_output_move_mem_multiple (int n, rtx *operands)
17300 {
17301 rtx tmp;
17302
17303 switch (n)
17304 {
17305 case 2:
17306 if (REGNO (operands[4]) > REGNO (operands[5]))
17307 {
17308 tmp = operands[4];
17309 operands[4] = operands[5];
17310 operands[5] = tmp;
17311 }
17312 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17313 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17314 break;
17315
17316 case 3:
17317 if (REGNO (operands[4]) > REGNO (operands[5]))
17318 {
17319 tmp = operands[4];
17320 operands[4] = operands[5];
17321 operands[5] = tmp;
17322 }
17323 if (REGNO (operands[5]) > REGNO (operands[6]))
17324 {
17325 tmp = operands[5];
17326 operands[5] = operands[6];
17327 operands[6] = tmp;
17328 }
17329 if (REGNO (operands[4]) > REGNO (operands[5]))
17330 {
17331 tmp = operands[4];
17332 operands[4] = operands[5];
17333 operands[5] = tmp;
17334 }
17335
17336 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17337 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17338 break;
17339
17340 default:
17341 gcc_unreachable ();
17342 }
17343
17344 return "";
17345 }
17346
17347 /* Output a call-via instruction for thumb state. */
17348 const char *
17349 thumb_call_via_reg (rtx reg)
17350 {
17351 int regno = REGNO (reg);
17352 rtx *labelp;
17353
17354 gcc_assert (regno < LR_REGNUM);
17355
17356 /* If we are in the normal text section we can use a single instance
17357 per compilation unit. If we are doing function sections, then we need
17358 an entry per section, since we can't rely on reachability. */
17359 if (in_section == text_section)
17360 {
17361 thumb_call_reg_needed = 1;
17362
17363 if (thumb_call_via_label[regno] == NULL)
17364 thumb_call_via_label[regno] = gen_label_rtx ();
17365 labelp = thumb_call_via_label + regno;
17366 }
17367 else
17368 {
17369 if (cfun->machine->call_via[regno] == NULL)
17370 cfun->machine->call_via[regno] = gen_label_rtx ();
17371 labelp = cfun->machine->call_via + regno;
17372 }
17373
17374 output_asm_insn ("bl\t%a0", labelp);
17375 return "";
17376 }
17377
17378 /* Routines for generating rtl. */
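/* Expand a block copy of INTVAL (operands[2]) bytes from operands[1] to
   operands[0]: use the 12-byte and 8-byte block-move patterns while
   possible, then finish with word, halfword and byte copies.  */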
17379 void
17380 thumb_expand_movmemqi (rtx *operands)
17381 {
17382 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17383 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17384 HOST_WIDE_INT len = INTVAL (operands[2]);
17385 HOST_WIDE_INT offset = 0;
17386
17387 while (len >= 12)
17388 {
17389 emit_insn (gen_movmem12b (out, in, out, in));
17390 len -= 12;
17391 }
17392
17393 if (len >= 8)
17394 {
17395 emit_insn (gen_movmem8b (out, in, out, in));
17396 len -= 8;
17397 }
17398
17399 if (len >= 4)
17400 {
17401 rtx reg = gen_reg_rtx (SImode);
17402 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17403 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17404 len -= 4;
17405 offset += 4;
17406 }
17407
17408 if (len >= 2)
17409 {
17410 rtx reg = gen_reg_rtx (HImode);
17411 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17412 plus_constant (in, offset))));
17413 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17414 reg));
17415 len -= 2;
17416 offset += 2;
17417 }
17418
17419 if (len)
17420 {
17421 rtx reg = gen_reg_rtx (QImode);
17422 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17423 plus_constant (in, offset))));
17424 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17425 reg));
17426 }
17427 }
17428
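/* Handle storing a half-word to memory during reload.  */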
17429 void
17430 thumb_reload_out_hi (rtx *operands)
17431 {
17432 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17433 }
17434
17435 /* Handle reading a half-word from memory during reload. */
17436 void
17437 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17438 {
17439 gcc_unreachable ();
17440 }
17441
17442 /* Return the length of a function name prefix
17443 that starts with the character 'c'. */
17444 static int
17445 arm_get_strip_length (int c)
17446 {
17447 switch (c)
17448 {
17449 ARM_NAME_ENCODING_LENGTHS
17450 default: return 0;
17451 }
17452 }
17453
17454 /* Return a pointer to a function's name with any
17455 and all prefix encodings stripped from it. */
17456 const char *
17457 arm_strip_name_encoding (const char *name)
17458 {
17459 int skip;
17460
17461 while ((skip = arm_get_strip_length (* name)))
17462 name += skip;
17463
17464 return name;
17465 }
17466
17467 /* If there is a '*' anywhere in the name's prefix, then
17468 emit the stripped name verbatim, otherwise prepend an
17469 underscore if leading underscores are being used. */
17470 void
17471 arm_asm_output_labelref (FILE *stream, const char *name)
17472 {
17473 int skip;
17474 int verbatim = 0;
17475
17476 while ((skip = arm_get_strip_length (* name)))
17477 {
17478 verbatim |= (*name == '*');
17479 name += skip;
17480 }
17481
17482 if (verbatim)
17483 fputs (name, stream);
17484 else
17485 asm_fprintf (stream, "%U%s", name);
17486 }
17487
17488 static void
17489 arm_file_start (void)
17490 {
17491 int val;
17492
17493 if (TARGET_UNIFIED_ASM)
17494 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17495
17496 if (TARGET_BPABI)
17497 {
17498 const char *fpu_name;
17499 if (arm_select[0].string)
17500 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17501 else if (arm_select[1].string)
17502 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17503 else
17504 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17505 all_cores[arm_default_cpu].name);
17506
17507 if (TARGET_SOFT_FLOAT)
17508 {
17509 if (TARGET_VFP)
17510 fpu_name = "softvfp";
17511 else
17512 fpu_name = "softfpa";
17513 }
17514 else
17515 {
17516 int set_float_abi_attributes = 0;
17517 switch (arm_fpu_arch)
17518 {
17519 case FPUTYPE_FPA:
17520 fpu_name = "fpa";
17521 break;
17522 case FPUTYPE_FPA_EMU2:
17523 fpu_name = "fpe2";
17524 break;
17525 case FPUTYPE_FPA_EMU3:
17526 fpu_name = "fpe3";
17527 break;
17528 case FPUTYPE_MAVERICK:
17529 fpu_name = "maverick";
17530 break;
17531 case FPUTYPE_VFP:
17532 fpu_name = "vfp";
17533 set_float_abi_attributes = 1;
17534 break;
17535 case FPUTYPE_VFP3:
17536 fpu_name = "vfp3";
17537 set_float_abi_attributes = 1;
17538 break;
17539 case FPUTYPE_NEON:
17540 fpu_name = "neon";
17541 set_float_abi_attributes = 1;
17542 break;
17543 default:
17544 abort();
17545 }
17546 if (set_float_abi_attributes)
17547 {
17548 if (TARGET_HARD_FLOAT)
17549 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17550 if (TARGET_HARD_FLOAT_ABI)
17551 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17552 }
17553 }
17554 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17555
17556 /* Some of these attributes only apply when the corresponding features
17557 are used. However we don't have any easy way of figuring this out.
17558 Conservatively record the setting that would have been used. */
17559
17560 /* Tag_ABI_PCS_wchar_t. */
17561 asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
17562 (int)WCHAR_TYPE_SIZE / BITS_PER_UNIT);
17563
17564 /* Tag_ABI_FP_rounding. */
17565 if (flag_rounding_math)
17566 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17567 if (!flag_unsafe_math_optimizations)
17568 {
17569 /* Tag_ABI_FP_denormal.  */
17570 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17571 /* Tag_ABI_FP_exceptions. */
17572 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17573 }
17574 /* Tag_ABI_FP_user_exceptions. */
17575 if (flag_signaling_nans)
17576 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17577 /* Tag_ABI_FP_number_model. */
17578 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17579 flag_finite_math_only ? 1 : 3);
17580
17581 /* Tag_ABI_align8_needed. */
17582 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17583 /* Tag_ABI_align8_preserved. */
17584 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17585 /* Tag_ABI_enum_size. */
17586 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17587 flag_short_enums ? 1 : 2);
17588
17589 /* Tag_ABI_optimization_goals. */
17590 if (optimize_size)
17591 val = 4;
17592 else if (optimize >= 2)
17593 val = 2;
17594 else if (optimize)
17595 val = 1;
17596 else
17597 val = 6;
17598 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17599 }
17600 default_file_start();
17601 }
17602
17603 static void
17604 arm_file_end (void)
17605 {
17606 int regno;
17607
17608 if (NEED_INDICATE_EXEC_STACK)
17609 /* Add .note.GNU-stack. */
17610 file_end_indicate_exec_stack ();
17611
17612 if (! thumb_call_reg_needed)
17613 return;
17614
17615 switch_to_section (text_section);
17616 asm_fprintf (asm_out_file, "\t.code 16\n");
17617 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17618
17619 for (regno = 0; regno < LR_REGNUM; regno++)
17620 {
17621 rtx label = thumb_call_via_label[regno];
17622
17623 if (label != 0)
17624 {
17625 targetm.asm_out.internal_label (asm_out_file, "L",
17626 CODE_LABEL_NUMBER (label));
17627 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17628 }
17629 }
17630 }
17631
17632 rtx aof_pic_label;
17633
17634 #ifdef AOF_ASSEMBLER
17635 /* Special functions only needed when producing AOF syntax assembler. */
17636
17637 struct pic_chain
17638 {
17639 struct pic_chain * next;
17640 const char * symname;
17641 };
17642
17643 static struct pic_chain * aof_pic_chain = NULL;
17644
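/* Return the address of symbol X's slot in the PIC constant table,
   adding a new entry at the end of the chain if X has not been seen
   before.  */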
17645 rtx
17646 aof_pic_entry (rtx x)
17647 {
17648 struct pic_chain ** chainp;
17649 int offset;
17650
17651 if (aof_pic_label == NULL_RTX)
17652 {
17653 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
17654 }
17655
17656 for (offset = 0, chainp = &aof_pic_chain; *chainp;
17657 offset += 4, chainp = &(*chainp)->next)
17658 if ((*chainp)->symname == XSTR (x, 0))
17659 return plus_constant (aof_pic_label, offset);
17660
17661 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
17662 (*chainp)->next = NULL;
17663 (*chainp)->symname = XSTR (x, 0);
17664 return plus_constant (aof_pic_label, offset);
17665 }
17666
17667 void
17668 aof_dump_pic_table (FILE *f)
17669 {
17670 struct pic_chain * chain;
17671
17672 if (aof_pic_chain == NULL)
17673 return;
17674
17675 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
17676 PIC_OFFSET_TABLE_REGNUM,
17677 PIC_OFFSET_TABLE_REGNUM);
17678 fputs ("|x$adcons|\n", f);
17679
17680 for (chain = aof_pic_chain; chain; chain = chain->next)
17681 {
17682 fputs ("\tDCD\t", f);
17683 assemble_name (f, chain->symname);
17684 fputs ("\n", f);
17685 }
17686 }
17687
17688 int arm_text_section_count = 1;
17689
17690 /* A get_unnamed_section callback for switching to the text section. */
17691
17692 static void
17693 aof_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
17694 {
17695 fprintf (asm_out_file, "\tAREA |C$$code%d|, CODE, READONLY",
17696 arm_text_section_count++);
17697 if (flag_pic)
17698 fprintf (asm_out_file, ", PIC, REENTRANT");
17699 fprintf (asm_out_file, "\n");
17700 }
17701
17702 static int arm_data_section_count = 1;
17703
17704 /* A get_unnamed_section callback for switching to the data section. */
17705
17706 static void
17707 aof_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
17708 {
17709 fprintf (asm_out_file, "\tAREA |C$$data%d|, DATA\n",
17710 arm_data_section_count++);
17711 }
17712
17713 /* Implement TARGET_ASM_INIT_SECTIONS.
17714
17715 AOF Assembler syntax is a nightmare when it comes to areas, since once
17716 we change from one area to another, we can't go back again. Instead,
17717 we must create a new area with the same attributes and add the new output
17718 to that. Unfortunately, there is nothing we can do here to guarantee that
17719 two areas with the same attributes will be linked adjacently in the
17720 resulting executable, so we have to be careful not to do pc-relative
17721 addressing across such boundaries. */
17722
17723 static void
17724 aof_asm_init_sections (void)
17725 {
17726 text_section = get_unnamed_section (SECTION_CODE,
17727 aof_output_text_section_asm_op, NULL);
17728 data_section = get_unnamed_section (SECTION_WRITE,
17729 aof_output_data_section_asm_op, NULL);
17730 readonly_data_section = text_section;
17731 }
17732
17733 void
17734 zero_init_section (void)
17735 {
17736 static int zero_init_count = 1;
17737
17738 fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n", zero_init_count++);
17739 in_section = NULL;
17740 }
17741
17742 /* The AOF assembler is religiously strict about declarations of
17743 imported and exported symbols, so that it is impossible to declare
17744 a function as imported near the beginning of the file, and then to
17745 export it later on. It is, however, possible to delay the decision
17746 until all the functions in the file have been compiled. To get
17747 around this, we maintain a list of the imports and exports, and
17748 delete from it any that are subsequently defined. At the end of
17749 compilation we spit the remainder of the list out before the END
17750 directive. */
17751
17752 struct import
17753 {
17754 struct import * next;
17755 const char * name;
17756 };
17757
17758 static struct import * imports_list = NULL;
17759
17760 void
17761 aof_add_import (const char *name)
17762 {
17763 struct import * new;
17764
17765 for (new = imports_list; new; new = new->next)
17766 if (new->name == name)
17767 return;
17768
17769 new = (struct import *) xmalloc (sizeof (struct import));
17770 new->next = imports_list;
17771 imports_list = new;
17772 new->name = name;
17773 }
17774
17775 void
17776 aof_delete_import (const char *name)
17777 {
17778 struct import ** old;
17779
17780 for (old = &imports_list; *old; old = & (*old)->next)
17781 {
17782 if ((*old)->name == name)
17783 {
17784 *old = (*old)->next;
17785 return;
17786 }
17787 }
17788 }
17789
17790 int arm_main_function = 0;
17791
17792 static void
17793 aof_dump_imports (FILE *f)
17794 {
17795 /* The AOF assembler needs this to cause the startup code to be extracted
17796 from the library.  Bringing in __main causes the whole thing to work
17797 automagically. */
17798 if (arm_main_function)
17799 {
17800 switch_to_section (text_section);
17801 fputs ("\tIMPORT __main\n", f);
17802 fputs ("\tDCD __main\n", f);
17803 }
17804
17805 /* Now dump the remaining imports. */
17806 while (imports_list)
17807 {
17808 fprintf (f, "\tIMPORT\t");
17809 assemble_name (f, imports_list->name);
17810 fputc ('\n', f);
17811 imports_list = imports_list->next;
17812 }
17813 }
17814
17815 static void
17816 aof_globalize_label (FILE *stream, const char *name)
17817 {
17818 default_globalize_label (stream, name);
17819 if (! strcmp (name, "main"))
17820 arm_main_function = 1;
17821 }
17822
17823 static void
17824 aof_file_start (void)
17825 {
17826 fputs ("__r0\tRN\t0\n", asm_out_file);
17827 fputs ("__a1\tRN\t0\n", asm_out_file);
17828 fputs ("__a2\tRN\t1\n", asm_out_file);
17829 fputs ("__a3\tRN\t2\n", asm_out_file);
17830 fputs ("__a4\tRN\t3\n", asm_out_file);
17831 fputs ("__v1\tRN\t4\n", asm_out_file);
17832 fputs ("__v2\tRN\t5\n", asm_out_file);
17833 fputs ("__v3\tRN\t6\n", asm_out_file);
17834 fputs ("__v4\tRN\t7\n", asm_out_file);
17835 fputs ("__v5\tRN\t8\n", asm_out_file);
17836 fputs ("__v6\tRN\t9\n", asm_out_file);
17837 fputs ("__sl\tRN\t10\n", asm_out_file);
17838 fputs ("__fp\tRN\t11\n", asm_out_file);
17839 fputs ("__ip\tRN\t12\n", asm_out_file);
17840 fputs ("__sp\tRN\t13\n", asm_out_file);
17841 fputs ("__lr\tRN\t14\n", asm_out_file);
17842 fputs ("__pc\tRN\t15\n", asm_out_file);
17843 fputs ("__f0\tFN\t0\n", asm_out_file);
17844 fputs ("__f1\tFN\t1\n", asm_out_file);
17845 fputs ("__f2\tFN\t2\n", asm_out_file);
17846 fputs ("__f3\tFN\t3\n", asm_out_file);
17847 fputs ("__f4\tFN\t4\n", asm_out_file);
17848 fputs ("__f5\tFN\t5\n", asm_out_file);
17849 fputs ("__f6\tFN\t6\n", asm_out_file);
17850 fputs ("__f7\tFN\t7\n", asm_out_file);
17851 switch_to_section (text_section);
17852 }
17853
17854 static void
17855 aof_file_end (void)
17856 {
17857 if (flag_pic)
17858 aof_dump_pic_table (asm_out_file);
17859 arm_file_end ();
17860 aof_dump_imports (asm_out_file);
17861 fputs ("\tEND\n", asm_out_file);
17862 }
17863 #endif /* AOF_ASSEMBLER */
17864
17865 #ifndef ARM_PE
17866 /* Symbols in the text segment can be accessed without indirecting via the
17867 constant pool; it may take an extra binary operation, but this is still
17868 faster than indirecting via memory. Don't do this when not optimizing,
17869 since we won't be calculating al of the offsets necessary to do this
17870 simplification. */
17871
17872 static void
17873 arm_encode_section_info (tree decl, rtx rtl, int first)
17874 {
17875 /* This doesn't work with AOF syntax, since the string table may be in
17876 a different AREA. */
17877 #ifndef AOF_ASSEMBLER
17878 if (optimize > 0 && TREE_CONSTANT (decl))
17879 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17880 #endif
17881
17882 default_encode_section_info (decl, rtl, first);
17883 }
17884 #endif /* !ARM_PE */
17885
17886 static void
17887 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17888 {
17889 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17890 && !strcmp (prefix, "L"))
17891 {
17892 arm_ccfsm_state = 0;
17893 arm_target_insn = NULL;
17894 }
17895 default_internal_label (stream, prefix, labelno);
17896 }
17897
17898 /* Output code to add DELTA to the first argument, and then jump
17899 to FUNCTION. Used for C++ multiple inheritance. */
17900 static void
17901 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17902 HOST_WIDE_INT delta,
17903 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17904 tree function)
17905 {
17906 static int thunk_label = 0;
17907 char label[256];
17908 char labelpc[256];
17909 int mi_delta = delta;
17910 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17911 int shift = 0;
17912 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17913 ? 1 : 0);
17914 if (mi_delta < 0)
17915 mi_delta = - mi_delta;
17916 /* When generating 16-bit thumb code, thunks are entered in arm mode. */
17917 if (TARGET_THUMB1)
17918 {
17919 int labelno = thunk_label++;
17920 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17921 fputs ("\tldr\tr12, ", file);
17922 assemble_name (file, label);
17923 fputc ('\n', file);
17924 if (flag_pic)
17925 {
17926 /* If we are generating PIC, the ldr instruction below loads
17927 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17928 the address of the add + 8, so we have:
17929
17930 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17931 = target + 1.
17932
17933 Note that we have "+ 1" because some versions of GNU ld
17934 don't set the low bit of the result for R_ARM_REL32
17935 relocations against thumb function symbols. */
17936 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17937 assemble_name (file, labelpc);
17938 fputs (":\n", file);
17939 fputs ("\tadd\tr12, pc, r12\n", file);
17940 }
17941 }
17942 /* TODO: Use movw/movt for large constants when available. */
17943 while (mi_delta != 0)
17944 {
17945 if ((mi_delta & (3 << shift)) == 0)
17946 shift += 2;
17947 else
17948 {
17949 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17950 mi_op, this_regno, this_regno,
17951 mi_delta & (0xff << shift));
17952 mi_delta &= ~(0xff << shift);
17953 shift += 8;
17954 }
17955 }
17956 if (TARGET_THUMB1)
17957 {
17958 fprintf (file, "\tbx\tr12\n");
17959 ASM_OUTPUT_ALIGN (file, 2);
17960 assemble_name (file, label);
17961 fputs (":\n", file);
17962 if (flag_pic)
17963 {
17964 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17965 rtx tem = XEXP (DECL_RTL (function), 0);
17966 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17967 tem = gen_rtx_MINUS (GET_MODE (tem),
17968 tem,
17969 gen_rtx_SYMBOL_REF (Pmode,
17970 ggc_strdup (labelpc)));
17971 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17972 }
17973 else
17974 /* Output ".word .LTHUNKn". */
17975 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17976 }
17977 else
17978 {
17979 fputs ("\tb\t", file);
17980 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17981 if (NEED_PLT_RELOC)
17982 fputs ("(PLT)", file);
17983 fputc ('\n', file);
17984 }
17985 }
17986
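/* Output the constant vector X to FILE as a single hexadecimal literal,
   most significant element first.  Always returns 1.  */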
17987 int
17988 arm_emit_vector_const (FILE *file, rtx x)
17989 {
17990 int i;
17991 const char * pattern;
17992
17993 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17994
17995 switch (GET_MODE (x))
17996 {
17997 case V2SImode: pattern = "%08x"; break;
17998 case V4HImode: pattern = "%04x"; break;
17999 case V8QImode: pattern = "%02x"; break;
18000 default: gcc_unreachable ();
18001 }
18002
18003 fprintf (file, "0x");
18004 for (i = CONST_VECTOR_NUNITS (x); i--;)
18005 {
18006 rtx element;
18007
18008 element = CONST_VECTOR_ELT (x, i);
18009 fprintf (file, pattern, INTVAL (element));
18010 }
18011
18012 return 1;
18013 }
18014
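/* Output the load of GR (wCGR) register operands[0] from memory
   operands[1].  If the address is a register plus an offset that is out
   of range for wldrw, load the value through an integer register and
   transfer it with tmcr instead.  */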
18015 const char *
18016 arm_output_load_gr (rtx *operands)
18017 {
18018 rtx reg;
18019 rtx offset;
18020 rtx wcgr;
18021 rtx sum;
18022
18023 if (GET_CODE (operands [1]) != MEM
18024 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18025 || GET_CODE (reg = XEXP (sum, 0)) != REG
18026 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18027 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18028 return "wldrw%?\t%0, %1";
18029
18030 /* Fix up an out-of-range load of a GR register. */
18031 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18032 wcgr = operands[0];
18033 operands[0] = reg;
18034 output_asm_insn ("ldr%?\t%0, %1", operands);
18035
18036 operands[0] = wcgr;
18037 operands[1] = reg;
18038 output_asm_insn ("tmcr%?\t%0, %1", operands);
18039 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18040
18041 return "";
18042 }
18043
18044 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18045
18046 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18047 named arg and all anonymous args onto the stack.
18048 XXX I know the prologue shouldn't be pushing registers, but it is faster
18049 that way. */
18050
18051 static void
18052 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18053 enum machine_mode mode ATTRIBUTE_UNUSED,
18054 tree type ATTRIBUTE_UNUSED,
18055 int *pretend_size,
18056 int second_time ATTRIBUTE_UNUSED)
18057 {
18058 cfun->machine->uses_anonymous_args = 1;
18059 if (cum->nregs < NUM_ARG_REGS)
18060 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
18061 }
18062
18063 /* Return nonzero if the CONSUMER instruction (a store) does not need
18064 PRODUCER's value to calculate the address. */
18065
18066 int
18067 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18068 {
18069 rtx value = PATTERN (producer);
18070 rtx addr = PATTERN (consumer);
18071
18072 if (GET_CODE (value) == COND_EXEC)
18073 value = COND_EXEC_CODE (value);
18074 if (GET_CODE (value) == PARALLEL)
18075 value = XVECEXP (value, 0, 0);
18076 value = XEXP (value, 0);
18077 if (GET_CODE (addr) == COND_EXEC)
18078 addr = COND_EXEC_CODE (addr);
18079 if (GET_CODE (addr) == PARALLEL)
18080 addr = XVECEXP (addr, 0, 0);
18081 addr = XEXP (addr, 0);
18082
18083 return !reg_overlap_mentioned_p (value, addr);
18084 }
18085
18086 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18087 have an early register shift value or amount dependency on the
18088 result of PRODUCER. */
18089
18090 int
18091 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18092 {
18093 rtx value = PATTERN (producer);
18094 rtx op = PATTERN (consumer);
18095 rtx early_op;
18096
18097 if (GET_CODE (value) == COND_EXEC)
18098 value = COND_EXEC_CODE (value);
18099 if (GET_CODE (value) == PARALLEL)
18100 value = XVECEXP (value, 0, 0);
18101 value = XEXP (value, 0);
18102 if (GET_CODE (op) == COND_EXEC)
18103 op = COND_EXEC_CODE (op);
18104 if (GET_CODE (op) == PARALLEL)
18105 op = XVECEXP (op, 0, 0);
18106 op = XEXP (op, 1);
18107
18108 early_op = XEXP (op, 0);
18109 /* This is either an actual independent shift, or a shift applied to
18110 the first operand of another operation. We want the whole shift
18111 operation. */
18112 if (GET_CODE (early_op) == REG)
18113 early_op = op;
18114
18115 return !reg_overlap_mentioned_p (value, early_op);
18116 }
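/* For illustration, the two consumer shapes handled above look like this
   in RTL (register numbers are arbitrary):

       (set (reg 0) (ashift (reg 1) (const_int 2)))

       (set (reg 0) (plus (ashift (reg 1) (const_int 2))
                          (reg 2)))

   In the first form XEXP (op, 0) is (reg 1), so EARLY_OP is widened back
   to the whole shift; in the second it is already the (ashift ...)
   expression.  Either way the producer's result must not appear inside
   that shift for the function to return nonzero.  */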
18117
18118 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18119 have an early register shift value dependency on the result of
18120 PRODUCER. */
18121
18122 int
18123 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18124 {
18125 rtx value = PATTERN (producer);
18126 rtx op = PATTERN (consumer);
18127 rtx early_op;
18128
18129 if (GET_CODE (value) == COND_EXEC)
18130 value = COND_EXEC_CODE (value);
18131 if (GET_CODE (value) == PARALLEL)
18132 value = XVECEXP (value, 0, 0);
18133 value = XEXP (value, 0);
18134 if (GET_CODE (op) == COND_EXEC)
18135 op = COND_EXEC_CODE (op);
18136 if (GET_CODE (op) == PARALLEL)
18137 op = XVECEXP (op, 0, 0);
18138 op = XEXP (op, 1);
18139
18140 early_op = XEXP (op, 0);
18141
18142 /* This is either an actual independent shift, or a shift applied to
18143 the first operand of another operation. We want the value being
18144 shifted, in either case. */
18145 if (GET_CODE (early_op) != REG)
18146 early_op = XEXP (early_op, 0);
18147
18148 return !reg_overlap_mentioned_p (value, early_op);
18149 }
18150
18151 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18152 have an early register mult dependency on the result of
18153 PRODUCER. */
18154
18155 int
18156 arm_no_early_mul_dep (rtx producer, rtx consumer)
18157 {
18158 rtx value = PATTERN (producer);
18159 rtx op = PATTERN (consumer);
18160
18161 if (GET_CODE (value) == COND_EXEC)
18162 value = COND_EXEC_CODE (value);
18163 if (GET_CODE (value) == PARALLEL)
18164 value = XVECEXP (value, 0, 0);
18165 value = XEXP (value, 0);
18166 if (GET_CODE (op) == COND_EXEC)
18167 op = COND_EXEC_CODE (op);
18168 if (GET_CODE (op) == PARALLEL)
18169 op = XVECEXP (op, 0, 0);
18170 op = XEXP (op, 1);
18171
18172 return (GET_CODE (op) == PLUS
18173 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18174 }
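/* For example, with a consumer of the form

       (set (reg 0) (plus (mult (reg 1) (reg 2)) (reg 3)))

   the test above returns nonzero only when the producer's result feeds
   none of the multiply operands: it may overlap (reg 3), the accumulator,
   but not the (mult ...) subexpression.  */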
18175
18176 /* We can't rely on the caller doing the proper promotion when
18177 using APCS or ATPCS. */
18178
18179 static bool
18180 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
18181 {
18182 return !TARGET_AAPCS_BASED;
18183 }
18184
18185
18186 /* AAPCS based ABIs use short enums by default. */
18187
18188 static bool
18189 arm_default_short_enums (void)
18190 {
18191 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18192 }
18193
18194
18195 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18196
18197 static bool
18198 arm_align_anon_bitfield (void)
18199 {
18200 return TARGET_AAPCS_BASED;
18201 }
18202
18203
18204 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18205
18206 static tree
18207 arm_cxx_guard_type (void)
18208 {
18209 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18210 }
18211
18212 /* Return nonzero if the consumer (a multiply-accumulate instruction)
18213 has an accumulator dependency on the result of the producer (a
18214 multiplication instruction) and no other dependency on that result. */
18215 int
18216 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18217 {
18218 rtx mul = PATTERN (producer);
18219 rtx mac = PATTERN (consumer);
18220 rtx mul_result;
18221 rtx mac_op0, mac_op1, mac_acc;
18222
18223 if (GET_CODE (mul) == COND_EXEC)
18224 mul = COND_EXEC_CODE (mul);
18225 if (GET_CODE (mac) == COND_EXEC)
18226 mac = COND_EXEC_CODE (mac);
18227
18228 /* Check that mul is of the form (set (...) (mult ...))
18229 and mac is of the form (set (...) (plus (mult ...) (...))). */
18230 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18231 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18232 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18233 return 0;
18234
18235 mul_result = XEXP (mul, 0);
18236 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18237 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18238 mac_acc = XEXP (XEXP (mac, 1), 1);
18239
18240 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18241 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18242 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18243 }
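/* A worked example (register numbers are arbitrary):

       producer:  (set (reg 0) (mult (reg 1) (reg 2)))
       consumer:  (set (reg 3) (plus (mult (reg 4) (reg 5)) (reg 0)))

   MUL_RESULT is (reg 0); it overlaps MAC_ACC but neither MAC_OP0 nor
   MAC_OP1, so the function returns 1.  If (reg 0) also appeared as one of
   the consumer's multiply operands, 0 would be returned.  */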
18244
18245
18246 /* The EABI says test the least significant bit of a guard variable. */
18247
18248 static bool
18249 arm_cxx_guard_mask_bit (void)
18250 {
18251 return TARGET_AAPCS_BASED;
18252 }
18253
18254
18255 /* The EABI specifies that all array cookies are 8 bytes long. */
18256
18257 static tree
18258 arm_get_cookie_size (tree type)
18259 {
18260 tree size;
18261
18262 if (!TARGET_AAPCS_BASED)
18263 return default_cxx_get_cookie_size (type);
18264
18265 size = build_int_cst (sizetype, 8);
18266 return size;
18267 }
18268
18269
18270 /* The EABI says that array cookies should also contain the element size. */
18271
18272 static bool
18273 arm_cookie_has_size (void)
18274 {
18275 return TARGET_AAPCS_BASED;
18276 }
18277
18278
18279 /* The EABI says constructors and destructors should return a pointer to
18280 the object constructed/destroyed. */
18281
18282 static bool
18283 arm_cxx_cdtor_returns_this (void)
18284 {
18285 return TARGET_AAPCS_BASED;
18286 }
18287
18288 /* The EABI says that an inline function may never be the key
18289 method. */
18290
18291 static bool
18292 arm_cxx_key_method_may_be_inline (void)
18293 {
18294 return !TARGET_AAPCS_BASED;
18295 }
18296
18297 static void
18298 arm_cxx_determine_class_data_visibility (tree decl)
18299 {
18300 if (!TARGET_AAPCS_BASED)
18301 return;
18302
18303 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18304 is exported. However, on systems without dynamic vague linkage,
18305 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18306 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18307 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18308 else
18309 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18310 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18311 }
18312
18313 static bool
18314 arm_cxx_class_data_always_comdat (void)
18315 {
18316 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18317 vague linkage if the class has no key function. */
18318 return !TARGET_AAPCS_BASED;
18319 }
18320
18321
18322 /* The EABI says __aeabi_atexit should be used to register static
18323 destructors. */
18324
18325 static bool
18326 arm_cxx_use_aeabi_atexit (void)
18327 {
18328 return TARGET_AAPCS_BASED;
18329 }
18330
18331
18332 void
18333 arm_set_return_address (rtx source, rtx scratch)
18334 {
18335 arm_stack_offsets *offsets;
18336 HOST_WIDE_INT delta;
18337 rtx addr;
18338 unsigned long saved_regs;
18339
18340 saved_regs = arm_compute_save_reg_mask ();
18341
18342 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18343 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18344 else
18345 {
18346 if (frame_pointer_needed)
18347 addr = plus_constant (hard_frame_pointer_rtx, -4);
18348 else
18349 {
18350 /* LR will be the first saved register. */
18351 offsets = arm_get_frame_offsets ();
18352 delta = offsets->outgoing_args - (offsets->frame + 4);
18353
18354
18355 if (delta >= 4096)
18356 {
18357 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18358 GEN_INT (delta & ~4095)));
18359 addr = scratch;
18360 delta &= 4095;
18361 }
18362 else
18363 addr = stack_pointer_rtx;
18364
18365 addr = plus_constant (addr, delta);
18366 }
18367 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18368 }
18369 }
18370
18371
18372 void
18373 thumb_set_return_address (rtx source, rtx scratch)
18374 {
18375 arm_stack_offsets *offsets;
18376 HOST_WIDE_INT delta;
18377 HOST_WIDE_INT limit;
18378 int reg;
18379 rtx addr;
18380 unsigned long mask;
18381
18382 emit_insn (gen_rtx_USE (VOIDmode, source));
18383
18384 mask = thumb1_compute_save_reg_mask ();
18385 if (mask & (1 << LR_REGNUM))
18386 {
18387 offsets = arm_get_frame_offsets ();
18388
18389 limit = 1024;
18390 /* Find the saved regs. */
18391 if (frame_pointer_needed)
18392 {
18393 delta = offsets->soft_frame - offsets->saved_args;
18394 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18395 if (TARGET_THUMB1)
18396 limit = 128;
18397 }
18398 else
18399 {
18400 delta = offsets->outgoing_args - offsets->saved_args;
18401 reg = SP_REGNUM;
18402 }
18403 /* Allow for the stack frame. */
18404 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18405 delta -= 16;
18406 /* The link register is always the first saved register. */
18407 delta -= 4;
18408
18409 /* Construct the address. */
18410 addr = gen_rtx_REG (SImode, reg);
18411 if (delta > limit)
18412 {
18413 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18414 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18415 addr = scratch;
18416 }
18417 else
18418 addr = plus_constant (addr, delta);
18419
18420 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18421 }
18422 else
18423 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18424 }
18425
18426 /* Implements target hook vector_mode_supported_p. */
18427 bool
18428 arm_vector_mode_supported_p (enum machine_mode mode)
18429 {
18430 /* Neon also supports V2SImode, etc. listed in the clause below. */
18431 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18432 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18433 return true;
18434
18435 if ((mode == V2SImode)
18436 || (mode == V4HImode)
18437 || (mode == V8QImode))
18438 return true;
18439
18440 return false;
18441 }
18442
18443 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18444 ARM insns and therefore guarantee that the shift count is modulo 256.
18445 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18446 guarantee no particular behavior for out-of-range counts. */
18447
18448 static unsigned HOST_WIDE_INT
18449 arm_shift_truncation_mask (enum machine_mode mode)
18450 {
18451 return mode == SImode ? 255 : 0;
18452 }
18453
18454
18455 /* Map internal gcc register numbers to DWARF2 register numbers. */
18456
18457 unsigned int
18458 arm_dbx_register_number (unsigned int regno)
18459 {
18460 if (regno < 16)
18461 return regno;
18462
18463 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18464 compatibility. The EABI defines them as registers 96-103. */
18465 if (IS_FPA_REGNUM (regno))
18466 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18467
18468 /* FIXME: VFPv3 register numbering. */
18469 if (IS_VFP_REGNUM (regno))
18470 return 64 + regno - FIRST_VFP_REGNUM;
18471
18472 if (IS_IWMMXT_GR_REGNUM (regno))
18473 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18474
18475 if (IS_IWMMXT_REGNUM (regno))
18476 return 112 + regno - FIRST_IWMMXT_REGNUM;
18477
18478 gcc_unreachable ();
18479 }
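/* For example: r0-r15 keep their own numbers; the first FPA register maps
   to 96 under an AAPCS-based ABI and to 16 otherwise; the first VFP
   register maps to 64; and the iWMMXt GR and data registers start at 104
   and 112 respectively.  */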
18480
18481
18482 #ifdef TARGET_UNWIND_INFO
18483 /* Emit unwind directives for a store-multiple instruction or stack pointer
18484 push during alignment.
18485 These should only ever be generated by the function prologue code, so
18486 expect them to have a particular form. */
18487
18488 static void
18489 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18490 {
18491 int i;
18492 HOST_WIDE_INT offset;
18493 HOST_WIDE_INT nregs;
18494 int reg_size;
18495 unsigned reg;
18496 unsigned lastreg;
18497 rtx e;
18498
18499 e = XVECEXP (p, 0, 0);
18500 if (GET_CODE (e) != SET)
18501 abort ();
18502
18503 /* First insn will adjust the stack pointer. */
18504 if (GET_CODE (e) != SET
18505 || GET_CODE (XEXP (e, 0)) != REG
18506 || REGNO (XEXP (e, 0)) != SP_REGNUM
18507 || GET_CODE (XEXP (e, 1)) != PLUS)
18508 abort ();
18509
18510 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18511 nregs = XVECLEN (p, 0) - 1;
18512
18513 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18514 if (reg < 16)
18515 {
18516 /* The function prologue may also push pc, but does not annotate that store,
18517 since pc is never restored. We turn the push into a stack pointer adjustment. */
18518 if (nregs * 4 == offset - 4)
18519 {
18520 fprintf (asm_out_file, "\t.pad #4\n");
18521 offset -= 4;
18522 }
18523 reg_size = 4;
18524 fprintf (asm_out_file, "\t.save {");
18525 }
18526 else if (IS_VFP_REGNUM (reg))
18527 {
18528 reg_size = 8;
18529 fprintf (asm_out_file, "\t.vsave {");
18530 }
18531 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18532 {
18533 /* FPA registers are done differently. */
18534 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18535 return;
18536 }
18537 else
18538 /* Unknown register type. */
18539 abort ();
18540
18541 /* If the stack increment doesn't match the size of the saved registers,
18542 something has gone horribly wrong. */
18543 if (offset != nregs * reg_size)
18544 abort ();
18545
18546 offset = 0;
18547 lastreg = 0;
18548 /* The remaining insns will describe the stores. */
18549 for (i = 1; i <= nregs; i++)
18550 {
18551 /* Expect (set (mem <addr>) (reg)).
18552 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18553 e = XVECEXP (p, 0, i);
18554 if (GET_CODE (e) != SET
18555 || GET_CODE (XEXP (e, 0)) != MEM
18556 || GET_CODE (XEXP (e, 1)) != REG)
18557 abort ();
18558
18559 reg = REGNO (XEXP (e, 1));
18560 if (reg < lastreg)
18561 abort ();
18562
18563 if (i != 1)
18564 fprintf (asm_out_file, ", ");
18565 /* We can't use %r for vfp because we need to use the
18566 double precision register names. */
18567 if (IS_VFP_REGNUM (reg))
18568 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18569 else
18570 asm_fprintf (asm_out_file, "%r", reg);
18571
18572 #ifdef ENABLE_CHECKING
18573 /* Check that the addresses are consecutive. */
18574 e = XEXP (XEXP (e, 0), 0);
18575 if (GET_CODE (e) == PLUS)
18576 {
18577 offset += reg_size;
18578 if (GET_CODE (XEXP (e, 0)) != REG
18579 || REGNO (XEXP (e, 0)) != SP_REGNUM
18580 || GET_CODE (XEXP (e, 1)) != CONST_INT
18581 || offset != INTVAL (XEXP (e, 1)))
18582 abort ();
18583 }
18584 else if (i != 1
18585 || GET_CODE (e) != REG
18586 || REGNO (e) != SP_REGNUM)
18587 abort ();
18588 #endif
18589 }
18590 fprintf (asm_out_file, "}\n");
18591 }
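/* A sketch of the output produced above for a typical prologue
   store-multiple such as "push {r4, r5, lr}" (stack pointer decremented
   by 12, three registers annotated):

       .save {r4, r5, lr}

   and, for a push that additionally stores pc purely to make room (the
   stack increment is one word larger than the annotated registers):

       .pad #4
       .save {r4, r5, lr}  */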
18592
18593 /* Emit unwind directives for a SET. */
18594
18595 static void
18596 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18597 {
18598 rtx e0;
18599 rtx e1;
18600 unsigned reg;
18601
18602 e0 = XEXP (p, 0);
18603 e1 = XEXP (p, 1);
18604 switch (GET_CODE (e0))
18605 {
18606 case MEM:
18607 /* Pushing a single register. */
18608 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18609 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18610 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18611 abort ();
18612
18613 asm_fprintf (asm_out_file, "\t.save ");
18614 if (IS_VFP_REGNUM (REGNO (e1)))
18615 asm_fprintf (asm_out_file, "{d%d}\n",
18616 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18617 else
18618 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
18619 break;
18620
18621 case REG:
18622 if (REGNO (e0) == SP_REGNUM)
18623 {
18624 /* A stack increment. */
18625 if (GET_CODE (e1) != PLUS
18626 || GET_CODE (XEXP (e1, 0)) != REG
18627 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18628 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18629 abort ();
18630
18631 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18632 -INTVAL (XEXP (e1, 1)));
18633 }
18634 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18635 {
18636 HOST_WIDE_INT offset;
18637
18638 if (GET_CODE (e1) == PLUS)
18639 {
18640 if (GET_CODE (XEXP (e1, 0)) != REG
18641 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18642 abort ();
18643 reg = REGNO (XEXP (e1, 0));
18644 offset = INTVAL (XEXP (e1, 1));
18645 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18646 HARD_FRAME_POINTER_REGNUM, reg,
18647 offset);
18648 }
18649 else if (GET_CODE (e1) == REG)
18650 {
18651 reg = REGNO (e1);
18652 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18653 HARD_FRAME_POINTER_REGNUM, reg);
18654 }
18655 else
18656 abort ();
18657 }
18658 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18659 {
18660 /* Move from sp to reg. */
18661 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18662 }
18663 else if (GET_CODE (e1) == PLUS
18664 && GET_CODE (XEXP (e1, 0)) == REG
18665 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18666 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18667 {
18668 /* Set reg to offset from sp. */
18669 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18670 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
18671 }
18672 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18673 {
18674 /* Stack pointer save before alignment. */
18675 reg = REGNO (e0);
18676 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18677 reg + 0x90, reg);
18678 }
18679 else
18680 abort ();
18681 break;
18682
18683 default:
18684 abort ();
18685 }
18686 }
18687
18688
18689 /* Emit unwind directives for the given insn. */
18690
18691 static void
18692 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18693 {
18694 rtx pat;
18695
18696 if (!ARM_EABI_UNWIND_TABLES)
18697 return;
18698
18699 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18700 return;
18701
18702 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18703 if (pat)
18704 pat = XEXP (pat, 0);
18705 else
18706 pat = PATTERN (insn);
18707
18708 switch (GET_CODE (pat))
18709 {
18710 case SET:
18711 arm_unwind_emit_set (asm_out_file, pat);
18712 break;
18713
18714 case SEQUENCE:
18715 /* Store multiple. */
18716 arm_unwind_emit_sequence (asm_out_file, pat);
18717 break;
18718
18719 default:
18720 abort ();
18721 }
18722 }
18723
18724
18725 /* Output a reference from a function exception table to the type_info
18726 object X. The EABI specifies that the symbol should be relocated by
18727 an R_ARM_TARGET2 relocation. */
18728
18729 static bool
18730 arm_output_ttype (rtx x)
18731 {
18732 fputs ("\t.word\t", asm_out_file);
18733 output_addr_const (asm_out_file, x);
18734 /* Use special relocations for symbol references. */
18735 if (GET_CODE (x) != CONST_INT)
18736 fputs ("(TARGET2)", asm_out_file);
18737 fputc ('\n', asm_out_file);
18738
18739 return TRUE;
18740 }
18741 #endif /* TARGET_UNWIND_INFO */
18742
18743
18744 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18745 stack alignment. */
18746
18747 static void
18748 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18749 {
18750 rtx unspec = SET_SRC (pattern);
18751 gcc_assert (GET_CODE (unspec) == UNSPEC);
18752
18753 switch (index)
18754 {
18755 case UNSPEC_STACK_ALIGN:
18756 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18757 put anything on the stack, so hopefully it won't matter.
18758 CFA = SP will be correct after alignment. */
18759 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18760 SET_DEST (pattern));
18761 break;
18762 default:
18763 gcc_unreachable ();
18764 }
18765 }
18766
18767
18768 /* Output unwind directives for the start/end of a function. */
18769
18770 void
18771 arm_output_fn_unwind (FILE * f, bool prologue)
18772 {
18773 if (!ARM_EABI_UNWIND_TABLES)
18774 return;
18775
18776 if (prologue)
18777 fputs ("\t.fnstart\n", f);
18778 else
18779 fputs ("\t.fnend\n", f);
18780 }
18781
18782 static bool
18783 arm_emit_tls_decoration (FILE *fp, rtx x)
18784 {
18785 enum tls_reloc reloc;
18786 rtx val;
18787
18788 val = XVECEXP (x, 0, 0);
18789 reloc = INTVAL (XVECEXP (x, 0, 1));
18790
18791 output_addr_const (fp, val);
18792
18793 switch (reloc)
18794 {
18795 case TLS_GD32:
18796 fputs ("(tlsgd)", fp);
18797 break;
18798 case TLS_LDM32:
18799 fputs ("(tlsldm)", fp);
18800 break;
18801 case TLS_LDO32:
18802 fputs ("(tlsldo)", fp);
18803 break;
18804 case TLS_IE32:
18805 fputs ("(gottpoff)", fp);
18806 break;
18807 case TLS_LE32:
18808 fputs ("(tpoff)", fp);
18809 break;
18810 default:
18811 gcc_unreachable ();
18812 }
18813
18814 switch (reloc)
18815 {
18816 case TLS_GD32:
18817 case TLS_LDM32:
18818 case TLS_IE32:
18819 fputs (" + (. - ", fp);
18820 output_addr_const (fp, XVECEXP (x, 0, 2));
18821 fputs (" - ", fp);
18822 output_addr_const (fp, XVECEXP (x, 0, 3));
18823 fputc (')', fp);
18824 break;
18825 default:
18826 break;
18827 }
18828
18829 return TRUE;
18830 }
18831
18832 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18833
18834 static void
18835 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18836 {
18837 gcc_assert (size == 4);
18838 fputs ("\t.word\t", file);
18839 output_addr_const (file, x);
18840 fputs ("(tlsldo)", file);
18841 }
18842
18843 bool
18844 arm_output_addr_const_extra (FILE *fp, rtx x)
18845 {
18846 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18847 return arm_emit_tls_decoration (fp, x);
18848 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18849 {
18850 char label[256];
18851 int labelno = INTVAL (XVECEXP (x, 0, 0));
18852
18853 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18854 assemble_name_raw (fp, label);
18855
18856 return TRUE;
18857 }
18858 else if (GET_CODE (x) == CONST_VECTOR)
18859 return arm_emit_vector_const (fp, x);
18860
18861 return FALSE;
18862 }
18863
18864 /* Output assembly for a shift instruction.
18865 SET_FLAGS determines how the instruction modifies the condition codes.
18866 0 - Do not set condition codes.
18867 1 - Set condition codes.
18868 2 - Use smallest instruction. */
18869 const char *
18870 arm_output_shift (rtx *operands, int set_flags)
18871 {
18872 char pattern[100];
18873 static const char flag_chars[3] = {'?', '.', '!'};
18874 const char *shift;
18875 HOST_WIDE_INT val;
18876 char c;
18877
18878 c = flag_chars[set_flags];
18879 if (TARGET_UNIFIED_ASM)
18880 {
18881 shift = shift_op (operands[3], &val);
18882 if (shift)
18883 {
18884 if (val != -1)
18885 operands[2] = GEN_INT (val);
18886 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18887 }
18888 else
18889 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18890 }
18891 else
18892 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18893 output_asm_insn (pattern, operands);
18894 return "";
18895 }
18896
18897 /* Output a Thumb-2 casesi instruction. */
18898 const char *
18899 thumb2_output_casesi (rtx *operands)
18900 {
18901 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18902
18903 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18904
18905 output_asm_insn ("cmp\t%0, %1", operands);
18906 output_asm_insn ("bhi\t%l3", operands);
18907 switch (GET_MODE (diff_vec))
18908 {
18909 case QImode:
18910 return "tbb\t[%|pc, %0]";
18911 case HImode:
18912 return "tbh\t[%|pc, %0, lsl #1]";
18913 case SImode:
18914 if (flag_pic)
18915 {
18916 output_asm_insn ("adr\t%4, %l2", operands);
18917 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18918 output_asm_insn ("add\t%4, %4, %5", operands);
18919 return "bx\t%4";
18920 }
18921 else
18922 {
18923 output_asm_insn ("adr\t%4, %l2", operands);
18924 return "ldr\t%|pc, [%4, %0, lsl #2]";
18925 }
18926 default:
18927 gcc_unreachable ();
18928 }
18929 }
18930
18931 /* A table and a function to perform ARM-specific name mangling for
18932 NEON vector types in order to conform to the AAPCS (see "Procedure
18933 Call Standard for the ARM Architecture", Appendix A). To qualify
18934 for emission with the mangled names defined in that document, a
18935 vector type must not only be of the correct mode but also be
18936 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18937 typedef struct
18938 {
18939 enum machine_mode mode;
18940 const char *element_type_name;
18941 const char *aapcs_name;
18942 } arm_mangle_map_entry;
18943
18944 static arm_mangle_map_entry arm_mangle_map[] = {
18945 /* 64-bit containerized types. */
18946 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18947 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18948 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18949 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18950 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18951 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18952 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18953 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18954 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18955 /* 128-bit containerized types. */
18956 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18957 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18958 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18959 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18960 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18961 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18962 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18963 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18964 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18965 { VOIDmode, NULL, NULL }
18966 };
18967
18968 const char *
18969 arm_mangle_type (tree type)
18970 {
18971 arm_mangle_map_entry *pos = arm_mangle_map;
18972
18973 if (TREE_CODE (type) != VECTOR_TYPE)
18974 return NULL;
18975
18976 /* Check the mode of the vector type, and the name of the vector
18977 element type, against the table. */
18978 while (pos->mode != VOIDmode)
18979 {
18980 tree elt_type = TREE_TYPE (type);
18981
18982 if (pos->mode == TYPE_MODE (type)
18983 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18984 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18985 pos->element_type_name))
18986 return pos->aapcs_name;
18987
18988 pos++;
18989 }
18990
18991 /* Use the default mangling for unrecognized (possibly user-defined)
18992 vector types. */
18993 return NULL;
18994 }
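/* As an example of the table in use: int8x8_t (a V8QImode vector of
   __builtin_neon_qi elements, from the NEON intrinsics header) is mangled
   as 15__simd64_int8_t, so a C++ declaration such as

       void f (int8x8_t);

   mangles to _Z1f15__simd64_int8_t rather than using the default vector
   mangling.  */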
18995
18996 #include "gt-arm.h"