1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
50
51 /* Defined for convenience. */
52 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
53
54 /* Classifies an address.
55
56 ADDRESS_REG_IMM
57 A simple base register plus immediate offset.
58
59 ADDRESS_REG_WB
60 A base register indexed by immediate offset with writeback.
61
62 ADDRESS_REG_REG
63 A base register indexed by (optionally scaled) register.
64
65 ADDRESS_REG_UXTW
66 A base register indexed by (optionally scaled) zero-extended register.
67
68 ADDRESS_REG_SXTW
69 A base register indexed by (optionally scaled) sign-extended register.
70
71 ADDRESS_LO_SUM
72 A LO_SUM rtx with a base register and "LO12" symbol relocation.
73
74 ADDRESS_SYMBOLIC
75 A constant symbolic address, in pc-relative literal pool. */
76
77 enum aarch64_address_type {
78 ADDRESS_REG_IMM,
79 ADDRESS_REG_WB,
80 ADDRESS_REG_REG,
81 ADDRESS_REG_UXTW,
82 ADDRESS_REG_SXTW,
83 ADDRESS_LO_SUM,
84 ADDRESS_SYMBOLIC
85 };
86
87 struct aarch64_address_info {
88 enum aarch64_address_type type;
89 rtx base;
90 rtx offset;
91 int shift;
92 enum aarch64_symbol_type symbol_type;
93 };
94
95 struct simd_immediate_info
96 {
97 rtx value;
98 int shift;
99 int element_width;
100 bool mvn;
101 bool msl;
102 };
103
104 /* The current code model. */
105 enum aarch64_code_model aarch64_cmodel;
106
107 #ifdef HAVE_AS_TLS
108 #undef TARGET_HAVE_TLS
109 #define TARGET_HAVE_TLS 1
110 #endif
111
112 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
113 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
114 const_tree,
115 enum machine_mode *, int *,
116 bool *);
117 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
118 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
119 static void aarch64_override_options_after_change (void);
120 static bool aarch64_vector_mode_supported_p (enum machine_mode);
121 static unsigned bit_count (unsigned HOST_WIDE_INT);
122 static bool aarch64_const_vec_all_same_int_p (rtx,
123 HOST_WIDE_INT, HOST_WIDE_INT);
124
125 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
126 const unsigned char *sel);
127
128 /* The processor for which instructions should be scheduled. */
129 enum aarch64_processor aarch64_tune = generic;
130
131 /* The current tuning set. */
132 const struct tune_params *aarch64_tune_params;
133
134 /* Mask to specify which instructions we are allowed to generate. */
135 unsigned long aarch64_isa_flags = 0;
136
137 /* Mask to specify which instruction scheduling options should be used. */
138 unsigned long aarch64_tune_flags = 0;
139
140 /* Tuning parameters. */
141
142 #if HAVE_DESIGNATED_INITIALIZERS
143 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
144 #else
145 #define NAMED_PARAM(NAME, VAL) (VAL)
146 #endif
147
148 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
149 __extension__
150 #endif
151 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
152 {
153 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
154 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
155 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
156 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
157 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
158 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
159 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
160 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
161 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
162 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
163 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
164 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
165 };
166
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_addrcost_table generic_addrcost_table =
171 {
172 NAMED_PARAM (pre_modify, 0),
173 NAMED_PARAM (post_modify, 0),
174 NAMED_PARAM (register_offset, 0),
175 NAMED_PARAM (register_extend, 0),
176 NAMED_PARAM (imm_offset, 0)
177 };
178
179 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
180 __extension__
181 #endif
182 static const struct cpu_regmove_cost generic_regmove_cost =
183 {
184 NAMED_PARAM (GP2GP, 1),
185 NAMED_PARAM (GP2FP, 2),
186 NAMED_PARAM (FP2GP, 2),
187 /* We currently do not provide direct support for TFmode Q->Q move.
188 Therefore we need to raise the cost above 2 in order to have
189 reload handle the situation. */
190 NAMED_PARAM (FP2FP, 4)
191 };
192
193 /* Generic costs for vector insn classes. */
194 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 __extension__
196 #endif
197 static const struct cpu_vector_cost generic_vector_cost =
198 {
199 NAMED_PARAM (scalar_stmt_cost, 1),
200 NAMED_PARAM (scalar_load_cost, 1),
201 NAMED_PARAM (scalar_store_cost, 1),
202 NAMED_PARAM (vec_stmt_cost, 1),
203 NAMED_PARAM (vec_to_scalar_cost, 1),
204 NAMED_PARAM (scalar_to_vec_cost, 1),
205 NAMED_PARAM (vec_align_load_cost, 1),
206 NAMED_PARAM (vec_unalign_load_cost, 1),
207 NAMED_PARAM (vec_unalign_store_cost, 1),
208 NAMED_PARAM (vec_store_cost, 1),
209 NAMED_PARAM (cond_taken_branch_cost, 3),
210 NAMED_PARAM (cond_not_taken_branch_cost, 1)
211 };
212
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct tune_params generic_tunings =
217 {
218 &generic_rtx_cost_table,
219 &generic_addrcost_table,
220 &generic_regmove_cost,
221 &generic_vector_cost,
222 NAMED_PARAM (memmov_cost, 4)
223 };
224
225 /* A processor implementing AArch64. */
226 struct processor
227 {
228 const char *const name;
229 enum aarch64_processor core;
230 const char *arch;
231 const unsigned long flags;
232 const struct tune_params *const tune;
233 };
234
235 /* Processor cores implementing AArch64. */
236 static const struct processor all_cores[] =
237 {
238 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
239 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
240 #include "aarch64-cores.def"
241 #undef AARCH64_CORE
242 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
243 {NULL, aarch64_none, NULL, 0, NULL}
244 };
245
246 /* Architectures implementing AArch64. */
247 static const struct processor all_architectures[] =
248 {
249 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
250 {NAME, CORE, #ARCH, FLAGS, NULL},
251 #include "aarch64-arches.def"
252 #undef AARCH64_ARCH
253 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
254 {NULL, aarch64_none, NULL, 0, NULL}
255 };
256
257 /* Target specification. These are populated as command-line arguments
258 are processed, or NULL if not specified. */
259 static const struct processor *selected_arch;
260 static const struct processor *selected_cpu;
261 static const struct processor *selected_tune;
262
263 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
264
265 /* An ISA extension in the co-processor and main instruction set space. */
266 struct aarch64_option_extension
267 {
268 const char *const name;
269 const unsigned long flags_on;
270 const unsigned long flags_off;
271 };
272
273 /* ISA extensions in AArch64. */
274 static const struct aarch64_option_extension all_extensions[] =
275 {
276 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
277 {NAME, FLAGS_ON, FLAGS_OFF},
278 #include "aarch64-option-extensions.def"
279 #undef AARCH64_OPT_EXTENSION
280 {NULL, 0, 0}
281 };
282
283 /* Used to track the size of an address when generating a pre/post
284 increment address. */
285 static enum machine_mode aarch64_memory_reference_mode;
286
287 /* Used to force GTY into this file. */
288 static GTY(()) int gty_dummy;
289
290 /* A table of valid AArch64 "bitmask immediate" values for
291 logical instructions. */
292
293 #define AARCH64_NUM_BITMASKS 5334
294 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
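
/* As an illustration (values chosen here, not taken from the table):
   0x00ff00ff00ff00ff, a run of eight ones replicated in every 16-bit
   element, is a valid bitmask immediate and can feed AND/ORR/EOR
   directly, whereas 0x0000000000001234 is not and has to be synthesised
   some other way.  */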
295
296 /* Did we set flag_omit_frame_pointer just so
297 aarch64_frame_pointer_required would be called? */
298 static bool faked_omit_frame_pointer;
299
300 typedef enum aarch64_cond_code
301 {
302 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
303 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
304 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
305 }
306 aarch64_cc;
307
308 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
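
/* For example, AARCH64_EQ (0) inverts to AARCH64_NE (1) and AARCH64_GE
   (10) inverts to AARCH64_LT (11); the encoding keeps each condition
   next to its inverse, so flipping bit 0 is enough.  */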
309
310 /* The condition codes of the processor, and the inverse function. */
311 static const char * const aarch64_condition_codes[] =
312 {
313 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
314 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
315 };
316
317 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
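/* For example, x5 maps to DWARF register 5, the stack pointer to 31 and
   v3 to 67; the AAPCS64 DWARF numbering starts the vector registers at
   64.  Anything else is reported as having no DWARF equivalent.  */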
318 unsigned
319 aarch64_dbx_register_number (unsigned regno)
320 {
321 if (GP_REGNUM_P (regno))
322 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
323 else if (regno == SP_REGNUM)
324 return AARCH64_DWARF_SP;
325 else if (FP_REGNUM_P (regno))
326 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
327
328 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
329 equivalent DWARF register. */
330 return DWARF_FRAME_REGISTERS;
331 }
332
333 /* Return TRUE if MODE is any of the large INT modes. */
334 static bool
335 aarch64_vect_struct_mode_p (enum machine_mode mode)
336 {
337 return mode == OImode || mode == CImode || mode == XImode;
338 }
339
340 /* Return TRUE if MODE is any of the vector modes. */
341 static bool
342 aarch64_vector_mode_p (enum machine_mode mode)
343 {
344 return aarch64_vector_mode_supported_p (mode)
345 || aarch64_vect_struct_mode_p (mode);
346 }
347
348 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
349 static bool
350 aarch64_array_mode_supported_p (enum machine_mode mode,
351 unsigned HOST_WIDE_INT nelems)
352 {
353 if (TARGET_SIMD
354 && AARCH64_VALID_SIMD_QREG_MODE (mode)
355 && (nelems >= 2 && nelems <= 4))
356 return true;
357
358 return false;
359 }
360
361 /* Implement HARD_REGNO_NREGS. */
362
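/* For example (sizes given for illustration): a 32-byte OImode value
   occupies 32 / UNITS_PER_VREG == 2 registers in the FP/SIMD register
   file, but 32 / UNITS_PER_WORD == 4 registers in the general register
   file.  */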
363 int
364 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
365 {
366 switch (aarch64_regno_regclass (regno))
367 {
368 case FP_REGS:
369 case FP_LO_REGS:
370 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
371 default:
372 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
373 }
374 gcc_unreachable ();
375 }
376
377 /* Implement HARD_REGNO_MODE_OK. */
378
379 int
380 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
381 {
382 if (GET_MODE_CLASS (mode) == MODE_CC)
383 return regno == CC_REGNUM;
384
385 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
386 || regno == ARG_POINTER_REGNUM)
387 return mode == Pmode;
388
389 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
390 return 1;
391
392 if (FP_REGNUM_P (regno))
393 {
394 if (aarch64_vect_struct_mode_p (mode))
395 return
396 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
397 else
398 return 1;
399 }
400
401 return 0;
402 }
403
404 /* Return true if calls to DECL should be treated as
405 long-calls (i.e. called via a register). */
406 static bool
407 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
408 {
409 return false;
410 }
411
412 /* Return true if calls to symbol-ref SYM should be treated as
413 long-calls (i.e. called via a register). */
414 bool
415 aarch64_is_long_call_p (rtx sym)
416 {
417 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
418 }
419
420 /* Return true if the offsets to a zero/sign-extract operation
421 represent an expression that matches an extend operation. The
422 operands represent the parameters from
423
424 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
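/* As a worked example (an illustration, not derived from any particular
   pattern): with MODE == DImode, EXTRACT_IMM == 34 and MULT_IMM == 4 the
   checks below succeed: 34 & ~7 == 32 is a power of two, 34 & 7 == 2 is
   a shift of at most 4, and MULT_IMM == 1 << 2.  That shape corresponds
   to an extend-and-shift operand such as the "sxtw #2" in

     add x0, x1, w2, sxtw #2  */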
425 bool
426 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
427 rtx extract_imm)
428 {
429 HOST_WIDE_INT mult_val, extract_val;
430
431 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
432 return false;
433
434 mult_val = INTVAL (mult_imm);
435 extract_val = INTVAL (extract_imm);
436
437 if (extract_val > 8
438 && extract_val < GET_MODE_BITSIZE (mode)
439 && exact_log2 (extract_val & ~7) > 0
440 && (extract_val & 7) <= 4
441 && mult_val == (1 << (extract_val & 7)))
442 return true;
443
444 return false;
445 }
446
447 /* Emit an insn that's a simple single-set. Both the operands must be
448 known to be valid. */
449 inline static rtx
450 emit_set_insn (rtx x, rtx y)
451 {
452 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
453 }
454
455 /* X and Y are two things to compare using CODE. Emit the compare insn and
456 return the rtx for the CC register in the appropriate mode. */
457 rtx
458 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
459 {
460 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
461 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
462
463 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
464 return cc_reg;
465 }
466
467 /* Build the SYMBOL_REF for __tls_get_addr. */
468
469 static GTY(()) rtx tls_get_addr_libfunc;
470
471 rtx
472 aarch64_tls_get_addr (void)
473 {
474 if (!tls_get_addr_libfunc)
475 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
476 return tls_get_addr_libfunc;
477 }
478
479 /* Return the TLS model to use for ADDR. */
480
481 static enum tls_model
482 tls_symbolic_operand_type (rtx addr)
483 {
484 enum tls_model tls_kind = TLS_MODEL_NONE;
485 rtx sym, addend;
486
487 if (GET_CODE (addr) == CONST)
488 {
489 split_const (addr, &sym, &addend);
490 if (GET_CODE (sym) == SYMBOL_REF)
491 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
492 }
493 else if (GET_CODE (addr) == SYMBOL_REF)
494 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
495
496 return tls_kind;
497 }
498
499 /* We allow lo_sum's in our legitimate addresses so that combine
500 can take care of combining addresses where necessary, but for
501 generation purposes we generate the address as follows:
502
503 RTL Absolute
504 tmp = hi (symbol_ref); adrp x1, foo
505 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
506 nop
507
508 PIC TLS
509 adrp x1, :got:foo adrp tmp, :tlsgd:foo
510 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
511 bl __tls_get_addr
512 nop
513
514 Load TLS symbol, depending on TLS mechanism and TLS access model.
515
516 Global Dynamic - Traditional TLS:
517 adrp tmp, :tlsgd:imm
518 add dest, tmp, #:tlsgd_lo12:imm
519 bl __tls_get_addr
520
521 Global Dynamic - TLS Descriptors:
522 adrp dest, :tlsdesc:imm
523 ldr tmp, [dest, #:tlsdesc_lo12:imm]
524 add dest, dest, #:tlsdesc_lo12:imm
525 blr tmp
526 mrs tp, tpidr_el0
527 add dest, dest, tp
528
529 Initial Exec:
530 mrs tp, tpidr_el0
531 adrp tmp, :gottprel:imm
532 ldr dest, [tmp, #:gottprel_lo12:imm]
533 add dest, dest, tp
534
535 Local Exec:
536 mrs tp, tpidr_el0
537 add t0, tp, #:tprel_hi12:imm
538 add t0, #:tprel_lo12_nc:imm
539 */
540
541 static void
542 aarch64_load_symref_appropriately (rtx dest, rtx imm,
543 enum aarch64_symbol_type type)
544 {
545 switch (type)
546 {
547 case SYMBOL_SMALL_ABSOLUTE:
548 {
549 /* In ILP32, the mode of dest can be either SImode or DImode. */
550 rtx tmp_reg = dest;
551 enum machine_mode mode = GET_MODE (dest);
552
553 gcc_assert (mode == Pmode || mode == ptr_mode);
554
555 if (can_create_pseudo_p ())
556 tmp_reg = gen_reg_rtx (mode);
557
558 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
559 emit_insn (gen_add_losym (dest, tmp_reg, imm));
560 return;
561 }
562
563 case SYMBOL_TINY_ABSOLUTE:
564 emit_insn (gen_rtx_SET (Pmode, dest, imm));
565 return;
566
567 case SYMBOL_SMALL_GOT:
568 {
569 /* In ILP32, the mode of dest can be either SImode or DImode,
570 while the got entry is always of SImode size. The mode of
571 dest depends on how dest is used: if dest is assigned to a
572 pointer (e.g. in the memory), it has SImode; it may have
573 DImode if dest is dereferenced to access the memory.
574 This is why we have to handle three different ldr_got_small
575 patterns here (two patterns for ILP32). */
576 rtx tmp_reg = dest;
577 enum machine_mode mode = GET_MODE (dest);
578
579 if (can_create_pseudo_p ())
580 tmp_reg = gen_reg_rtx (mode);
581
582 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
583 if (mode == ptr_mode)
584 {
585 if (mode == DImode)
586 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
587 else
588 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
589 }
590 else
591 {
592 gcc_assert (mode == Pmode);
593 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
594 }
595
596 return;
597 }
598
599 case SYMBOL_SMALL_TLSGD:
600 {
601 rtx insns;
602 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
603
604 start_sequence ();
605 emit_call_insn (gen_tlsgd_small (result, imm));
606 insns = get_insns ();
607 end_sequence ();
608
609 RTL_CONST_CALL_P (insns) = 1;
610 emit_libcall_block (insns, dest, result, imm);
611 return;
612 }
613
614 case SYMBOL_SMALL_TLSDESC:
615 {
616 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
617 rtx tp;
618
619 emit_insn (gen_tlsdesc_small (imm));
620 tp = aarch64_load_tp (NULL);
621 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
622 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
623 return;
624 }
625
626 case SYMBOL_SMALL_GOTTPREL:
627 {
628 rtx tmp_reg = gen_reg_rtx (Pmode);
629 rtx tp = aarch64_load_tp (NULL);
630 emit_insn (gen_tlsie_small (tmp_reg, imm));
631 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
632 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
633 return;
634 }
635
636 case SYMBOL_SMALL_TPREL:
637 {
638 rtx tp = aarch64_load_tp (NULL);
639 emit_insn (gen_tlsle_small (dest, tp, imm));
640 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
641 return;
642 }
643
644 case SYMBOL_TINY_GOT:
645 emit_insn (gen_ldr_got_tiny (dest, imm));
646 return;
647
648 default:
649 gcc_unreachable ();
650 }
651 }
652
653 /* Emit a move from SRC to DEST. Assume that the move expanders can
654 handle all moves if !can_create_pseudo_p (). The distinction is
655 important because, unlike emit_move_insn, the move expanders know
656 how to force Pmode objects into the constant pool even when the
657 constant pool address is not itself legitimate. */
658 static rtx
659 aarch64_emit_move (rtx dest, rtx src)
660 {
661 return (can_create_pseudo_p ()
662 ? emit_move_insn (dest, src)
663 : emit_move_insn_1 (dest, src));
664 }
665
666 void
667 aarch64_split_128bit_move (rtx dst, rtx src)
668 {
669 rtx low_dst;
670
671 enum machine_mode src_mode = GET_MODE (src);
672 enum machine_mode dst_mode = GET_MODE (dst);
673 int src_regno = REGNO (src);
674 int dst_regno = REGNO (dst);
675
676 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
677
678 if (REG_P (dst) && REG_P (src))
679 {
680 gcc_assert (src_mode == TImode || src_mode == TFmode);
681
682 /* Handle r -> w, w -> r. */
683 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
684 {
685 switch (src_mode) {
686 case TImode:
687 emit_insn
688 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
689 emit_insn
690 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
691 return;
692 case TFmode:
693 emit_insn
694 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
695 emit_insn
696 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
697 return;
698 default:
699 gcc_unreachable ();
700 }
701 }
702 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
703 {
704 switch (src_mode) {
705 case TImode:
706 emit_insn
707 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
708 emit_insn
709 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
710 return;
711 case TFmode:
712 emit_insn
713 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
714 emit_insn
715 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
716 return;
717 default:
718 gcc_unreachable ();
719 }
720 }
721 /* Fall through to r -> r cases. */
722 }
723
724 switch (dst_mode) {
725 case TImode:
726 low_dst = gen_lowpart (word_mode, dst);
727 if (REG_P (low_dst)
728 && reg_overlap_mentioned_p (low_dst, src))
729 {
730 aarch64_emit_move (gen_highpart (word_mode, dst),
731 gen_highpart_mode (word_mode, TImode, src));
732 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
733 }
734 else
735 {
736 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
737 aarch64_emit_move (gen_highpart (word_mode, dst),
738 gen_highpart_mode (word_mode, TImode, src));
739 }
740 return;
741 case TFmode:
742 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
743 gen_rtx_REG (DFmode, src_regno));
744 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
745 gen_rtx_REG (DFmode, src_regno + 1));
746 return;
747 default:
748 gcc_unreachable ();
749 }
750 }
751
752 bool
753 aarch64_split_128bit_move_p (rtx dst, rtx src)
754 {
755 return (! REG_P (src)
756 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
757 }
758
759 /* Split a complex SIMD combine. */
760
761 void
762 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
763 {
764 enum machine_mode src_mode = GET_MODE (src1);
765 enum machine_mode dst_mode = GET_MODE (dst);
766
767 gcc_assert (VECTOR_MODE_P (dst_mode));
768
769 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
770 {
771 rtx (*gen) (rtx, rtx, rtx);
772
773 switch (src_mode)
774 {
775 case V8QImode:
776 gen = gen_aarch64_simd_combinev8qi;
777 break;
778 case V4HImode:
779 gen = gen_aarch64_simd_combinev4hi;
780 break;
781 case V2SImode:
782 gen = gen_aarch64_simd_combinev2si;
783 break;
784 case V2SFmode:
785 gen = gen_aarch64_simd_combinev2sf;
786 break;
787 case DImode:
788 gen = gen_aarch64_simd_combinedi;
789 break;
790 case DFmode:
791 gen = gen_aarch64_simd_combinedf;
792 break;
793 default:
794 gcc_unreachable ();
795 }
796
797 emit_insn (gen (dst, src1, src2));
798 return;
799 }
800 }
801
802 /* Split a complex SIMD move. */
803
804 void
805 aarch64_split_simd_move (rtx dst, rtx src)
806 {
807 enum machine_mode src_mode = GET_MODE (src);
808 enum machine_mode dst_mode = GET_MODE (dst);
809
810 gcc_assert (VECTOR_MODE_P (dst_mode));
811
812 if (REG_P (dst) && REG_P (src))
813 {
814 rtx (*gen) (rtx, rtx);
815
816 gcc_assert (VECTOR_MODE_P (src_mode));
817
818 switch (src_mode)
819 {
820 case V16QImode:
821 gen = gen_aarch64_split_simd_movv16qi;
822 break;
823 case V8HImode:
824 gen = gen_aarch64_split_simd_movv8hi;
825 break;
826 case V4SImode:
827 gen = gen_aarch64_split_simd_movv4si;
828 break;
829 case V2DImode:
830 gen = gen_aarch64_split_simd_movv2di;
831 break;
832 case V4SFmode:
833 gen = gen_aarch64_split_simd_movv4sf;
834 break;
835 case V2DFmode:
836 gen = gen_aarch64_split_simd_movv2df;
837 break;
838 default:
839 gcc_unreachable ();
840 }
841
842 emit_insn (gen (dst, src));
843 return;
844 }
845 }
846
847 static rtx
848 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
849 {
850 if (can_create_pseudo_p ())
851 return force_reg (mode, value);
852 else
853 {
854 x = aarch64_emit_move (x, value);
855 return x;
856 }
857 }
858
859
860 static rtx
861 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
862 {
863 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
864 {
865 rtx high;
866 /* Load the full offset into a register. This
867 might be improvable in the future. */
868 high = GEN_INT (offset);
869 offset = 0;
870 high = aarch64_force_temporary (mode, temp, high);
871 reg = aarch64_force_temporary (mode, temp,
872 gen_rtx_PLUS (mode, high, reg));
873 }
874 return plus_constant (mode, reg, offset);
875 }
876
877 void
878 aarch64_expand_mov_immediate (rtx dest, rtx imm)
879 {
880 enum machine_mode mode = GET_MODE (dest);
881 unsigned HOST_WIDE_INT mask;
882 int i;
883 bool first;
884 unsigned HOST_WIDE_INT val;
885 bool subtargets;
886 rtx subtarget;
887 int one_match, zero_match;
888
889 gcc_assert (mode == SImode || mode == DImode);
890
891 /* Check what type of symbol it is. */
892 if (GET_CODE (imm) == SYMBOL_REF
893 || GET_CODE (imm) == LABEL_REF
894 || GET_CODE (imm) == CONST)
895 {
896 rtx mem, base, offset;
897 enum aarch64_symbol_type sty;
898
899 /* If we have (const (plus symbol offset)), separate out the offset
900 before we start classifying the symbol. */
901 split_const (imm, &base, &offset);
902
903 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
904 switch (sty)
905 {
906 case SYMBOL_FORCE_TO_MEM:
907 if (offset != const0_rtx
908 && targetm.cannot_force_const_mem (mode, imm))
909 {
910 gcc_assert(can_create_pseudo_p ());
911 base = aarch64_force_temporary (mode, dest, base);
912 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
913 aarch64_emit_move (dest, base);
914 return;
915 }
916 mem = force_const_mem (ptr_mode, imm);
917 gcc_assert (mem);
918 if (mode != ptr_mode)
919 mem = gen_rtx_ZERO_EXTEND (mode, mem);
920 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
921 return;
922
923 case SYMBOL_SMALL_TLSGD:
924 case SYMBOL_SMALL_TLSDESC:
925 case SYMBOL_SMALL_GOTTPREL:
926 case SYMBOL_SMALL_GOT:
927 case SYMBOL_TINY_GOT:
928 if (offset != const0_rtx)
929 {
930 gcc_assert(can_create_pseudo_p ());
931 base = aarch64_force_temporary (mode, dest, base);
932 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
933 aarch64_emit_move (dest, base);
934 return;
935 }
936 /* FALLTHRU */
937
938 case SYMBOL_SMALL_TPREL:
939 case SYMBOL_SMALL_ABSOLUTE:
940 case SYMBOL_TINY_ABSOLUTE:
941 aarch64_load_symref_appropriately (dest, imm, sty);
942 return;
943
944 default:
945 gcc_unreachable ();
946 }
947 }
948
949 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
950 {
951 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
952 return;
953 }
954
955 if (!CONST_INT_P (imm))
956 {
957 if (GET_CODE (imm) == HIGH)
958 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
959 else
960 {
961 rtx mem = force_const_mem (mode, imm);
962 gcc_assert (mem);
963 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
964 }
965
966 return;
967 }
968
969 if (mode == SImode)
970 {
971 /* We know we can't do this in 1 insn, and we must be able to do it
972 in two; so don't mess around looking for sequences that don't buy
973 us anything. */
974 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
975 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
976 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
977 return;
978 }
979
980 /* Remaining cases are all for DImode. */
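/* Sketch of the strategy below (the constants are illustrative only):
   the value is examined one 16-bit halfword at a time.  If two halfwords
   are all-ones, it is cheaper to start from an all-ones (MOVN-class)
   constant and patch the remaining halfwords with MOVK; if two halfwords
   are zero, a MOVZ/MOVK sequence is used, e.g. for 0x0000001200340000:

     mov  x0, #0x340000
     movk x0, #0x12, lsl #32

   Otherwise the code tries to reach the value from a single add/sub
   immediate or from one or two bitmask immediates before falling back to
   the full MOVZ plus up-to-three-MOVK sequence.  */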
981
982 val = INTVAL (imm);
983 subtargets = optimize && can_create_pseudo_p ();
984
985 one_match = 0;
986 zero_match = 0;
987 mask = 0xffff;
988
989 for (i = 0; i < 64; i += 16, mask <<= 16)
990 {
991 if ((val & mask) == 0)
992 zero_match++;
993 else if ((val & mask) == mask)
994 one_match++;
995 }
996
997 if (one_match == 2)
998 {
999 mask = 0xffff;
1000 for (i = 0; i < 64; i += 16, mask <<= 16)
1001 {
1002 if ((val & mask) != mask)
1003 {
1004 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1005 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1006 GEN_INT ((val >> i) & 0xffff)));
1007 return;
1008 }
1009 }
1010 gcc_unreachable ();
1011 }
1012
1013 if (zero_match == 2)
1014 goto simple_sequence;
1015
1016 mask = 0x0ffff0000UL;
1017 for (i = 16; i < 64; i += 16, mask <<= 16)
1018 {
1019 HOST_WIDE_INT comp = mask & ~(mask - 1);
1020
1021 if (aarch64_uimm12_shift (val - (val & mask)))
1022 {
1023 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1024
1025 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1026 emit_insn (gen_adddi3 (dest, subtarget,
1027 GEN_INT (val - (val & mask))));
1028 return;
1029 }
1030 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1031 {
1032 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1033
1034 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1035 GEN_INT ((val + comp) & mask)));
1036 emit_insn (gen_adddi3 (dest, subtarget,
1037 GEN_INT (val - ((val + comp) & mask))));
1038 return;
1039 }
1040 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1041 {
1042 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1043
1044 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1045 GEN_INT ((val - comp) | ~mask)));
1046 emit_insn (gen_adddi3 (dest, subtarget,
1047 GEN_INT (val - ((val - comp) | ~mask))));
1048 return;
1049 }
1050 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1051 {
1052 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1053
1054 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1055 GEN_INT (val | ~mask)));
1056 emit_insn (gen_adddi3 (dest, subtarget,
1057 GEN_INT (val - (val | ~mask))));
1058 return;
1059 }
1060 }
1061
1062 /* See if we can do it by arithmetically combining two
1063 immediates. */
1064 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1065 {
1066 int j;
1067 mask = 0xffff;
1068
1069 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1070 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1071 {
1072 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1073 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1074 GEN_INT (aarch64_bitmasks[i])));
1075 emit_insn (gen_adddi3 (dest, subtarget,
1076 GEN_INT (val - aarch64_bitmasks[i])));
1077 return;
1078 }
1079
1080 for (j = 0; j < 64; j += 16, mask <<= 16)
1081 {
1082 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1083 {
1084 emit_insn (gen_rtx_SET (VOIDmode, dest,
1085 GEN_INT (aarch64_bitmasks[i])));
1086 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1087 GEN_INT ((val >> j) & 0xffff)));
1088 return;
1089 }
1090 }
1091 }
1092
1093 /* See if we can do it by logically combining two immediates. */
1094 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1095 {
1096 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1097 {
1098 int j;
1099
1100 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1101 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1102 {
1103 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1104 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1105 GEN_INT (aarch64_bitmasks[i])));
1106 emit_insn (gen_iordi3 (dest, subtarget,
1107 GEN_INT (aarch64_bitmasks[j])));
1108 return;
1109 }
1110 }
1111 else if ((val & aarch64_bitmasks[i]) == val)
1112 {
1113 int j;
1114
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1117 {
1118
1119 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1120 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1121 GEN_INT (aarch64_bitmasks[j])));
1122 emit_insn (gen_anddi3 (dest, subtarget,
1123 GEN_INT (aarch64_bitmasks[i])));
1124 return;
1125 }
1126 }
1127 }
1128
1129 simple_sequence:
1130 first = true;
1131 mask = 0xffff;
1132 for (i = 0; i < 64; i += 16, mask <<= 16)
1133 {
1134 if ((val & mask) != 0)
1135 {
1136 if (first)
1137 {
1138 emit_insn (gen_rtx_SET (VOIDmode, dest,
1139 GEN_INT (val & mask)));
1140 first = false;
1141 }
1142 else
1143 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1144 GEN_INT ((val >> i) & 0xffff)));
1145 }
1146 }
1147 }
1148
1149 static bool
1150 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1151 {
1152 /* Indirect calls are not currently supported. */
1153 if (decl == NULL)
1154 return false;
1155
1156 /* Cannot tail-call to long-calls, since these are outside of the
1157 range of a branch instruction (we could handle this if we added
1158 support for indirect tail-calls). */
1159 if (aarch64_decl_is_long_call_p (decl))
1160 return false;
1161
1162 return true;
1163 }
1164
1165 /* Implement TARGET_PASS_BY_REFERENCE. */
1166
1167 static bool
1168 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1169 enum machine_mode mode,
1170 const_tree type,
1171 bool named ATTRIBUTE_UNUSED)
1172 {
1173 HOST_WIDE_INT size;
1174 enum machine_mode dummymode;
1175 int nregs;
1176
1177 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1178 size = (mode == BLKmode && type)
1179 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1180
1181 if (type)
1182 {
1183 /* Arrays are always passed by reference. */
1184 if (TREE_CODE (type) == ARRAY_TYPE)
1185 return true;
1186 /* For other aggregates, the decision is based on their size. */
1187 if (AGGREGATE_TYPE_P (type))
1188 size = int_size_in_bytes (type);
1189 }
1190
1191 /* Variable-sized arguments are always passed by reference. */
1192 if (size < 0)
1193 return true;
1194
1195 /* Can this be a candidate to be passed in fp/simd register(s)? */
1196 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1197 &dummymode, &nregs,
1198 NULL))
1199 return false;
1200
1201 /* Arguments which are variable sized or larger than 2 registers are
1202 passed by reference unless they are a homogeneous floating-point
1203 aggregate. */
1204 return size > 2 * UNITS_PER_WORD;
1205 }
1206
1207 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1208 static bool
1209 aarch64_return_in_msb (const_tree valtype)
1210 {
1211 enum machine_mode dummy_mode;
1212 int dummy_int;
1213
1214 /* Never happens in little-endian mode. */
1215 if (!BYTES_BIG_ENDIAN)
1216 return false;
1217
1218 /* Only composite types smaller than or equal to 16 bytes can
1219 be potentially returned in registers. */
1220 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1221 || int_size_in_bytes (valtype) <= 0
1222 || int_size_in_bytes (valtype) > 16)
1223 return false;
1224
1225 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1226 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1227 is always passed/returned in the least significant bits of fp/simd
1228 register(s). */
1229 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1230 &dummy_mode, &dummy_int, NULL))
1231 return false;
1232
1233 return true;
1234 }
1235
1236 /* Implement TARGET_FUNCTION_VALUE.
1237 Define how to find the value returned by a function. */
1238
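/* As an illustrative example (types chosen here, not from the sources):
   a struct of four doubles is an HFA and is returned as a PARALLEL
   spanning v0-v3, a __int128 comes back in x0/x1, and a plain int comes
   back in w0.  */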
1239 static rtx
1240 aarch64_function_value (const_tree type, const_tree func,
1241 bool outgoing ATTRIBUTE_UNUSED)
1242 {
1243 enum machine_mode mode;
1244 int unsignedp;
1245 int count;
1246 enum machine_mode ag_mode;
1247
1248 mode = TYPE_MODE (type);
1249 if (INTEGRAL_TYPE_P (type))
1250 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1251
1252 if (aarch64_return_in_msb (type))
1253 {
1254 HOST_WIDE_INT size = int_size_in_bytes (type);
1255
1256 if (size % UNITS_PER_WORD != 0)
1257 {
1258 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1259 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1260 }
1261 }
1262
1263 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1264 &ag_mode, &count, NULL))
1265 {
1266 if (!aarch64_composite_type_p (type, mode))
1267 {
1268 gcc_assert (count == 1 && mode == ag_mode);
1269 return gen_rtx_REG (mode, V0_REGNUM);
1270 }
1271 else
1272 {
1273 int i;
1274 rtx par;
1275
1276 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1277 for (i = 0; i < count; i++)
1278 {
1279 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1280 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1281 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1282 XVECEXP (par, 0, i) = tmp;
1283 }
1284 return par;
1285 }
1286 }
1287 else
1288 return gen_rtx_REG (mode, R0_REGNUM);
1289 }
1290
1291 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1292 Return true if REGNO is the number of a hard register in which the values
1293 of a called function may come back. */
1294
1295 static bool
1296 aarch64_function_value_regno_p (const unsigned int regno)
1297 {
1298 /* Maximum of 16 bytes can be returned in the general registers. Examples
1299 of 16-byte return values are: 128-bit integers and 16-byte small
1300 structures (excluding homogeneous floating-point aggregates). */
1301 if (regno == R0_REGNUM || regno == R1_REGNUM)
1302 return true;
1303
1304 /* Up to four fp/simd registers can return a function value, e.g. a
1305 homogeneous floating-point aggregate having four members. */
1306 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1307 return !TARGET_GENERAL_REGS_ONLY;
1308
1309 return false;
1310 }
1311
1312 /* Implement TARGET_RETURN_IN_MEMORY.
1313
1314 If the type T of the result of a function is such that
1315 void func (T arg)
1316 would require that arg be passed as a value in a register (or set of
1317 registers) according to the parameter passing rules, then the result
1318 is returned in the same registers as would be used for such an
1319 argument. */
1320
1321 static bool
1322 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1323 {
1324 HOST_WIDE_INT size;
1325 enum machine_mode ag_mode;
1326 int count;
1327
1328 if (!AGGREGATE_TYPE_P (type)
1329 && TREE_CODE (type) != COMPLEX_TYPE
1330 && TREE_CODE (type) != VECTOR_TYPE)
1331 /* Simple scalar types always returned in registers. */
1332 return false;
1333
1334 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1335 type,
1336 &ag_mode,
1337 &count,
1338 NULL))
1339 return false;
1340
1341 /* Types larger than 2 registers are returned in memory. */
1342 size = int_size_in_bytes (type);
1343 return (size < 0 || size > 2 * UNITS_PER_WORD);
1344 }
1345
1346 static bool
1347 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1348 const_tree type, int *nregs)
1349 {
1350 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1351 return aarch64_vfp_is_call_or_return_candidate (mode,
1352 type,
1353 &pcum->aapcs_vfp_rmode,
1354 nregs,
1355 NULL);
1356 }
1357
1358 /* Given MODE and TYPE of a function argument, return the alignment in
1359 bits. The idea is to suppress any stronger alignment requested by
1360 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1361 This is a helper function for local use only. */
1362
1363 static unsigned int
1364 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1365 {
1366 unsigned int alignment;
1367
1368 if (type)
1369 {
1370 if (!integer_zerop (TYPE_SIZE (type)))
1371 {
1372 if (TYPE_MODE (type) == mode)
1373 alignment = TYPE_ALIGN (type);
1374 else
1375 alignment = GET_MODE_ALIGNMENT (mode);
1376 }
1377 else
1378 alignment = 0;
1379 }
1380 else
1381 alignment = GET_MODE_ALIGNMENT (mode);
1382
1383 return alignment;
1384 }
1385
1386 /* Layout a function argument according to the AAPCS64 rules. The rule
1387 numbers refer to the rule numbers in the AAPCS64. */
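/* As a worked example (hypothetical signature, for illustration only),
   for

     void f (int a, double b, struct { float x, y; } c, __int128 d);

   the allocation performed below is roughly: a -> w0, b -> d0, c -> s1
   and s2 (an HFA of two floats, one SIMD/FP register per member), and
   d -> the x2/x3 pair (its 16-byte alignment first rounds the NGRN up to
   an even number, as in C.8 below).  Anything that no longer fits in
   registers goes on the stack.  */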
1388
1389 static void
1390 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1391 const_tree type,
1392 bool named ATTRIBUTE_UNUSED)
1393 {
1394 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1395 int ncrn, nvrn, nregs;
1396 bool allocate_ncrn, allocate_nvrn;
1397
1398 /* We need to do this once per argument. */
1399 if (pcum->aapcs_arg_processed)
1400 return;
1401
1402 pcum->aapcs_arg_processed = true;
1403
1404 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1405 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1406 mode,
1407 type,
1408 &nregs);
1409
1410 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1411 The following code thus handles passing by SIMD/FP registers first. */
1412
1413 nvrn = pcum->aapcs_nvrn;
1414
1415 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1416 and homogeneous short-vector aggregates (HVA). */
1417 if (allocate_nvrn)
1418 {
1419 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1420 {
1421 pcum->aapcs_nextnvrn = nvrn + nregs;
1422 if (!aarch64_composite_type_p (type, mode))
1423 {
1424 gcc_assert (nregs == 1);
1425 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1426 }
1427 else
1428 {
1429 rtx par;
1430 int i;
1431 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1432 for (i = 0; i < nregs; i++)
1433 {
1434 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1435 V0_REGNUM + nvrn + i);
1436 tmp = gen_rtx_EXPR_LIST
1437 (VOIDmode, tmp,
1438 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1439 XVECEXP (par, 0, i) = tmp;
1440 }
1441 pcum->aapcs_reg = par;
1442 }
1443 return;
1444 }
1445 else
1446 {
1447 /* C.3 NSRN is set to 8. */
1448 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1449 goto on_stack;
1450 }
1451 }
1452
1453 ncrn = pcum->aapcs_ncrn;
1454 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1455 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1456
1457
1458 /* C.6 - C.9, though the sign and zero extension semantics are
1459 handled elsewhere. This is the case where the argument fits
1460 entirely in general registers. */
1461 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1462 {
1463 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1464
1465 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1466
1467 /* C.8 if the argument has an alignment of 16 then the NGRN is
1468 rounded up to the next even number. */
1469 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1470 {
1471 ++ncrn;
1472 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1473 }
1474 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1475 A reg is still generated for it, but the caller should be smart
1476 enough not to use it. */
1477 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1478 {
1479 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1480 }
1481 else
1482 {
1483 rtx par;
1484 int i;
1485
1486 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1487 for (i = 0; i < nregs; i++)
1488 {
1489 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1490 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1491 GEN_INT (i * UNITS_PER_WORD));
1492 XVECEXP (par, 0, i) = tmp;
1493 }
1494 pcum->aapcs_reg = par;
1495 }
1496
1497 pcum->aapcs_nextncrn = ncrn + nregs;
1498 return;
1499 }
1500
1501 /* C.11 */
1502 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1503
1504 /* The argument is passed on stack; record the needed number of words for
1505 this argument (we can re-use NREGS) and align the total size if
1506 necessary. */
1507 on_stack:
1508 pcum->aapcs_stack_words = nregs;
1509 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1510 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1511 16 / UNITS_PER_WORD) + 1;
1512 return;
1513 }
1514
1515 /* Implement TARGET_FUNCTION_ARG. */
1516
1517 static rtx
1518 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1519 const_tree type, bool named)
1520 {
1521 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1522 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1523
1524 if (mode == VOIDmode)
1525 return NULL_RTX;
1526
1527 aarch64_layout_arg (pcum_v, mode, type, named);
1528 return pcum->aapcs_reg;
1529 }
1530
1531 void
1532 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1533 const_tree fntype ATTRIBUTE_UNUSED,
1534 rtx libname ATTRIBUTE_UNUSED,
1535 const_tree fndecl ATTRIBUTE_UNUSED,
1536 unsigned n_named ATTRIBUTE_UNUSED)
1537 {
1538 pcum->aapcs_ncrn = 0;
1539 pcum->aapcs_nvrn = 0;
1540 pcum->aapcs_nextncrn = 0;
1541 pcum->aapcs_nextnvrn = 0;
1542 pcum->pcs_variant = ARM_PCS_AAPCS64;
1543 pcum->aapcs_reg = NULL_RTX;
1544 pcum->aapcs_arg_processed = false;
1545 pcum->aapcs_stack_words = 0;
1546 pcum->aapcs_stack_size = 0;
1547
1548 return;
1549 }
1550
1551 static void
1552 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1553 enum machine_mode mode,
1554 const_tree type,
1555 bool named)
1556 {
1557 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1558 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1559 {
1560 aarch64_layout_arg (pcum_v, mode, type, named);
1561 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1562 != (pcum->aapcs_stack_words != 0));
1563 pcum->aapcs_arg_processed = false;
1564 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1565 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1566 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1567 pcum->aapcs_stack_words = 0;
1568 pcum->aapcs_reg = NULL_RTX;
1569 }
1570 }
1571
1572 bool
1573 aarch64_function_arg_regno_p (unsigned regno)
1574 {
1575 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1576 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1577 }
1578
1579 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1580 PARM_BOUNDARY bits of alignment, but will be given anything up
1581 to STACK_BOUNDARY bits if the type requires it. This makes sure
1582 that both before and after the layout of each argument, the Next
1583 Stacked Argument Address (NSAA) will have a minimum alignment of
1584 8 bytes. */
1585
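/* For example (illustrative): a plain char argument is raised to
   PARM_BOUNDARY (64 bits), while a 16-byte aligned type such as
   __int128 is reported with STACK_BOUNDARY (128 bits); nothing is ever
   reported above STACK_BOUNDARY.  */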
1586 static unsigned int
1587 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1588 {
1589 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1590
1591 if (alignment < PARM_BOUNDARY)
1592 alignment = PARM_BOUNDARY;
1593 if (alignment > STACK_BOUNDARY)
1594 alignment = STACK_BOUNDARY;
1595 return alignment;
1596 }
1597
1598 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1599
1600 Return true if an argument passed on the stack should be padded upwards,
1601 i.e. if the least-significant byte of the stack slot has useful data.
1602
1603 Small aggregate types are placed in the lowest memory address.
1604
1605 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1606
1607 bool
1608 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1609 {
1610 /* On little-endian targets, the least significant byte of every stack
1611 argument is passed at the lowest byte address of the stack slot. */
1612 if (!BYTES_BIG_ENDIAN)
1613 return true;
1614
1615 /* Otherwise, integral, floating-point and pointer types are padded downward:
1616 the least significant byte of a stack argument is passed at the highest
1617 byte address of the stack slot. */
1618 if (type
1619 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1620 || POINTER_TYPE_P (type))
1621 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1622 return false;
1623
1624 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1625 return true;
1626 }
1627
1628 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1629
1630 It specifies padding for the last (may also be the only)
1631 element of a block move between registers and memory.
1632 Assuming the block is in memory, padding upward means that
1633 the last element is padded after its most significant byte,
1634 while with downward padding the last element is padded at
1635 its least significant byte side.
1636
1637 Small aggregates and small complex types are always padded
1638 upwards.
1639
1640 We don't need to worry about homogeneous floating-point or
1641 short-vector aggregates; their move is not affected by the
1642 padding direction determined here. Regardless of endianness,
1643 each element of such an aggregate is put in the least
1644 significant bits of a fp/simd register.
1645
1646 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1647 register has useful data, and return the opposite if the most
1648 significant byte does. */
1649
1650 bool
1651 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1652 bool first ATTRIBUTE_UNUSED)
1653 {
1654
1655 /* Small composite types are always padded upward. */
1656 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1657 {
1658 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1659 : GET_MODE_SIZE (mode));
1660 if (size < 2 * UNITS_PER_WORD)
1661 return true;
1662 }
1663
1664 /* Otherwise, use the default padding. */
1665 return !BYTES_BIG_ENDIAN;
1666 }
1667
1668 static enum machine_mode
1669 aarch64_libgcc_cmp_return_mode (void)
1670 {
1671 return SImode;
1672 }
1673
1674 static bool
1675 aarch64_frame_pointer_required (void)
1676 {
1677 /* If the function contains dynamic stack allocations, we need to
1678 use the frame pointer to access the static parts of the frame. */
1679 if (cfun->calls_alloca)
1680 return true;
1681
1682 /* We may have turned flag_omit_frame_pointer on in order to have this
1683 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1684 and we'll check it here.
1685 If we really did set flag_omit_frame_pointer normally, then we return false
1686 (no frame pointer required) in all cases. */
1687
1688 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1689 return false;
1690 else if (flag_omit_leaf_frame_pointer)
1691 return !crtl->is_leaf;
1692 return true;
1693 }
1694
1695 /* Mark the registers that need to be saved by the callee and calculate
1696 the size of the callee-saved registers area and frame record (both FP
1697 and LR may be omitted). */
1698 static void
1699 aarch64_layout_frame (void)
1700 {
1701 HOST_WIDE_INT offset = 0;
1702 int regno;
1703
1704 if (reload_completed && cfun->machine->frame.laid_out)
1705 return;
1706
1707 cfun->machine->frame.fp_lr_offset = 0;
1708
1709 /* First mark all the registers that really need to be saved... */
1710 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1711 cfun->machine->frame.reg_offset[regno] = -1;
1712
1713 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1714 cfun->machine->frame.reg_offset[regno] = -1;
1715
1716 /* ... that includes the eh data registers (if needed)... */
1717 if (crtl->calls_eh_return)
1718 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1719 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1720
1721 /* ... and any callee saved register that dataflow says is live. */
1722 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1723 if (df_regs_ever_live_p (regno)
1724 && !call_used_regs[regno])
1725 cfun->machine->frame.reg_offset[regno] = 0;
1726
1727 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1728 if (df_regs_ever_live_p (regno)
1729 && !call_used_regs[regno])
1730 cfun->machine->frame.reg_offset[regno] = 0;
1731
1732 if (frame_pointer_needed)
1733 {
1734 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1735 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1736 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1737 }
1738
1739 /* Now assign stack slots for them. */
1740 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1741 if (cfun->machine->frame.reg_offset[regno] != -1)
1742 {
1743 cfun->machine->frame.reg_offset[regno] = offset;
1744 offset += UNITS_PER_WORD;
1745 }
1746
1747 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1748 if (cfun->machine->frame.reg_offset[regno] != -1)
1749 {
1750 cfun->machine->frame.reg_offset[regno] = offset;
1751 offset += UNITS_PER_WORD;
1752 }
1753
1754 if (frame_pointer_needed)
1755 {
1756 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1757 offset += UNITS_PER_WORD;
1758 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1759 }
1760
1761 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1762 {
1763 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1764 offset += UNITS_PER_WORD;
1765 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1766 }
1767
1768 cfun->machine->frame.padding0 =
1769 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1770 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1771
1772 cfun->machine->frame.saved_regs_size = offset;
1773 cfun->machine->frame.laid_out = true;
1774 }
1775
1776 /* Make the last instruction frame-related and note that it performs
1777 the operation described by FRAME_PATTERN. */
1778
1779 static void
1780 aarch64_set_frame_expr (rtx frame_pattern)
1781 {
1782 rtx insn;
1783
1784 insn = get_last_insn ();
1785 RTX_FRAME_RELATED_P (insn) = 1;
1786 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1787 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1788 frame_pattern,
1789 REG_NOTES (insn));
1790 }
1791
1792 static bool
1793 aarch64_register_saved_on_entry (int regno)
1794 {
1795 return cfun->machine->frame.reg_offset[regno] != -1;
1796 }
1797
1798
1799 static void
1800 aarch64_save_or_restore_fprs (int start_offset, int increment,
1801 bool restore, rtx base_rtx)
1802
1803 {
1804 unsigned regno;
1805 unsigned regno2;
1806 rtx insn;
1807 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1808
1809
1810 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1811 {
1812 if (aarch64_register_saved_on_entry (regno))
1813 {
1814 rtx mem;
1815 mem = gen_mem_ref (DFmode,
1816 plus_constant (Pmode,
1817 base_rtx,
1818 start_offset));
1819
1820 for (regno2 = regno + 1;
1821 regno2 <= V31_REGNUM
1822 && !aarch64_register_saved_on_entry (regno2);
1823 regno2++)
1824 {
1825 /* Empty loop. */
1826 }
1827 if (regno2 <= V31_REGNUM &&
1828 aarch64_register_saved_on_entry (regno2))
1829 {
1830 rtx mem2;
1831 /* Next highest register to be saved. */
1832 mem2 = gen_mem_ref (DFmode,
1833 plus_constant
1834 (Pmode,
1835 base_rtx,
1836 start_offset + increment));
1837 if (restore == false)
1838 {
1839 insn = emit_insn
1840 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1841 mem2, gen_rtx_REG (DFmode, regno2)));
1842
1843 }
1844 else
1845 {
1846 insn = emit_insn
1847 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1848 gen_rtx_REG (DFmode, regno2), mem2));
1849
1850 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1851 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1852 }
1853
1854 /* The first part of a frame-related parallel insn
1855 is always assumed to be relevant to the frame
1856 calculations; subsequent parts are only
1857 frame-related if explicitly marked. */
1858 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1859 1)) = 1;
1860 regno = regno2;
1861 start_offset += increment * 2;
1862 }
1863 else
1864 {
1865 if (restore == false)
1866 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1867 else
1868 {
1869 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1870 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1871 }
1872 start_offset += increment;
1873 }
1874 RTX_FRAME_RELATED_P (insn) = 1;
1875 }
1876 }
1877
1878 }
1879
1880
1881 /* Offset from the stack pointer at which the saves and
1882 restores have to happen. */
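/* A sketch of the pairing done below (registers chosen for
   illustration): if x19, x20 and x23 are the live callee-saved
   registers, x19 and x20 are saved or restored together with a single

     stp x19, x20, [base, #offset]

   (or the matching ldp), while the unpaired x23 falls back to a plain
   str/ldr.  The same scheme is applied to the FP/SIMD registers by
   aarch64_save_or_restore_fprs.  */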
1883 static void
1884 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1885 bool restore)
1886 {
1887 rtx insn;
1888 rtx base_rtx = stack_pointer_rtx;
1889 HOST_WIDE_INT start_offset = offset;
1890 HOST_WIDE_INT increment = UNITS_PER_WORD;
1891 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1892 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1893 unsigned regno;
1894 unsigned regno2;
1895
1896 for (regno = R0_REGNUM; regno <= limit; regno++)
1897 {
1898 if (aarch64_register_saved_on_entry (regno))
1899 {
1900 rtx mem;
1901 mem = gen_mem_ref (Pmode,
1902 plus_constant (Pmode,
1903 base_rtx,
1904 start_offset));
1905
1906 for (regno2 = regno + 1;
1907 regno2 <= limit
1908 && !aarch64_register_saved_on_entry (regno2);
1909 regno2++)
1910 {
1911 /* Empty loop. */
1912 }
1913 if (regno2 <= limit &&
1914 aarch64_register_saved_on_entry (regno2))
1915 {
1916 rtx mem2;
1917 /* Next highest register to be saved. */
1918 mem2 = gen_mem_ref (Pmode,
1919 plus_constant
1920 (Pmode,
1921 base_rtx,
1922 start_offset + increment));
1923 if (restore == false)
1924 {
1925 insn = emit_insn
1926 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1927 mem2, gen_rtx_REG (DImode, regno2)));
1928
1929 }
1930 else
1931 {
1932 insn = emit_insn
1933 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1934 gen_rtx_REG (DImode, regno2), mem2));
1935
1936 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1937 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1938 }
1939
1940 /* The first part of a frame-related parallel insn
1941 is always assumed to be relevant to the frame
1942 calculations; subsequent parts are only
1943 frame-related if explicitly marked. */
1944 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1945 1)) = 1;
1946 regno = regno2;
1947 start_offset += increment * 2;
1948 }
1949 else
1950 {
1951 if (restore == false)
1952 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1953 else
1954 {
1955 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1956 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1957 }
1958 start_offset += increment;
1959 }
1960 RTX_FRAME_RELATED_P (insn) = 1;
1961 }
1962 }
1963
1964 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1965
1966 }
1967
1968 /* AArch64 stack frames generated by this compiler look like:
1969
1970 +-------------------------------+
1971 | |
1972 | incoming stack arguments |
1973 | |
1974 +-------------------------------+ <-- arg_pointer_rtx
1975 | |
1976 | callee-allocated save area |
1977 | for register varargs |
1978 | |
1979 +-------------------------------+
1980 | |
1981 | local variables |
1982 | |
1983 +-------------------------------+ <-- frame_pointer_rtx
1984 | |
1985 | callee-saved registers |
1986 | |
1987 +-------------------------------+
1988 | LR' |
1989 +-------------------------------+
1990 | FP' |
1991 P +-------------------------------+ <-- hard_frame_pointer_rtx
1992 | dynamic allocation |
1993 +-------------------------------+
1994 | |
1995 | outgoing stack arguments |
1996 | |
1997 +-------------------------------+ <-- stack_pointer_rtx
1998
1999 Dynamic stack allocations such as alloca insert data at point P.
2000 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2001 hard_frame_pointer_rtx unchanged. */
2002
2003 /* Generate the prologue instructions for entry into a function.
2004 Establish the stack frame by decreasing the stack pointer with a
2005 properly calculated size and, if necessary, create a frame record
2006 filled with the values of LR and previous frame pointer. The
2007 current FP is also set up if it is in use. */
2008
2009 void
2010 aarch64_expand_prologue (void)
2011 {
2012 /* sub sp, sp, #<frame_size>
2013 stp {fp, lr}, [sp, #<frame_size> - 16]
2014 add fp, sp, #<frame_size> - hardfp_offset
2015 stp {cs_reg}, [fp, #-16] etc.
2016
2017 sub sp, sp, <final_adjustment_if_any>
2018 */
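/* As an illustrative example only (the exact registers and offsets depend
   on the frame layout computed by aarch64_layout_frame), a small 48-byte
   frame with the frame pointer in use and a 16-byte frame record might
   expand along the lines of:

       sub  sp, sp, #48
       stp  x29, x30, [sp, #32]
       add  x29, sp, #32
       stp  x19, x20, [x29, #-16]

   with a final "sub sp, sp, #<outgoing_args_size>" only when outgoing
   argument space is required.  */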
2019 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2020 HOST_WIDE_INT frame_size, offset;
2021 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2022 rtx insn;
2023
2024 aarch64_layout_frame ();
2025 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2026 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2027 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2028 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2029 + crtl->outgoing_args_size);
2030 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2031 STACK_BOUNDARY / BITS_PER_UNIT);
2032
2033 if (flag_stack_usage_info)
2034 current_function_static_stack_size = frame_size;
2035
2036 fp_offset = (offset
2037 - original_frame_size
2038 - cfun->machine->frame.saved_regs_size);
2039
2040 /* Store pairs and load pairs only have an offset range of -512 to 504. */
2041 if (offset >= 512)
2042 {
2043 /* When the frame has a large size, an initial decrease is done on
2044 the stack pointer to jump over the callee-allocated save area for
2045 register varargs, the local variable area and/or the callee-saved
2046 register area. This will allow the pre-index write-back
2047 store pair instructions to be used for setting up the stack frame
2048 efficiently. */
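/* For instance (illustrative values), a residual frame_size of 0x12345
   bytes is dropped with two subtractions below, "sub sp, sp, #0x12000"
   followed by "sub sp, sp, #0x345", since a SUB immediate only encodes
   12 bits, optionally shifted left by 12; a residue of 0x1000000 bytes or
   more goes through the IP0 scratch register instead.  */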
2049 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2050 if (offset >= 512)
2051 offset = cfun->machine->frame.saved_regs_size;
2052
2053 frame_size -= (offset + crtl->outgoing_args_size);
2054 fp_offset = 0;
2055
2056 if (frame_size >= 0x1000000)
2057 {
2058 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2059 emit_move_insn (op0, GEN_INT (-frame_size));
2060 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2061 aarch64_set_frame_expr (gen_rtx_SET
2062 (Pmode, stack_pointer_rtx,
2063 gen_rtx_PLUS (Pmode,
2064 stack_pointer_rtx,
2065 GEN_INT (-frame_size))));
2066 }
2067 else if (frame_size > 0)
2068 {
2069 if ((frame_size & 0xfff) != frame_size)
2070 {
2071 insn = emit_insn (gen_add2_insn
2072 (stack_pointer_rtx,
2073 GEN_INT (-(frame_size
2074 & ~(HOST_WIDE_INT)0xfff))));
2075 RTX_FRAME_RELATED_P (insn) = 1;
2076 }
2077 if ((frame_size & 0xfff) != 0)
2078 {
2079 insn = emit_insn (gen_add2_insn
2080 (stack_pointer_rtx,
2081 GEN_INT (-(frame_size
2082 & (HOST_WIDE_INT)0xfff))));
2083 RTX_FRAME_RELATED_P (insn) = 1;
2084 }
2085 }
2086 }
2087 else
2088 frame_size = -1;
2089
2090 if (offset > 0)
2091 {
2092 /* Save the frame pointer and lr if the frame pointer is needed
2093 first. Make the frame pointer point to the location of the
2094 old frame pointer on the stack. */
2095 if (frame_pointer_needed)
2096 {
2097 rtx mem_fp, mem_lr;
2098
2099 if (fp_offset)
2100 {
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (-offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2104 aarch64_set_frame_expr (gen_rtx_SET
2105 (Pmode, stack_pointer_rtx,
2106 gen_rtx_MINUS (Pmode,
2107 stack_pointer_rtx,
2108 GEN_INT (offset))));
2109 mem_fp = gen_frame_mem (DImode,
2110 plus_constant (Pmode,
2111 stack_pointer_rtx,
2112 fp_offset));
2113 mem_lr = gen_frame_mem (DImode,
2114 plus_constant (Pmode,
2115 stack_pointer_rtx,
2116 fp_offset
2117 + UNITS_PER_WORD));
2118 insn = emit_insn (gen_store_pairdi (mem_fp,
2119 hard_frame_pointer_rtx,
2120 mem_lr,
2121 gen_rtx_REG (DImode,
2122 LR_REGNUM)));
2123 }
2124 else
2125 {
2126 insn = emit_insn (gen_storewb_pairdi_di
2127 (stack_pointer_rtx, stack_pointer_rtx,
2128 hard_frame_pointer_rtx,
2129 gen_rtx_REG (DImode, LR_REGNUM),
2130 GEN_INT (-offset),
2131 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2132 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2133 }
2134
2135 /* The first part of a frame-related parallel insn is always
2136 assumed to be relevant to the frame calculations;
2137 subsequent parts are only frame-related if explicitly
2138 marked. */
2139 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2140 RTX_FRAME_RELATED_P (insn) = 1;
2141
2142 /* Set up frame pointer to point to the location of the
2143 previous frame pointer on the stack. */
2144 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2145 stack_pointer_rtx,
2146 GEN_INT (fp_offset)));
2147 aarch64_set_frame_expr (gen_rtx_SET
2148 (Pmode, hard_frame_pointer_rtx,
2149 gen_rtx_PLUS (Pmode,
2150 stack_pointer_rtx,
2151 GEN_INT (fp_offset))));
2152 RTX_FRAME_RELATED_P (insn) = 1;
2153 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2154 hard_frame_pointer_rtx));
2155 }
2156 else
2157 {
2158 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2159 GEN_INT (-offset)));
2160 RTX_FRAME_RELATED_P (insn) = 1;
2161 }
2162
2163 aarch64_save_or_restore_callee_save_registers
2164 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2165 }
2166
2167 /* When offset >= 512, the outgoing argument area still needs allocating:
2168 sub sp, sp, #<outgoing_args_size> */
2169 if (frame_size > -1)
2170 {
2171 if (crtl->outgoing_args_size > 0)
2172 {
2173 insn = emit_insn (gen_add2_insn
2174 (stack_pointer_rtx,
2175 GEN_INT (- crtl->outgoing_args_size)));
2176 RTX_FRAME_RELATED_P (insn) = 1;
2177 }
2178 }
2179 }
2180
2181 /* Generate the epilogue instructions for returning from a function. */
2182 void
2183 aarch64_expand_epilogue (bool for_sibcall)
2184 {
2185 HOST_WIDE_INT original_frame_size, frame_size, offset;
2186 HOST_WIDE_INT fp_offset;
2187 rtx insn;
2188 rtx cfa_reg;
2189
2190 aarch64_layout_frame ();
2191 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2192 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2193 + crtl->outgoing_args_size);
2194 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2195 STACK_BOUNDARY / BITS_PER_UNIT);
2196
2197 fp_offset = (offset
2198 - original_frame_size
2199 - cfun->machine->frame.saved_regs_size);
2200
2201 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2202
2203 /* Store pairs and load pairs only have an offset range of -512 to 504. */
2204 if (offset >= 512)
2205 {
2206 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2207 if (offset >= 512)
2208 offset = cfun->machine->frame.saved_regs_size;
2209
2210 frame_size -= (offset + crtl->outgoing_args_size);
2211 fp_offset = 0;
2212 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2213 {
2214 insn = emit_insn (gen_add2_insn
2215 (stack_pointer_rtx,
2216 GEN_INT (crtl->outgoing_args_size)));
2217 RTX_FRAME_RELATED_P (insn) = 1;
2218 }
2219 }
2220 else
2221 frame_size = -1;
2222
2223 /* If there were outgoing arguments or we've done dynamic stack
2224 allocation, then restore the stack pointer from the frame
2225 pointer. This is at most one insn and more efficient than using
2226 GCC's internal mechanism. */
2227 if (frame_pointer_needed
2228 && (crtl->outgoing_args_size || cfun->calls_alloca))
2229 {
2230 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2231 hard_frame_pointer_rtx,
2232 GEN_INT (- fp_offset)));
2233 RTX_FRAME_RELATED_P (insn) = 1;
2234 /* As SP is set to (FP - fp_offset), according to the rules in
2235 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2236 from the value of SP from now on. */
2237 cfa_reg = stack_pointer_rtx;
2238 }
2239
2240 aarch64_save_or_restore_callee_save_registers
2241 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2242
2243 /* Restore the frame pointer and lr if the frame pointer is needed. */
2244 if (offset > 0)
2245 {
2246 if (frame_pointer_needed)
2247 {
2248 rtx mem_fp, mem_lr;
2249
2250 if (fp_offset)
2251 {
2252 mem_fp = gen_frame_mem (DImode,
2253 plus_constant (Pmode,
2254 stack_pointer_rtx,
2255 fp_offset));
2256 mem_lr = gen_frame_mem (DImode,
2257 plus_constant (Pmode,
2258 stack_pointer_rtx,
2259 fp_offset
2260 + UNITS_PER_WORD));
2261 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2262 mem_fp,
2263 gen_rtx_REG (DImode,
2264 LR_REGNUM),
2265 mem_lr));
2266 }
2267 else
2268 {
2269 insn = emit_insn (gen_loadwb_pairdi_di
2270 (stack_pointer_rtx,
2271 stack_pointer_rtx,
2272 hard_frame_pointer_rtx,
2273 gen_rtx_REG (DImode, LR_REGNUM),
2274 GEN_INT (offset),
2275 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2276 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2277 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2278 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2279 plus_constant (Pmode, cfa_reg,
2280 offset))));
2281 }
2282
2283 /* The first part of a frame-related parallel insn
2284 is always assumed to be relevant to the frame
2285 calculations; subsequent parts are only
2286 frame-related if explicitly marked. */
2287 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2288 RTX_FRAME_RELATED_P (insn) = 1;
2289 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2290 add_reg_note (insn, REG_CFA_RESTORE,
2291 gen_rtx_REG (DImode, LR_REGNUM));
2292
2293 if (fp_offset)
2294 {
2295 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2296 GEN_INT (offset)));
2297 RTX_FRAME_RELATED_P (insn) = 1;
2298 }
2299 }
2300 else
2301 {
2302 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2303 GEN_INT (offset)));
2304 RTX_FRAME_RELATED_P (insn) = 1;
2305 }
2306 }
2307
2308 /* Stack adjustment for exception handler. */
2309 if (crtl->calls_eh_return)
2310 {
2311 /* We need to unwind the stack by the offset computed by
2312 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2313 based on SP. Ideally we would update the SP and define the
2314 CFA along the lines of:
2315
2316 SP = SP + EH_RETURN_STACKADJ_RTX
2317 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2318
2319 However the dwarf emitter only understands a constant
2320 register offset.
2321
2322 The solution chosen here is to use the otherwise unused IP0
2323 as a temporary register to hold the current SP value. The
2324 CFA is described using IP0 then SP is modified. */
2325
2326 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2327
2328 insn = emit_move_insn (ip0, stack_pointer_rtx);
2329 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2330 RTX_FRAME_RELATED_P (insn) = 1;
2331
2332 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2333
2334 /* Ensure the assignment to IP0 does not get optimized away. */
2335 emit_use (ip0);
2336 }
2337
2338 if (frame_size > -1)
2339 {
2340 if (frame_size >= 0x1000000)
2341 {
2342 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2343 emit_move_insn (op0, GEN_INT (frame_size));
2344 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2345 aarch64_set_frame_expr (gen_rtx_SET
2346 (Pmode, stack_pointer_rtx,
2347 gen_rtx_PLUS (Pmode,
2348 stack_pointer_rtx,
2349 GEN_INT (frame_size))));
2350 }
2351 else if (frame_size > 0)
2352 {
2353 if ((frame_size & 0xfff) != 0)
2354 {
2355 insn = emit_insn (gen_add2_insn
2356 (stack_pointer_rtx,
2357 GEN_INT ((frame_size
2358 & (HOST_WIDE_INT) 0xfff))));
2359 RTX_FRAME_RELATED_P (insn) = 1;
2360 }
2361 if ((frame_size & 0xfff) != frame_size)
2362 {
2363 insn = emit_insn (gen_add2_insn
2364 (stack_pointer_rtx,
2365 GEN_INT ((frame_size
2366 & ~ (HOST_WIDE_INT) 0xfff))));
2367 RTX_FRAME_RELATED_P (insn) = 1;
2368 }
2369 }
2370
2371 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2372 gen_rtx_PLUS (Pmode,
2373 stack_pointer_rtx,
2374 GEN_INT (offset))));
2375 }
2376
2377 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2378 if (!for_sibcall)
2379 emit_jump_insn (ret_rtx);
2380 }
2381
2382 /* Return the place to copy the exception unwinding return address to.
2383 This will probably be a stack slot, but could (in theory) be the
2384 return register. */
2385 rtx
2386 aarch64_final_eh_return_addr (void)
2387 {
2388 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2389 aarch64_layout_frame ();
2390 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2391 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2392 + crtl->outgoing_args_size);
2393 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2394 STACK_BOUNDARY / BITS_PER_UNIT);
2395 fp_offset = offset
2396 - original_frame_size
2397 - cfun->machine->frame.saved_regs_size;
2398
2399 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2400 return gen_rtx_REG (DImode, LR_REGNUM);
2401
2402 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2403 result in a store to save LR introduced by builtin_eh_return () being
2404 incorrectly deleted because the alias is not detected.
2405 So in the calculation of the address to copy the exception unwinding
2406 return address to, there are two cases to consider.
2407 If FP is needed and fp_offset is 0, it means that SP = FP and hence
2408 we return an SP-relative location, since all the addresses are SP-relative
2409 in this case. This prevents the store from being optimized away.
2410 If fp_offset is not 0, then the addresses will be FP-relative and
2411 therefore we return an FP-relative location. */
2412
2413 if (frame_pointer_needed)
2414 {
2415 if (fp_offset)
2416 return gen_frame_mem (DImode,
2417 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2418 else
2419 return gen_frame_mem (DImode,
2420 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2421 }
2422
2423 /* If FP is not needed, we calculate the location of LR, which would be
2424 at the top of the saved registers block. */
2425
2426 return gen_frame_mem (DImode,
2427 plus_constant (Pmode,
2428 stack_pointer_rtx,
2429 fp_offset
2430 + cfun->machine->frame.saved_regs_size
2431 - 2 * UNITS_PER_WORD));
2432 }
2433
2434 /* Output code to build up a constant in a register. */
2435 static void
2436 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2437 {
2438 if (aarch64_bitmask_imm (val, DImode))
2439 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2440 else
2441 {
2442 int i;
2443 int ncount = 0;
2444 int zcount = 0;
2445 HOST_WIDE_INT valp = val >> 16;
2446 HOST_WIDE_INT valm;
2447 HOST_WIDE_INT tval;
2448
2449 for (i = 16; i < 64; i += 16)
2450 {
2451 valm = (valp & 0xffff);
2452
2453 if (valm != 0)
2454 ++ zcount;
2455
2456 if (valm != 0xffff)
2457 ++ ncount;
2458
2459 valp >>= 16;
2460 }
2461
2462 /* zcount contains the number of additional MOVK instructions
2463 required if the constant is built up with an initial MOVZ instruction,
2464 while ncount is the number of MOVK instructions required if starting
2465 with a MOVN instruction. Choose the sequence that needs the fewer
2466 instructions, preferring MOVZ instructions when the two counts
2467 are equal. */
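/* Illustrative examples (the actual instructions come from the move and
   insv patterns): for 0x1234000000005678 only one 16-bit chunk above
   bit 15 is non-zero, so the MOVZ route wins and roughly

       movz  xN, #0x5678
       movk  xN, #0x1234, lsl #48

   is produced, whereas 0xffffffffffff1234 has every upper chunk equal to
   0xffff and is best started with a MOVN, needing no MOVK at all.  */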
2468 if (ncount < zcount)
2469 {
2470 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2471 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2472 tval = 0xffff;
2473 }
2474 else
2475 {
2476 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2477 GEN_INT (val & 0xffff));
2478 tval = 0;
2479 }
2480
2481 val >>= 16;
2482
2483 for (i = 16; i < 64; i += 16)
2484 {
2485 if ((val & 0xffff) != tval)
2486 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2487 GEN_INT (i), GEN_INT (val & 0xffff)));
2488 val >>= 16;
2489 }
2490 }
2491 }
2492
2493 static void
2494 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2495 {
2496 HOST_WIDE_INT mdelta = delta;
2497 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2498 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2499
2500 if (mdelta < 0)
2501 mdelta = -mdelta;
2502
2503 if (mdelta >= 4096 * 4096)
2504 {
2505 aarch64_build_constant (scratchreg, delta);
2506 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2507 }
2508 else if (mdelta > 0)
2509 {
2510 if (mdelta >= 4096)
2511 {
2512 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2513 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2514 if (delta < 0)
2515 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2516 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2517 else
2518 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2519 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2520 }
2521 if (mdelta % 4096 != 0)
2522 {
2523 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2524 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2525 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2526 }
2527 }
2528 }
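/* Illustrative example for the function above: DELTA == 5000 decomposes
   as 1 * 4096 + 904, so SCRATCHREG is loaded with 1, SCRATCHREG shifted
   left by 12 is added to the destination register, and the remaining 904
   is then added as a plain immediate.  */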
2529
2530 /* Output code to add DELTA to the first argument, and then jump
2531 to FUNCTION. Used for C++ multiple inheritance. */
2532 static void
2533 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2534 HOST_WIDE_INT delta,
2535 HOST_WIDE_INT vcall_offset,
2536 tree function)
2537 {
2538 /* The this pointer is always in x0. Note that this differs from
2539 ARM, where the this pointer may be bumped to r1 if r0 is required
2540 to return a pointer to an aggregate. On AArch64 a result value
2541 pointer will be in x8. */
2542 int this_regno = R0_REGNUM;
2543 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2544
2545 reload_completed = 1;
2546 emit_note (NOTE_INSN_PROLOGUE_END);
2547
2548 if (vcall_offset == 0)
2549 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2550 else
2551 {
2552 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2553
2554 this_rtx = gen_rtx_REG (Pmode, this_regno);
2555 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2556 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2557
2558 addr = this_rtx;
2559 if (delta != 0)
2560 {
2561 if (delta >= -256 && delta < 256)
2562 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2563 plus_constant (Pmode, this_rtx, delta));
2564 else
2565 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2566 }
2567
2568 if (Pmode == ptr_mode)
2569 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2570 else
2571 aarch64_emit_move (temp0,
2572 gen_rtx_ZERO_EXTEND (Pmode,
2573 gen_rtx_MEM (ptr_mode, addr)));
2574
2575 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2576 addr = plus_constant (Pmode, temp0, vcall_offset);
2577 else
2578 {
2579 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2580 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2581 }
2582
2583 if (Pmode == ptr_mode)
2584 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2585 else
2586 aarch64_emit_move (temp1,
2587 gen_rtx_SIGN_EXTEND (Pmode,
2588 gen_rtx_MEM (ptr_mode, addr)));
2589
2590 emit_insn (gen_add2_insn (this_rtx, temp1));
2591 }
2592
2593 /* Generate a tail call to the target function. */
2594 if (!TREE_USED (function))
2595 {
2596 assemble_external (function);
2597 TREE_USED (function) = 1;
2598 }
2599 funexp = XEXP (DECL_RTL (function), 0);
2600 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2601 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2602 SIBLING_CALL_P (insn) = 1;
2603
2604 insn = get_insns ();
2605 shorten_branches (insn);
2606 final_start_function (insn, file, 1);
2607 final (insn, file, 1);
2608 final_end_function ();
2609
2610 /* Stop pretending to be a post-reload pass. */
2611 reload_completed = 0;
2612 }
2613
2614 static int
2615 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2616 {
2617 if (GET_CODE (*x) == SYMBOL_REF)
2618 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2619
2620 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2621 TLS offsets, not real symbol references. */
2622 if (GET_CODE (*x) == UNSPEC
2623 && XINT (*x, 1) == UNSPEC_TLS)
2624 return -1;
2625
2626 return 0;
2627 }
2628
2629 static bool
2630 aarch64_tls_referenced_p (rtx x)
2631 {
2632 if (!TARGET_HAVE_TLS)
2633 return false;
2634
2635 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2636 }
2637
2638
2639 static int
2640 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2641 {
2642 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2643 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2644
2645 if (*imm1 < *imm2)
2646 return -1;
2647 if (*imm1 > *imm2)
2648 return +1;
2649 return 0;
2650 }
2651
2652
2653 static void
2654 aarch64_build_bitmask_table (void)
2655 {
2656 unsigned HOST_WIDE_INT mask, imm;
2657 unsigned int log_e, e, s, r;
2658 unsigned int nimms = 0;
2659
2660 for (log_e = 1; log_e <= 6; log_e++)
2661 {
2662 e = 1 << log_e;
2663 if (e == 64)
2664 mask = ~(HOST_WIDE_INT) 0;
2665 else
2666 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2667 for (s = 1; s < e; s++)
2668 {
2669 for (r = 0; r < e; r++)
2670 {
2671 /* Set S consecutive bits to 1 (S < 64). */
2672 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2673 /* Rotate right by R. */
2674 if (r != 0)
2675 imm = ((imm >> r) | (imm << (e - r))) & mask;
2676 /* Replicate the element to fill 64 bits, depending on the element size; each case deliberately falls through to the next. */
2677 switch (log_e) {
2678 case 1: imm |= (imm << 2);
2679 case 2: imm |= (imm << 4);
2680 case 3: imm |= (imm << 8);
2681 case 4: imm |= (imm << 16);
2682 case 5: imm |= (imm << 32);
2683 case 6:
2684 break;
2685 default:
2686 gcc_unreachable ();
2687 }
2688 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2689 aarch64_bitmasks[nimms++] = imm;
2690 }
2691 }
2692 }
2693
2694 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2695 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2696 aarch64_bitmasks_cmp);
2697 }
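/* Worked example for the table above (illustrative): with e = 16, s = 8
   and r = 4 the element is 0xf00f (eight consecutive ones rotated right
   by four), which replicates to 0xf00ff00ff00ff00f; aarch64_bitmask_imm
   below then finds such values with a binary search of the sorted
   table.  */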
2698
2699
2700 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2701 a left shift of 0 or 12 bits. */
2702 bool
2703 aarch64_uimm12_shift (HOST_WIDE_INT val)
2704 {
2705 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2706 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2707 );
2708 }
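/* For example, 0xfff and 0xfff000 satisfy the test above, while 0x1001
   does not, because its set bits straddle both 12-bit fields.  */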
2709
2710
2711 /* Return true if val is an immediate that can be loaded into a
2712 register by a MOVZ instruction. */
2713 static bool
2714 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2715 {
2716 if (GET_MODE_SIZE (mode) > 4)
2717 {
2718 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2719 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2720 return 1;
2721 }
2722 else
2723 {
2724 /* Ignore sign extension. */
2725 val &= (HOST_WIDE_INT) 0xffffffff;
2726 }
2727 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2728 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2729 }
2730
2731
2732 /* Return true if val is a valid bitmask immediate. */
2733 bool
2734 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2735 {
2736 if (GET_MODE_SIZE (mode) < 8)
2737 {
2738 /* Replicate bit pattern. */
2739 val &= (HOST_WIDE_INT) 0xffffffff;
2740 val |= val << 32;
2741 }
2742 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2743 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2744 }
2745
2746
2747 /* Return true if val is an immediate that can be loaded into a
2748 register in a single instruction. */
2749 bool
2750 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2751 {
2752 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2753 return 1;
2754 return aarch64_bitmask_imm (val, mode);
2755 }
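/* Illustrative examples: 0x0000ffff00000000 is a single MOVZ,
   0xffffffff0000ffff is a single MOVN (its complement is MOVZ-encodable),
   and 0x5555555555555555 is a bitmask immediate, so all three satisfy
   aarch64_move_imm.  */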
2756
2757 static bool
2758 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2759 {
2760 rtx base, offset;
2761
2762 if (GET_CODE (x) == HIGH)
2763 return true;
2764
2765 split_const (x, &base, &offset);
2766 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2767 {
2768 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2769 != SYMBOL_FORCE_TO_MEM)
2770 return true;
2771 else
2772 /* Avoid generating a 64-bit relocation in ILP32; leave
2773 to aarch64_expand_mov_immediate to handle it properly. */
2774 return mode != ptr_mode;
2775 }
2776
2777 return aarch64_tls_referenced_p (x);
2778 }
2779
2780 /* Return true if register REGNO is a valid index register.
2781 STRICT_P is true if REG_OK_STRICT is in effect. */
2782
2783 bool
2784 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2785 {
2786 if (!HARD_REGISTER_NUM_P (regno))
2787 {
2788 if (!strict_p)
2789 return true;
2790
2791 if (!reg_renumber)
2792 return false;
2793
2794 regno = reg_renumber[regno];
2795 }
2796 return GP_REGNUM_P (regno);
2797 }
2798
2799 /* Return true if register REGNO is a valid base register.
2800 STRICT_P is true if REG_OK_STRICT is in effect. */
2801
2802 bool
2803 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2804 {
2805 if (!HARD_REGISTER_NUM_P (regno))
2806 {
2807 if (!strict_p)
2808 return true;
2809
2810 if (!reg_renumber)
2811 return false;
2812
2813 regno = reg_renumber[regno];
2814 }
2815
2816 /* The fake registers will be eliminated to either the stack or
2817 hard frame pointer, both of which are usually valid base registers.
2818 Reload deals with the cases where the eliminated form isn't valid. */
2819 return (GP_REGNUM_P (regno)
2820 || regno == SP_REGNUM
2821 || regno == FRAME_POINTER_REGNUM
2822 || regno == ARG_POINTER_REGNUM);
2823 }
2824
2825 /* Return true if X is a valid base register.
2826 STRICT_P is true if REG_OK_STRICT is in effect. */
2827
2828 static bool
2829 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2830 {
2831 if (!strict_p && GET_CODE (x) == SUBREG)
2832 x = SUBREG_REG (x);
2833
2834 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2835 }
2836
2837 /* Return true if address offset is a valid index. If it is, fill in INFO
2838 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2839
2840 static bool
2841 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2842 enum machine_mode mode, bool strict_p)
2843 {
2844 enum aarch64_address_type type;
2845 rtx index;
2846 int shift;
2847
2848 /* (reg:P) */
2849 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2850 && GET_MODE (x) == Pmode)
2851 {
2852 type = ADDRESS_REG_REG;
2853 index = x;
2854 shift = 0;
2855 }
2856 /* (sign_extend:DI (reg:SI)) */
2857 else if ((GET_CODE (x) == SIGN_EXTEND
2858 || GET_CODE (x) == ZERO_EXTEND)
2859 && GET_MODE (x) == DImode
2860 && GET_MODE (XEXP (x, 0)) == SImode)
2861 {
2862 type = (GET_CODE (x) == SIGN_EXTEND)
2863 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2864 index = XEXP (x, 0);
2865 shift = 0;
2866 }
2867 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2868 else if (GET_CODE (x) == MULT
2869 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2870 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2871 && GET_MODE (XEXP (x, 0)) == DImode
2872 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2873 && CONST_INT_P (XEXP (x, 1)))
2874 {
2875 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2876 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2877 index = XEXP (XEXP (x, 0), 0);
2878 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2879 }
2880 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2881 else if (GET_CODE (x) == ASHIFT
2882 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2883 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2884 && GET_MODE (XEXP (x, 0)) == DImode
2885 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2886 && CONST_INT_P (XEXP (x, 1)))
2887 {
2888 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2889 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2890 index = XEXP (XEXP (x, 0), 0);
2891 shift = INTVAL (XEXP (x, 1));
2892 }
2893 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2894 else if ((GET_CODE (x) == SIGN_EXTRACT
2895 || GET_CODE (x) == ZERO_EXTRACT)
2896 && GET_MODE (x) == DImode
2897 && GET_CODE (XEXP (x, 0)) == MULT
2898 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2899 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2900 {
2901 type = (GET_CODE (x) == SIGN_EXTRACT)
2902 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2903 index = XEXP (XEXP (x, 0), 0);
2904 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2905 if (INTVAL (XEXP (x, 1)) != 32 + shift
2906 || INTVAL (XEXP (x, 2)) != 0)
2907 shift = -1;
2908 }
2909 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2910 (const_int 0xffffffff<<shift)) */
2911 else if (GET_CODE (x) == AND
2912 && GET_MODE (x) == DImode
2913 && GET_CODE (XEXP (x, 0)) == MULT
2914 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2915 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2916 && CONST_INT_P (XEXP (x, 1)))
2917 {
2918 type = ADDRESS_REG_UXTW;
2919 index = XEXP (XEXP (x, 0), 0);
2920 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2921 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2922 shift = -1;
2923 }
2924 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2925 else if ((GET_CODE (x) == SIGN_EXTRACT
2926 || GET_CODE (x) == ZERO_EXTRACT)
2927 && GET_MODE (x) == DImode
2928 && GET_CODE (XEXP (x, 0)) == ASHIFT
2929 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2930 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2931 {
2932 type = (GET_CODE (x) == SIGN_EXTRACT)
2933 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2934 index = XEXP (XEXP (x, 0), 0);
2935 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2936 if (INTVAL (XEXP (x, 1)) != 32 + shift
2937 || INTVAL (XEXP (x, 2)) != 0)
2938 shift = -1;
2939 }
2940 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2941 (const_int 0xffffffff<<shift)) */
2942 else if (GET_CODE (x) == AND
2943 && GET_MODE (x) == DImode
2944 && GET_CODE (XEXP (x, 0)) == ASHIFT
2945 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2946 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2947 && CONST_INT_P (XEXP (x, 1)))
2948 {
2949 type = ADDRESS_REG_UXTW;
2950 index = XEXP (XEXP (x, 0), 0);
2951 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2952 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2953 shift = -1;
2954 }
2955 /* (mult:P (reg:P) (const_int scale)) */
2956 else if (GET_CODE (x) == MULT
2957 && GET_MODE (x) == Pmode
2958 && GET_MODE (XEXP (x, 0)) == Pmode
2959 && CONST_INT_P (XEXP (x, 1)))
2960 {
2961 type = ADDRESS_REG_REG;
2962 index = XEXP (x, 0);
2963 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2964 }
2965 /* (ashift:P (reg:P) (const_int shift)) */
2966 else if (GET_CODE (x) == ASHIFT
2967 && GET_MODE (x) == Pmode
2968 && GET_MODE (XEXP (x, 0)) == Pmode
2969 && CONST_INT_P (XEXP (x, 1)))
2970 {
2971 type = ADDRESS_REG_REG;
2972 index = XEXP (x, 0);
2973 shift = INTVAL (XEXP (x, 1));
2974 }
2975 else
2976 return false;
2977
2978 if (GET_CODE (index) == SUBREG)
2979 index = SUBREG_REG (index);
2980
2981 if ((shift == 0
2982 || (shift > 0 && shift <= 3
2983 && (1 << shift) == GET_MODE_SIZE (mode)))
2984 && REG_P (index)
2985 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2986 {
2987 info->type = type;
2988 info->offset = index;
2989 info->shift = shift;
2990 return true;
2991 }
2992
2993 return false;
2994 }
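/* For example (illustrative, assuming the LP64 Pmode of DImode), an index
   of the form (mult:DI (reg:DI x2) (const_int 8)) used in a DImode access
   classifies as ADDRESS_REG_REG with shift 3 and is later printed as
   "[x0, x2, lsl 3]" when the base register is x0.  */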
2995
2996 static inline bool
2997 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2998 {
2999 return (offset >= -64 * GET_MODE_SIZE (mode)
3000 && offset < 64 * GET_MODE_SIZE (mode)
3001 && offset % GET_MODE_SIZE (mode) == 0);
3002 }
3003
3004 static inline bool
3005 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3006 HOST_WIDE_INT offset)
3007 {
3008 return offset >= -256 && offset < 256;
3009 }
3010
3011 static inline bool
3012 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3013 {
3014 return (offset >= 0
3015 && offset < 4096 * GET_MODE_SIZE (mode)
3016 && offset % GET_MODE_SIZE (mode) == 0);
3017 }
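/* Taking DImode as a concrete example, the three helpers above accept
   offsets in [-512, 504] in steps of 8, [-256, 255] unscaled, and
   [0, 32760] in steps of 8, respectively.  */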
3018
3019 /* Return true if X is a valid address for machine mode MODE. If it is,
3020 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3021 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3022
3023 static bool
3024 aarch64_classify_address (struct aarch64_address_info *info,
3025 rtx x, enum machine_mode mode,
3026 RTX_CODE outer_code, bool strict_p)
3027 {
3028 enum rtx_code code = GET_CODE (x);
3029 rtx op0, op1;
3030 bool allow_reg_index_p =
3031 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3032
3033 /* Don't support anything other than POST_INC or REG addressing for
3034 AdvSIMD. */
3035 if (aarch64_vector_mode_p (mode)
3036 && (code != POST_INC && code != REG))
3037 return false;
3038
3039 switch (code)
3040 {
3041 case REG:
3042 case SUBREG:
3043 info->type = ADDRESS_REG_IMM;
3044 info->base = x;
3045 info->offset = const0_rtx;
3046 return aarch64_base_register_rtx_p (x, strict_p);
3047
3048 case PLUS:
3049 op0 = XEXP (x, 0);
3050 op1 = XEXP (x, 1);
3051 if (GET_MODE_SIZE (mode) != 0
3052 && CONST_INT_P (op1)
3053 && aarch64_base_register_rtx_p (op0, strict_p))
3054 {
3055 HOST_WIDE_INT offset = INTVAL (op1);
3056
3057 info->type = ADDRESS_REG_IMM;
3058 info->base = op0;
3059 info->offset = op1;
3060
3061 /* TImode and TFmode values are allowed in both pairs of X
3062 registers and individual Q registers. The available
3063 address modes are:
3064 X,X: 7-bit signed scaled offset
3065 Q: 9-bit signed offset
3066 We conservatively require an offset representable in either mode.
3067 */
3068 if (mode == TImode || mode == TFmode)
3069 return (offset_7bit_signed_scaled_p (mode, offset)
3070 && offset_9bit_signed_unscaled_p (mode, offset));
3071
3072 if (outer_code == PARALLEL)
3073 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3074 && offset_7bit_signed_scaled_p (mode, offset));
3075 else
3076 return (offset_9bit_signed_unscaled_p (mode, offset)
3077 || offset_12bit_unsigned_scaled_p (mode, offset));
3078 }
3079
3080 if (allow_reg_index_p)
3081 {
3082 /* Look for base + (scaled/extended) index register. */
3083 if (aarch64_base_register_rtx_p (op0, strict_p)
3084 && aarch64_classify_index (info, op1, mode, strict_p))
3085 {
3086 info->base = op0;
3087 return true;
3088 }
3089 if (aarch64_base_register_rtx_p (op1, strict_p)
3090 && aarch64_classify_index (info, op0, mode, strict_p))
3091 {
3092 info->base = op1;
3093 return true;
3094 }
3095 }
3096
3097 return false;
3098
3099 case POST_INC:
3100 case POST_DEC:
3101 case PRE_INC:
3102 case PRE_DEC:
3103 info->type = ADDRESS_REG_WB;
3104 info->base = XEXP (x, 0);
3105 info->offset = NULL_RTX;
3106 return aarch64_base_register_rtx_p (info->base, strict_p);
3107
3108 case POST_MODIFY:
3109 case PRE_MODIFY:
3110 info->type = ADDRESS_REG_WB;
3111 info->base = XEXP (x, 0);
3112 if (GET_CODE (XEXP (x, 1)) == PLUS
3113 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3114 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3115 && aarch64_base_register_rtx_p (info->base, strict_p))
3116 {
3117 HOST_WIDE_INT offset;
3118 info->offset = XEXP (XEXP (x, 1), 1);
3119 offset = INTVAL (info->offset);
3120
3121 /* TImode and TFmode values are allowed in both pairs of X
3122 registers and individual Q registers. The available
3123 address modes are:
3124 X,X: 7-bit signed scaled offset
3125 Q: 9-bit signed offset
3126 We conservatively require an offset representable in either mode.
3127 */
3128 if (mode == TImode || mode == TFmode)
3129 return (offset_7bit_signed_scaled_p (mode, offset)
3130 && offset_9bit_signed_unscaled_p (mode, offset));
3131
3132 if (outer_code == PARALLEL)
3133 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3134 && offset_7bit_signed_scaled_p (mode, offset));
3135 else
3136 return offset_9bit_signed_unscaled_p (mode, offset);
3137 }
3138 return false;
3139
3140 case CONST:
3141 case SYMBOL_REF:
3142 case LABEL_REF:
3143 /* Load literal: PC-relative constant pool entry. Only supported
3144 for SI mode or larger. */
3145 info->type = ADDRESS_SYMBOLIC;
3146 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3147 {
3148 rtx sym, addend;
3149
3150 split_const (x, &sym, &addend);
3151 return (GET_CODE (sym) == LABEL_REF
3152 || (GET_CODE (sym) == SYMBOL_REF
3153 && CONSTANT_POOL_ADDRESS_P (sym)));
3154 }
3155 return false;
3156
3157 case LO_SUM:
3158 info->type = ADDRESS_LO_SUM;
3159 info->base = XEXP (x, 0);
3160 info->offset = XEXP (x, 1);
3161 if (allow_reg_index_p
3162 && aarch64_base_register_rtx_p (info->base, strict_p))
3163 {
3164 rtx sym, offs;
3165 split_const (info->offset, &sym, &offs);
3166 if (GET_CODE (sym) == SYMBOL_REF
3167 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3168 == SYMBOL_SMALL_ABSOLUTE))
3169 {
3170 /* The symbol and offset must be aligned to the access size. */
3171 unsigned int align;
3172 unsigned int ref_size;
3173
3174 if (CONSTANT_POOL_ADDRESS_P (sym))
3175 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3176 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3177 {
3178 tree exp = SYMBOL_REF_DECL (sym);
3179 align = TYPE_ALIGN (TREE_TYPE (exp));
3180 align = CONSTANT_ALIGNMENT (exp, align);
3181 }
3182 else if (SYMBOL_REF_DECL (sym))
3183 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3184 else
3185 align = BITS_PER_UNIT;
3186
3187 ref_size = GET_MODE_SIZE (mode);
3188 if (ref_size == 0)
3189 ref_size = GET_MODE_SIZE (DImode);
3190
3191 return ((INTVAL (offs) & (ref_size - 1)) == 0
3192 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3193 }
3194 }
3195 return false;
3196
3197 default:
3198 return false;
3199 }
3200 }
3201
3202 bool
3203 aarch64_symbolic_address_p (rtx x)
3204 {
3205 rtx offset;
3206
3207 split_const (x, &x, &offset);
3208 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3209 }
3210
3211 /* Classify the base of symbolic expression X, given that X appears in
3212 context CONTEXT. */
3213
3214 enum aarch64_symbol_type
3215 aarch64_classify_symbolic_expression (rtx x,
3216 enum aarch64_symbol_context context)
3217 {
3218 rtx offset;
3219
3220 split_const (x, &x, &offset);
3221 return aarch64_classify_symbol (x, context);
3222 }
3223
3224
3225 /* Return TRUE if X is a legitimate address for accessing memory in
3226 mode MODE. */
3227 static bool
3228 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3229 {
3230 struct aarch64_address_info addr;
3231
3232 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3233 }
3234
3235 /* Return TRUE if X is a legitimate address for accessing memory in
3236 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3237 pair operation. */
3238 bool
3239 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3240 RTX_CODE outer_code, bool strict_p)
3241 {
3242 struct aarch64_address_info addr;
3243
3244 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3245 }
3246
3247 /* Return TRUE if rtx X is an immediate constant 0.0. */
3248 bool
3249 aarch64_float_const_zero_rtx_p (rtx x)
3250 {
3251 REAL_VALUE_TYPE r;
3252
3253 if (GET_MODE (x) == VOIDmode)
3254 return false;
3255
3256 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3257 if (REAL_VALUE_MINUS_ZERO (r))
3258 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3259 return REAL_VALUES_EQUAL (r, dconst0);
3260 }
3261
3262 /* Return the fixed registers used for condition codes. */
3263
3264 static bool
3265 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3266 {
3267 *p1 = CC_REGNUM;
3268 *p2 = INVALID_REGNUM;
3269 return true;
3270 }
3271
3272 enum machine_mode
3273 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3274 {
3275 /* All floating point comparisons return CCFP, except for LT, LE, GT and
3276 GE, which must raise an exception on unordered operands and so return CCFPE. */
3277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3278 {
3279 switch (code)
3280 {
3281 case EQ:
3282 case NE:
3283 case UNORDERED:
3284 case ORDERED:
3285 case UNLT:
3286 case UNLE:
3287 case UNGT:
3288 case UNGE:
3289 case UNEQ:
3290 case LTGT:
3291 return CCFPmode;
3292
3293 case LT:
3294 case LE:
3295 case GT:
3296 case GE:
3297 return CCFPEmode;
3298
3299 default:
3300 gcc_unreachable ();
3301 }
3302 }
3303
3304 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3305 && y == const0_rtx
3306 && (code == EQ || code == NE || code == LT || code == GE)
3307 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3308 || GET_CODE (x) == NEG))
3309 return CC_NZmode;
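/* For example, (compare (plus x y) (const_int 0)) tested for EQ selects
   CC_NZmode here, which lets the comparison be implemented as an ADDS of
   x and y rather than a separate ADD and CMP.  */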
3310
3311 /* A compare with a shifted operand. Because of canonicalization,
3312 the comparison will have to be swapped when we emit the assembly
3313 code. */
3314 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3315 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3316 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3317 || GET_CODE (x) == LSHIFTRT
3318 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3319 return CC_SWPmode;
3320
3321 /* A compare of a mode narrower than SI mode against zero can be done
3322 by extending the value in the comparison. */
3323 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3324 && y == const0_rtx)
3325 /* Only use sign-extension if we really need it. */
3326 return ((code == GT || code == GE || code == LE || code == LT)
3327 ? CC_SESWPmode : CC_ZESWPmode);
3328
3329 /* For everything else, return CCmode. */
3330 return CCmode;
3331 }
3332
3333 static unsigned
3334 aarch64_get_condition_code (rtx x)
3335 {
3336 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3337 enum rtx_code comp_code = GET_CODE (x);
3338
3339 if (GET_MODE_CLASS (mode) != MODE_CC)
3340 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3341
3342 switch (mode)
3343 {
3344 case CCFPmode:
3345 case CCFPEmode:
3346 switch (comp_code)
3347 {
3348 case GE: return AARCH64_GE;
3349 case GT: return AARCH64_GT;
3350 case LE: return AARCH64_LS;
3351 case LT: return AARCH64_MI;
3352 case NE: return AARCH64_NE;
3353 case EQ: return AARCH64_EQ;
3354 case ORDERED: return AARCH64_VC;
3355 case UNORDERED: return AARCH64_VS;
3356 case UNLT: return AARCH64_LT;
3357 case UNLE: return AARCH64_LE;
3358 case UNGT: return AARCH64_HI;
3359 case UNGE: return AARCH64_PL;
3360 default: gcc_unreachable ();
3361 }
3362 break;
3363
3364 case CCmode:
3365 switch (comp_code)
3366 {
3367 case NE: return AARCH64_NE;
3368 case EQ: return AARCH64_EQ;
3369 case GE: return AARCH64_GE;
3370 case GT: return AARCH64_GT;
3371 case LE: return AARCH64_LE;
3372 case LT: return AARCH64_LT;
3373 case GEU: return AARCH64_CS;
3374 case GTU: return AARCH64_HI;
3375 case LEU: return AARCH64_LS;
3376 case LTU: return AARCH64_CC;
3377 default: gcc_unreachable ();
3378 }
3379 break;
3380
3381 case CC_SWPmode:
3382 case CC_ZESWPmode:
3383 case CC_SESWPmode:
3384 switch (comp_code)
3385 {
3386 case NE: return AARCH64_NE;
3387 case EQ: return AARCH64_EQ;
3388 case GE: return AARCH64_LE;
3389 case GT: return AARCH64_LT;
3390 case LE: return AARCH64_GE;
3391 case LT: return AARCH64_GT;
3392 case GEU: return AARCH64_LS;
3393 case GTU: return AARCH64_CC;
3394 case LEU: return AARCH64_CS;
3395 case LTU: return AARCH64_HI;
3396 default: gcc_unreachable ();
3397 }
3398 break;
3399
3400 case CC_NZmode:
3401 switch (comp_code)
3402 {
3403 case NE: return AARCH64_NE;
3404 case EQ: return AARCH64_EQ;
3405 case GE: return AARCH64_PL;
3406 case LT: return AARCH64_MI;
3407 default: gcc_unreachable ();
3408 }
3409 break;
3410
3411 default:
3412 gcc_unreachable ();
3413 break;
3414 }
3415 }
3416
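/* Return the number of bits set in VALUE; the loop clears the lowest set
   bit on each iteration (Kernighan's method), so it runs once per set
   bit.  */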
3417 static unsigned
3418 bit_count (unsigned HOST_WIDE_INT value)
3419 {
3420 unsigned count = 0;
3421
3422 while (value)
3423 {
3424 count++;
3425 value &= value - 1;
3426 }
3427
3428 return count;
3429 }
3430
3431 void
3432 aarch64_print_operand (FILE *f, rtx x, char code)
3433 {
3434 switch (code)
3435 {
3436 case 'e':
3437 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3438 {
3439 int n;
3440
3441 if (GET_CODE (x) != CONST_INT
3442 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3443 {
3444 output_operand_lossage ("invalid operand for '%%%c'", code);
3445 return;
3446 }
3447
3448 switch (n)
3449 {
3450 case 3:
3451 fputc ('b', f);
3452 break;
3453 case 4:
3454 fputc ('h', f);
3455 break;
3456 case 5:
3457 fputc ('w', f);
3458 break;
3459 default:
3460 output_operand_lossage ("invalid operand for '%%%c'", code);
3461 return;
3462 }
3463 }
3464 break;
3465
3466 case 'p':
3467 {
3468 int n;
3469
3470 /* Print N such that 2^N == X. */
3471 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3472 {
3473 output_operand_lossage ("invalid operand for '%%%c'", code);
3474 return;
3475 }
3476
3477 asm_fprintf (f, "%d", n);
3478 }
3479 break;
3480
3481 case 'P':
3482 /* Print the number of non-zero bits in X (a const_int). */
3483 if (GET_CODE (x) != CONST_INT)
3484 {
3485 output_operand_lossage ("invalid operand for '%%%c'", code);
3486 return;
3487 }
3488
3489 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3490 break;
3491
3492 case 'H':
3493 /* Print the higher numbered register of a pair (TImode) of regs. */
3494 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3495 {
3496 output_operand_lossage ("invalid operand for '%%%c'", code);
3497 return;
3498 }
3499
3500 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3501 break;
3502
3503 case 'm':
3504 /* Print a condition (eq, ne, etc). */
3505
3506 /* CONST_TRUE_RTX means always -- that's the default. */
3507 if (x == const_true_rtx)
3508 return;
3509
3510 if (!COMPARISON_P (x))
3511 {
3512 output_operand_lossage ("invalid operand for '%%%c'", code);
3513 return;
3514 }
3515
3516 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3517 break;
3518
3519 case 'M':
3520 /* Print the inverse of a condition (eq <-> ne, etc). */
3521
3522 /* CONST_TRUE_RTX means never -- that's the default. */
3523 if (x == const_true_rtx)
3524 {
3525 fputs ("nv", f);
3526 return;
3527 }
3528
3529 if (!COMPARISON_P (x))
3530 {
3531 output_operand_lossage ("invalid operand for '%%%c'", code);
3532 return;
3533 }
3534
3535 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3536 (aarch64_get_condition_code (x))], f);
3537 break;
3538
3539 case 'b':
3540 case 'h':
3541 case 's':
3542 case 'd':
3543 case 'q':
3544 /* Print a scalar FP/SIMD register name. */
3545 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3546 {
3547 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3548 return;
3549 }
3550 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3551 break;
3552
3553 case 'S':
3554 case 'T':
3555 case 'U':
3556 case 'V':
3557 /* Print the first FP/SIMD register name in a list. */
3558 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3559 {
3560 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3561 return;
3562 }
3563 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3564 break;
3565
3566 case 'X':
3567 /* Print bottom 16 bits of integer constant in hex. */
3568 if (GET_CODE (x) != CONST_INT)
3569 {
3570 output_operand_lossage ("invalid operand for '%%%c'", code);
3571 return;
3572 }
3573 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3574 break;
3575
3576 case 'w':
3577 case 'x':
3578 /* Print a general register name or the zero register (32-bit or
3579 64-bit). */
3580 if (x == const0_rtx
3581 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3582 {
3583 asm_fprintf (f, "%czr", code);
3584 break;
3585 }
3586
3587 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3588 {
3589 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3590 break;
3591 }
3592
3593 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3594 {
3595 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3596 break;
3597 }
3598
3599 /* Fall through */
3600
3601 case 0:
3602 /* Print a normal operand. If it's a general register, then we
3603 assume DImode. */
3604 if (x == NULL)
3605 {
3606 output_operand_lossage ("missing operand");
3607 return;
3608 }
3609
3610 switch (GET_CODE (x))
3611 {
3612 case REG:
3613 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3614 break;
3615
3616 case MEM:
3617 aarch64_memory_reference_mode = GET_MODE (x);
3618 output_address (XEXP (x, 0));
3619 break;
3620
3621 case LABEL_REF:
3622 case SYMBOL_REF:
3623 output_addr_const (asm_out_file, x);
3624 break;
3625
3626 case CONST_INT:
3627 asm_fprintf (f, "%wd", INTVAL (x));
3628 break;
3629
3630 case CONST_VECTOR:
3631 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3632 {
3633 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3634 HOST_WIDE_INT_MIN,
3635 HOST_WIDE_INT_MAX));
3636 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3637 }
3638 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3639 {
3640 fputc ('0', f);
3641 }
3642 else
3643 gcc_unreachable ();
3644 break;
3645
3646 case CONST_DOUBLE:
3647 /* CONST_DOUBLE can represent a double-width integer.
3648 In this case, the mode of x is VOIDmode. */
3649 if (GET_MODE (x) == VOIDmode)
3650 ; /* Do Nothing. */
3651 else if (aarch64_float_const_zero_rtx_p (x))
3652 {
3653 fputc ('0', f);
3654 break;
3655 }
3656 else if (aarch64_float_const_representable_p (x))
3657 {
3658 #define buf_size 20
3659 char float_buf[buf_size] = {'\0'};
3660 REAL_VALUE_TYPE r;
3661 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3662 real_to_decimal_for_mode (float_buf, &r,
3663 buf_size, buf_size,
3664 1, GET_MODE (x));
3665 asm_fprintf (asm_out_file, "%s", float_buf);
3666 break;
3667 #undef buf_size
3668 }
3669 output_operand_lossage ("invalid constant");
3670 return;
3671 default:
3672 output_operand_lossage ("invalid operand");
3673 return;
3674 }
3675 break;
3676
3677 case 'A':
3678 if (GET_CODE (x) == HIGH)
3679 x = XEXP (x, 0);
3680
3681 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3682 {
3683 case SYMBOL_SMALL_GOT:
3684 asm_fprintf (asm_out_file, ":got:");
3685 break;
3686
3687 case SYMBOL_SMALL_TLSGD:
3688 asm_fprintf (asm_out_file, ":tlsgd:");
3689 break;
3690
3691 case SYMBOL_SMALL_TLSDESC:
3692 asm_fprintf (asm_out_file, ":tlsdesc:");
3693 break;
3694
3695 case SYMBOL_SMALL_GOTTPREL:
3696 asm_fprintf (asm_out_file, ":gottprel:");
3697 break;
3698
3699 case SYMBOL_SMALL_TPREL:
3700 asm_fprintf (asm_out_file, ":tprel:");
3701 break;
3702
3703 case SYMBOL_TINY_GOT:
3704 gcc_unreachable ();
3705 break;
3706
3707 default:
3708 break;
3709 }
3710 output_addr_const (asm_out_file, x);
3711 break;
3712
3713 case 'L':
3714 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3715 {
3716 case SYMBOL_SMALL_GOT:
3717 asm_fprintf (asm_out_file, ":lo12:");
3718 break;
3719
3720 case SYMBOL_SMALL_TLSGD:
3721 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3722 break;
3723
3724 case SYMBOL_SMALL_TLSDESC:
3725 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3726 break;
3727
3728 case SYMBOL_SMALL_GOTTPREL:
3729 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3730 break;
3731
3732 case SYMBOL_SMALL_TPREL:
3733 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3734 break;
3735
3736 case SYMBOL_TINY_GOT:
3737 asm_fprintf (asm_out_file, ":got:");
3738 break;
3739
3740 default:
3741 break;
3742 }
3743 output_addr_const (asm_out_file, x);
3744 break;
3745
3746 case 'G':
3747
3748 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3749 {
3750 case SYMBOL_SMALL_TPREL:
3751 asm_fprintf (asm_out_file, ":tprel_hi12:");
3752 break;
3753 default:
3754 break;
3755 }
3756 output_addr_const (asm_out_file, x);
3757 break;
3758
3759 default:
3760 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3761 return;
3762 }
3763 }
3764
3765 void
3766 aarch64_print_operand_address (FILE *f, rtx x)
3767 {
3768 struct aarch64_address_info addr;
3769
3770 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3771 MEM, true))
3772 switch (addr.type)
3773 {
3774 case ADDRESS_REG_IMM:
3775 if (addr.offset == const0_rtx)
3776 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3777 else
3778 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3779 INTVAL (addr.offset));
3780 return;
3781
3782 case ADDRESS_REG_REG:
3783 if (addr.shift == 0)
3784 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3785 reg_names [REGNO (addr.offset)]);
3786 else
3787 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3788 reg_names [REGNO (addr.offset)], addr.shift);
3789 return;
3790
3791 case ADDRESS_REG_UXTW:
3792 if (addr.shift == 0)
3793 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3794 REGNO (addr.offset) - R0_REGNUM);
3795 else
3796 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3797 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3798 return;
3799
3800 case ADDRESS_REG_SXTW:
3801 if (addr.shift == 0)
3802 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3803 REGNO (addr.offset) - R0_REGNUM);
3804 else
3805 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3806 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3807 return;
3808
3809 case ADDRESS_REG_WB:
3810 switch (GET_CODE (x))
3811 {
3812 case PRE_INC:
3813 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3814 GET_MODE_SIZE (aarch64_memory_reference_mode));
3815 return;
3816 case POST_INC:
3817 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3818 GET_MODE_SIZE (aarch64_memory_reference_mode));
3819 return;
3820 case PRE_DEC:
3821 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3822 GET_MODE_SIZE (aarch64_memory_reference_mode));
3823 return;
3824 case POST_DEC:
3825 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3826 GET_MODE_SIZE (aarch64_memory_reference_mode));
3827 return;
3828 case PRE_MODIFY:
3829 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3830 INTVAL (addr.offset));
3831 return;
3832 case POST_MODIFY:
3833 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3834 INTVAL (addr.offset));
3835 return;
3836 default:
3837 break;
3838 }
3839 break;
3840
3841 case ADDRESS_LO_SUM:
3842 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3843 output_addr_const (f, addr.offset);
3844 asm_fprintf (f, "]");
3845 return;
3846
3847 case ADDRESS_SYMBOLIC:
3848 break;
3849 }
3850
3851 output_addr_const (f, x);
3852 }
3853
3854 void
3855 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3856 int labelno ATTRIBUTE_UNUSED)
3857 {
3858 sorry ("function profiling");
3859 }
3860
3861 bool
3862 aarch64_label_mentioned_p (rtx x)
3863 {
3864 const char *fmt;
3865 int i;
3866
3867 if (GET_CODE (x) == LABEL_REF)
3868 return true;
3869
3870 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3871 referencing instruction, but they are constant offsets, not
3872 symbols. */
3873 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3874 return false;
3875
3876 fmt = GET_RTX_FORMAT (GET_CODE (x));
3877 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3878 {
3879 if (fmt[i] == 'E')
3880 {
3881 int j;
3882
3883 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3884 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3885 return 1;
3886 }
3887 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3888 return 1;
3889 }
3890
3891 return 0;
3892 }
3893
3894 /* Implement REGNO_REG_CLASS. */
3895
3896 enum reg_class
3897 aarch64_regno_regclass (unsigned regno)
3898 {
3899 if (GP_REGNUM_P (regno))
3900 return CORE_REGS;
3901
3902 if (regno == SP_REGNUM)
3903 return STACK_REG;
3904
3905 if (regno == FRAME_POINTER_REGNUM
3906 || regno == ARG_POINTER_REGNUM)
3907 return CORE_REGS;
3908
3909 if (FP_REGNUM_P (regno))
3910 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3911
3912 return NO_REGS;
3913 }
3914
3915 /* Try a machine-dependent way of reloading an illegitimate address
3916 operand. If we find one, push the reload and return the new rtx. */
3917
3918 rtx
3919 aarch64_legitimize_reload_address (rtx *x_p,
3920 enum machine_mode mode,
3921 int opnum, int type,
3922 int ind_levels ATTRIBUTE_UNUSED)
3923 {
3924 rtx x = *x_p;
3925
3926 /* Do not allow mem (plus (reg, const)) if vector mode. */
3927 if (aarch64_vector_mode_p (mode)
3928 && GET_CODE (x) == PLUS
3929 && REG_P (XEXP (x, 0))
3930 && CONST_INT_P (XEXP (x, 1)))
3931 {
3932 rtx orig_rtx = x;
3933 x = copy_rtx (x);
3934 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3935 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3936 opnum, (enum reload_type) type);
3937 return x;
3938 }
3939
3940 /* We must recognize output that we have already generated ourselves. */
3941 if (GET_CODE (x) == PLUS
3942 && GET_CODE (XEXP (x, 0)) == PLUS
3943 && REG_P (XEXP (XEXP (x, 0), 0))
3944 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3945 && CONST_INT_P (XEXP (x, 1)))
3946 {
3947 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3948 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3949 opnum, (enum reload_type) type);
3950 return x;
3951 }
3952
3953 /* We wish to handle large displacements off a base register by splitting
3954 the addend across an add and the mem insn. This can cut the number of
3955 extra insns needed from 3 to 1. It is only useful for load/store of a
3956 	     single register with a 12-bit offset field.  */
3957 if (GET_CODE (x) == PLUS
3958 && REG_P (XEXP (x, 0))
3959 && CONST_INT_P (XEXP (x, 1))
3960 && HARD_REGISTER_P (XEXP (x, 0))
3961 && mode != TImode
3962 && mode != TFmode
3963 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3964 {
3965 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3966 HOST_WIDE_INT low = val & 0xfff;
3967 HOST_WIDE_INT high = val - low;
3968 HOST_WIDE_INT offs;
3969 rtx cst;
3970 enum machine_mode xmode = GET_MODE (x);
3971
3972 /* In ILP32, xmode can be either DImode or SImode. */
3973 gcc_assert (xmode == DImode || xmode == SImode);
3974
3975 	      /* Punt on zero-sized (e.g. BLKmode) references: we cannot ascertain
3976 	         their alignment, so leave them to the generic reload code.  */
3977 if (GET_MODE_SIZE (mode) == 0)
3978 return NULL_RTX;
3979
3980 offs = low % GET_MODE_SIZE (mode);
3981
3982 /* Align misaligned offset by adjusting high part to compensate. */
3983 if (offs != 0)
3984 {
3985 if (aarch64_uimm12_shift (high + offs))
3986 {
3987 /* Align down. */
3988 low = low - offs;
3989 high = high + offs;
3990 }
3991 else
3992 {
3993 /* Align up. */
3994 offs = GET_MODE_SIZE (mode) - offs;
3995 low = low + offs;
3996 high = high + (low & 0x1000) - offs;
3997 low &= 0xfff;
3998 }
3999 }
4000
4001 /* Check for overflow. */
4002 if (high + low != val)
4003 return NULL_RTX;
4004
4005 cst = GEN_INT (high);
4006 if (!aarch64_uimm12_shift (high))
4007 cst = force_const_mem (xmode, cst);
4008
4009 /* Reload high part into base reg, leaving the low part
4010 in the mem instruction. */
4011 x = gen_rtx_PLUS (xmode,
4012 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4013 GEN_INT (low));
4014
4015 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4016 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4017 opnum, (enum reload_type) type);
4018 return x;
4019 }
4020
4021 return NULL_RTX;
4022 }
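/* A worked example of the displacement split above (illustrative only, with
   assumed values and scratch registers): for an SImode access to base + 0x12344,
   val = 0x12344, so low = 0x344 and high = 0x12000.  low is already a multiple
   of GET_MODE_SIZE (SImode), so no realignment is needed, and high fits the
   "uimm12, possibly shifted by 12" form accepted by aarch64_uimm12_shift.
   Reload therefore materialises high with a single ADD into a scratch base
   register and the memory insn keeps the small offset:

       add  xN, xBASE, #0x12000
       ldr  wM, [xN, #0x344]

   (xN/xM stand for whichever scratch and destination registers reload picks.)
   Without the split, the full constant would first have to be built with a
   MOVZ/MOVK sequence and then added, costing extra instructions.  */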
4023
4024
4025 static reg_class_t
4026 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4027 reg_class_t rclass,
4028 enum machine_mode mode,
4029 secondary_reload_info *sri)
4030 {
4031 /* Address expressions of the form PLUS (SP, large_offset) need two
4032 scratch registers, one for the constant, and one for holding a
4033 copy of SP, since SP cannot be used on the RHS of an add-reg
4034 instruction. */
4035 if (mode == DImode
4036 && GET_CODE (x) == PLUS
4037 && XEXP (x, 0) == stack_pointer_rtx
4038 && CONST_INT_P (XEXP (x, 1))
4039 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
4040 {
4041 sri->icode = CODE_FOR_reload_sp_immediate;
4042 return NO_REGS;
4043 }
4044
4045 /* Without the TARGET_SIMD instructions we cannot move a Q register
4046 to a Q register directly. We need a scratch. */
4047 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4048 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4049 && reg_class_subset_p (rclass, FP_REGS))
4050 {
4051 if (mode == TFmode)
4052 sri->icode = CODE_FOR_aarch64_reload_movtf;
4053 else if (mode == TImode)
4054 sri->icode = CODE_FOR_aarch64_reload_movti;
4055 return NO_REGS;
4056 }
4057
4058 	  /* A TFmode or TImode memory access should be handled via the FP_REGS class
4059 because AArch64 has richer addressing modes for LDR/STR instructions
4060 than LDP/STP instructions. */
4061 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4062 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4063 return FP_REGS;
4064
4065 	  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4066 return CORE_REGS;
4067
4068 return NO_REGS;
4069 }
4070
4071 static bool
4072 aarch64_can_eliminate (const int from, const int to)
4073 {
4074 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4075 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4076
4077 if (frame_pointer_needed)
4078 {
4079 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4080 return true;
4081 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4082 return false;
4083 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4084 && !cfun->calls_alloca)
4085 return true;
4086 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4087 return true;
4088 return false;
4089 }
4090 else
4091 {
4092 /* If we decided that we didn't need a leaf frame pointer but then used
4093 LR in the function, then we'll want a frame pointer after all, so
4094 prevent this elimination to ensure a frame pointer is used.
4095
4096 NOTE: the original value of flag_omit_frame_pointer gets trashed
4097 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4098 of faked_omit_frame_pointer here (which is true when we always
4099 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4100 pointers when LR is clobbered). */
4101 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4102 && df_regs_ever_live_p (LR_REGNUM)
4103 && faked_omit_frame_pointer)
4104 return false;
4105 }
4106
4107 return true;
4108 }
4109
4110 HOST_WIDE_INT
4111 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4112 {
4113 HOST_WIDE_INT frame_size;
4114 HOST_WIDE_INT offset;
4115
4116 aarch64_layout_frame ();
4117 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4118 + crtl->outgoing_args_size
4119 + cfun->machine->saved_varargs_size);
4120
4121 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4122 offset = frame_size;
4123
4124 if (to == HARD_FRAME_POINTER_REGNUM)
4125 {
4126 if (from == ARG_POINTER_REGNUM)
4127 return offset - crtl->outgoing_args_size;
4128
4129 if (from == FRAME_POINTER_REGNUM)
4130 return cfun->machine->frame.saved_regs_size;
4131 }
4132
4133 if (to == STACK_POINTER_REGNUM)
4134 {
4135 if (from == FRAME_POINTER_REGNUM)
4136 {
4137 HOST_WIDE_INT elim = crtl->outgoing_args_size
4138 + cfun->machine->frame.saved_regs_size
4139 - cfun->machine->frame.fp_lr_offset;
4140 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4141 return elim;
4142 }
4143 }
4144
4145 return offset;
4146 }
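/* Illustrative example of the offsets computed above (assumed numbers, not
   taken from the sources): with 40 bytes of locals, 16 bytes of saved
   registers, 32 bytes of outgoing arguments and no saved varargs,
   frame_size = 40 + 16 + 32 = 88, rounded up to 96.  Then:
     ARG_POINTER   -> HARD_FRAME_POINTER : 96 - 32 = 64
     FRAME_POINTER -> HARD_FRAME_POINTER : 16 (saved_regs_size)
     ARG_POINTER   -> STACK_POINTER      : 96 (the full rounded frame)
   while FRAME_POINTER -> STACK_POINTER subtracts fp_lr_offset and re-rounds
   as shown in the code.  */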
4147
4148
4149 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4150 previous frame. */
4151
4152 rtx
4153 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4154 {
4155 if (count != 0)
4156 return const0_rtx;
4157 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4158 }
4159
4160
4161 static void
4162 aarch64_asm_trampoline_template (FILE *f)
4163 {
4164 if (TARGET_ILP32)
4165 {
4166 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4167 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4168 }
4169 else
4170 {
4171 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4172 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4173 }
4174 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4175 assemble_aligned_integer (4, const0_rtx);
4176 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4177 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4178 }
4179
4180 static void
4181 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4182 {
4183 rtx fnaddr, mem, a_tramp;
4184 const int tramp_code_sz = 16;
4185
4186 	  /* We don't need to copy the trailing D-words; we fill those in below.  */
4187 emit_block_move (m_tramp, assemble_trampoline_template (),
4188 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4189 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4190 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4191 if (GET_MODE (fnaddr) != ptr_mode)
4192 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4193 emit_move_insn (mem, fnaddr);
4194
4195 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4196 emit_move_insn (mem, chain_value);
4197
4198 /* XXX We should really define a "clear_cache" pattern and use
4199 gen_clear_cache(). */
4200 a_tramp = XEXP (m_tramp, 0);
4201 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4202 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4203 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4204 ptr_mode);
4205 }
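/* Resulting trampoline layout (LP64, POINTER_BYTES == 8; shown here only as an
   illustration of the template and init code above):

     offset  0: ldr  IP1, .+16            ; load target function address
     offset  4: ldr  STATIC_CHAIN, .+20   ; load static chain value
     offset  8: br   IP1
     offset 12: 4 bytes of zero padding
     offset 16: target function address   (written by aarch64_trampoline_init)
     offset 24: static chain value        (written by aarch64_trampoline_init)

   For ILP32 the two data slots are 4 bytes each, at offsets 16 and 20, which
   is why both loads in the template use the same ".+16" offset there.  */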
4206
4207 static unsigned char
4208 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4209 {
4210 switch (regclass)
4211 {
4212 case CORE_REGS:
4213 case POINTER_REGS:
4214 case GENERAL_REGS:
4215 case ALL_REGS:
4216 case FP_REGS:
4217 case FP_LO_REGS:
4218 return
4219 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4220 (GET_MODE_SIZE (mode) + 7) / 8;
4221 case STACK_REG:
4222 return 1;
4223
4224 case NO_REGS:
4225 return 0;
4226
4227 default:
4228 break;
4229 }
4230 gcc_unreachable ();
4231 }
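/* For example (illustrative): DImode in CORE_REGS needs (8 + 7) / 8 = 1
   register, TImode needs (16 + 7) / 8 = 2, while a V4SImode vector needs
   (16 + 15) / 16 = 1, i.e. a single Q register.  */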
4232
4233 static reg_class_t
4234 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4235 {
4236 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4237 ? GENERAL_REGS : regclass);
4238 }
4239
4240 void
4241 aarch64_asm_output_labelref (FILE* f, const char *name)
4242 {
4243 asm_fprintf (f, "%U%s", name);
4244 }
4245
4246 static void
4247 aarch64_elf_asm_constructor (rtx symbol, int priority)
4248 {
4249 if (priority == DEFAULT_INIT_PRIORITY)
4250 default_ctor_section_asm_out_constructor (symbol, priority);
4251 else
4252 {
4253 section *s;
4254 char buf[18];
4255 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4256 s = get_section (buf, SECTION_WRITE, NULL);
4257 switch_to_section (s);
4258 assemble_align (POINTER_SIZE);
4259 assemble_aligned_integer (POINTER_BYTES, symbol);
4260 }
4261 }
4262
4263 static void
4264 aarch64_elf_asm_destructor (rtx symbol, int priority)
4265 {
4266 if (priority == DEFAULT_INIT_PRIORITY)
4267 default_dtor_section_asm_out_destructor (symbol, priority);
4268 else
4269 {
4270 section *s;
4271 char buf[18];
4272 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4273 s = get_section (buf, SECTION_WRITE, NULL);
4274 switch_to_section (s);
4275 assemble_align (POINTER_SIZE);
4276 assemble_aligned_integer (POINTER_BYTES, symbol);
4277 }
4278 }
4279
4280 const char*
4281 aarch64_output_casesi (rtx *operands)
4282 {
4283 char buf[100];
4284 char label[100];
4285 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4286 int index;
4287 static const char *const patterns[4][2] =
4288 {
4289 {
4290 "ldrb\t%w3, [%0,%w1,uxtw]",
4291 "add\t%3, %4, %w3, sxtb #2"
4292 },
4293 {
4294 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4295 "add\t%3, %4, %w3, sxth #2"
4296 },
4297 {
4298 "ldr\t%w3, [%0,%w1,uxtw #2]",
4299 "add\t%3, %4, %w3, sxtw #2"
4300 },
4301 /* We assume that DImode is only generated when not optimizing and
4302 that we don't really need 64-bit address offsets. That would
4303 imply an object file with 8GB of code in a single function! */
4304 {
4305 "ldr\t%w3, [%0,%w1,uxtw #2]",
4306 "add\t%3, %4, %w3, sxtw #2"
4307 }
4308 };
4309
4310 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4311
4312 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4313
4314 gcc_assert (index >= 0 && index <= 3);
4315
4316 	  /* Need to implement table size reduction, by changing the code below.  */
4317 output_asm_insn (patterns[index][0], operands);
4318 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4319 snprintf (buf, sizeof (buf),
4320 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4321 output_asm_insn (buf, operands);
4322 output_asm_insn (patterns[index][1], operands);
4323 output_asm_insn ("br\t%3", operands);
4324 assemble_label (asm_out_file, label);
4325 return "";
4326 }
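/* As an illustration (assumed label number and register choices, not from the
   sources), for a HImode dispatch table (index == 1) the sequence emitted
   above looks like:

       ldrh    w3, [x0, w1, uxtw #1]   ; load the 16-bit table entry
       adr     x4, .Lrtx4              ; table base = the label emitted below
       add     x3, x4, w3, sxth #2     ; entry is a scaled signed offset
       br      x3
   .Lrtx4:

   where operands 0/1/3/4 stand for the table base, index, scratch and
   address-temporary registers chosen by the caller.  */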
4327
4328
4329 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4330 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4331 operator. */
4332
4333 int
4334 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4335 {
4336 if (shift >= 0 && shift <= 3)
4337 {
4338 int size;
4339 for (size = 8; size <= 32; size *= 2)
4340 {
4341 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4342 if (mask == bits << shift)
4343 return size;
4344 }
4345 }
4346 return 0;
4347 }
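/* For example, aarch64_uxt_size (1, 0x1fe) returns 8, because
   0xff << 1 == 0x1fe: the operand is a byte value shifted left by one, so it
   can be expressed with a UXTB-style extend in the instruction.
   aarch64_uxt_size (0, 0x12345) returns 0, since no 8/16/32-bit mask
   matches.  */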
4348
4349 static bool
4350 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4351 const_rtx x ATTRIBUTE_UNUSED)
4352 {
4353 /* We can't use blocks for constants when we're using a per-function
4354 constant pool. */
4355 return false;
4356 }
4357
4358 static section *
4359 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4360 rtx x ATTRIBUTE_UNUSED,
4361 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4362 {
4363 /* Force all constant pool entries into the current function section. */
4364 return function_section (current_function_decl);
4365 }
4366
4367
4368 /* Costs. */
4369
4370 /* Helper function for rtx cost calculation. Strip a shift expression
4371 from X. Returns the inner operand if successful, or the original
4372 expression on failure. */
4373 static rtx
4374 aarch64_strip_shift (rtx x)
4375 {
4376 rtx op = x;
4377
4378 if ((GET_CODE (op) == ASHIFT
4379 || GET_CODE (op) == ASHIFTRT
4380 || GET_CODE (op) == LSHIFTRT)
4381 && CONST_INT_P (XEXP (op, 1)))
4382 return XEXP (op, 0);
4383
4384 if (GET_CODE (op) == MULT
4385 && CONST_INT_P (XEXP (op, 1))
4386 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4387 return XEXP (op, 0);
4388
4389 return x;
4390 }
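/* Examples (illustrative): both (ashift (reg X) (const_int 3)) and
   (mult (reg X) (const_int 8)) strip down to (reg X), since a multiply by a
   power of two is the canonical form of a left shift inside an address or
   arithmetic operand.  A multiply by a non-power-of-two is returned
   unchanged.  */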
4391
4392 /* Helper function for rtx cost calculation. Strip a shift or extend
4393 expression from X. Returns the inner operand if successful, or the
4394 original expression on failure. We deal with a number of possible
4395 canonicalization variations here. */
4396 static rtx
4397 aarch64_strip_shift_or_extend (rtx x)
4398 {
4399 rtx op = x;
4400
4401 /* Zero and sign extraction of a widened value. */
4402 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4403 && XEXP (op, 2) == const0_rtx
4404 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4405 XEXP (op, 1)))
4406 return XEXP (XEXP (op, 0), 0);
4407
4408 /* It can also be represented (for zero-extend) as an AND with an
4409 immediate. */
4410 if (GET_CODE (op) == AND
4411 && GET_CODE (XEXP (op, 0)) == MULT
4412 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4413 && CONST_INT_P (XEXP (op, 1))
4414 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4415 INTVAL (XEXP (op, 1))) != 0)
4416 return XEXP (XEXP (op, 0), 0);
4417
4418 /* Now handle extended register, as this may also have an optional
4419 left shift by 1..4. */
4420 if (GET_CODE (op) == ASHIFT
4421 && CONST_INT_P (XEXP (op, 1))
4422 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4423 op = XEXP (op, 0);
4424
4425 if (GET_CODE (op) == ZERO_EXTEND
4426 || GET_CODE (op) == SIGN_EXTEND)
4427 op = XEXP (op, 0);
4428
4429 if (op != x)
4430 return op;
4431
4432 return aarch64_strip_shift (x);
4433 }
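/* Example of the AND form handled above (illustrative):
   (and (mult (reg X) (const_int 4)) (const_int 0x3fc)) strips to (reg X),
   because aarch64_uxt_size (log2 (4), 0x3fc) == 8 -- this is a byte value
   zero-extended and scaled by 4, which the extended-register forms of the
   arithmetic instructions can do for free.  */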
4434
4435 /* Calculate the cost of calculating X, storing it in *COST. Result
4436 is true if the total cost of the operation has now been calculated. */
4437 static bool
4438 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4439 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4440 {
4441 rtx op0, op1;
4442 const struct cpu_rtx_cost_table *extra_cost
4443 = aarch64_tune_params->insn_extra_cost;
4444
4445 switch (code)
4446 {
4447 case SET:
4448 op0 = SET_DEST (x);
4449 op1 = SET_SRC (x);
4450
4451 switch (GET_CODE (op0))
4452 {
4453 case MEM:
4454 if (speed)
4455 *cost += extra_cost->memory_store;
4456
4457 if (op1 != const0_rtx)
4458 *cost += rtx_cost (op1, SET, 1, speed);
4459 return true;
4460
4461 case SUBREG:
4462 if (! REG_P (SUBREG_REG (op0)))
4463 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4464 /* Fall through. */
4465 case REG:
4466 /* Cost is just the cost of the RHS of the set. */
4467 *cost += rtx_cost (op1, SET, 1, true);
4468 return true;
4469
4470 case ZERO_EXTRACT: /* Bit-field insertion. */
4471 case SIGN_EXTRACT:
4472 /* Strip any redundant widening of the RHS to meet the width of
4473 the target. */
4474 if (GET_CODE (op1) == SUBREG)
4475 op1 = SUBREG_REG (op1);
4476 if ((GET_CODE (op1) == ZERO_EXTEND
4477 || GET_CODE (op1) == SIGN_EXTEND)
4478 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4479 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4480 >= INTVAL (XEXP (op0, 1))))
4481 op1 = XEXP (op1, 0);
4482 *cost += rtx_cost (op1, SET, 1, speed);
4483 return true;
4484
4485 default:
4486 break;
4487 }
4488 return false;
4489
4490 case MEM:
4491 if (speed)
4492 *cost += extra_cost->memory_load;
4493
4494 return true;
4495
4496 case NEG:
4497 op0 = CONST0_RTX (GET_MODE (x));
4498 op1 = XEXP (x, 0);
4499 goto cost_minus;
4500
4501 case COMPARE:
4502 op0 = XEXP (x, 0);
4503 op1 = XEXP (x, 1);
4504
4505 if (op1 == const0_rtx
4506 && GET_CODE (op0) == AND)
4507 {
4508 x = op0;
4509 goto cost_logic;
4510 }
4511
4512 /* Comparisons can work if the order is swapped.
4513 Canonicalization puts the more complex operation first, but
4514 we want it in op1. */
4515 if (! (REG_P (op0)
4516 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4517 {
4518 op0 = XEXP (x, 1);
4519 op1 = XEXP (x, 0);
4520 }
4521 goto cost_minus;
4522
4523 case MINUS:
4524 op0 = XEXP (x, 0);
4525 op1 = XEXP (x, 1);
4526
4527 cost_minus:
4528 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4529 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4530 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4531 {
4532 if (op0 != const0_rtx)
4533 *cost += rtx_cost (op0, MINUS, 0, speed);
4534
4535 if (CONST_INT_P (op1))
4536 {
4537 if (!aarch64_uimm12_shift (INTVAL (op1)))
4538 *cost += rtx_cost (op1, MINUS, 1, speed);
4539 }
4540 else
4541 {
4542 op1 = aarch64_strip_shift_or_extend (op1);
4543 *cost += rtx_cost (op1, MINUS, 1, speed);
4544 }
4545 return true;
4546 }
4547
4548 return false;
4549
4550 case PLUS:
4551 op0 = XEXP (x, 0);
4552 op1 = XEXP (x, 1);
4553
4554 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4555 {
4556 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4557 {
4558 *cost += rtx_cost (op0, PLUS, 0, speed);
4559 }
4560 else
4561 {
4562 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4563
4564 if (new_op0 == op0
4565 && GET_CODE (op0) == MULT)
4566 {
4567 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4568 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4569 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4570 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4571 {
4572 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4573 speed)
4574 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4575 speed)
4576 + rtx_cost (op1, PLUS, 1, speed));
4577 if (speed)
4578 *cost += extra_cost->int_multiply_extend_add;
4579 return true;
4580 }
4581 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4582 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4583 + rtx_cost (op1, PLUS, 1, speed));
4584
4585 if (speed)
4586 	                *cost += extra_cost->int_multiply_add;
 	              return true;
4587 	            }
4588
4589 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4590 + rtx_cost (op1, PLUS, 1, speed));
4591 }
4592 return true;
4593 }
4594
4595 return false;
4596
4597 case IOR:
4598 case XOR:
4599 case AND:
4600 cost_logic:
4601 op0 = XEXP (x, 0);
4602 op1 = XEXP (x, 1);
4603
4604 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4605 {
4606 if (CONST_INT_P (op1)
4607 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4608 {
4609 *cost += rtx_cost (op0, AND, 0, speed);
4610 }
4611 else
4612 {
4613 if (GET_CODE (op0) == NOT)
4614 op0 = XEXP (op0, 0);
4615 op0 = aarch64_strip_shift (op0);
4616 *cost += (rtx_cost (op0, AND, 0, speed)
4617 + rtx_cost (op1, AND, 1, speed));
4618 }
4619 return true;
4620 }
4621 return false;
4622
4623 case ZERO_EXTEND:
4624 if ((GET_MODE (x) == DImode
4625 && GET_MODE (XEXP (x, 0)) == SImode)
4626 || GET_CODE (XEXP (x, 0)) == MEM)
4627 {
4628 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4629 return true;
4630 }
4631 return false;
4632
4633 case SIGN_EXTEND:
4634 if (GET_CODE (XEXP (x, 0)) == MEM)
4635 {
4636 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4637 return true;
4638 }
4639 return false;
4640
4641 case ROTATE:
4642 if (!CONST_INT_P (XEXP (x, 1)))
4643 *cost += COSTS_N_INSNS (2);
4644 /* Fall through. */
4645 case ROTATERT:
4646 case LSHIFTRT:
4647 case ASHIFT:
4648 case ASHIFTRT:
4649
4650 /* Shifting by a register often takes an extra cycle. */
4651 if (speed && !CONST_INT_P (XEXP (x, 1)))
4652 *cost += extra_cost->register_shift;
4653
4654 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4655 return true;
4656
4657 case HIGH:
4658 if (!CONSTANT_P (XEXP (x, 0)))
4659 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4660 return true;
4661
4662 case LO_SUM:
4663 if (!CONSTANT_P (XEXP (x, 1)))
4664 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4665 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4666 return true;
4667
4668 case ZERO_EXTRACT:
4669 case SIGN_EXTRACT:
4670 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4671 return true;
4672
4673 case MULT:
4674 op0 = XEXP (x, 0);
4675 op1 = XEXP (x, 1);
4676
4677 *cost = COSTS_N_INSNS (1);
4678 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4679 {
4680 if (CONST_INT_P (op1)
4681 && exact_log2 (INTVAL (op1)) > 0)
4682 {
4683 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4684 return true;
4685 }
4686
4687 if ((GET_CODE (op0) == ZERO_EXTEND
4688 && GET_CODE (op1) == ZERO_EXTEND)
4689 || (GET_CODE (op0) == SIGN_EXTEND
4690 && GET_CODE (op1) == SIGN_EXTEND))
4691 {
4692 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4693 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4694 if (speed)
4695 *cost += extra_cost->int_multiply_extend;
4696 return true;
4697 }
4698
4699 if (speed)
4700 *cost += extra_cost->int_multiply;
4701 }
4702 else if (speed)
4703 {
4704 if (GET_MODE (x) == DFmode)
4705 *cost += extra_cost->double_multiply;
4706 else if (GET_MODE (x) == SFmode)
4707 *cost += extra_cost->float_multiply;
4708 }
4709
4710 return false; /* All arguments need to be in registers. */
4711
4712 case MOD:
4713 case UMOD:
4714 *cost = COSTS_N_INSNS (2);
4715 if (speed)
4716 {
4717 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4718 *cost += (extra_cost->int_multiply_add
4719 + extra_cost->int_divide);
4720 else if (GET_MODE (x) == DFmode)
4721 *cost += (extra_cost->double_multiply
4722 + extra_cost->double_divide);
4723 else if (GET_MODE (x) == SFmode)
4724 *cost += (extra_cost->float_multiply
4725 + extra_cost->float_divide);
4726 }
4727 return false; /* All arguments need to be in registers. */
4728
4729 case DIV:
4730 case UDIV:
4731 *cost = COSTS_N_INSNS (1);
4732 if (speed)
4733 {
4734 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4735 *cost += extra_cost->int_divide;
4736 else if (GET_MODE (x) == DFmode)
4737 *cost += extra_cost->double_divide;
4738 else if (GET_MODE (x) == SFmode)
4739 *cost += extra_cost->float_divide;
4740 }
4741 return false; /* All arguments need to be in registers. */
4742
4743 default:
4744 break;
4745 }
4746 return false;
4747 }
4748
4749 static int
4750 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4751 enum machine_mode mode ATTRIBUTE_UNUSED,
4752 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4753 {
4754 enum rtx_code c = GET_CODE (x);
4755 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4756
4757 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4758 return addr_cost->pre_modify;
4759
4760 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4761 return addr_cost->post_modify;
4762
4763 if (c == PLUS)
4764 {
4765 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4766 return addr_cost->imm_offset;
4767 else if (GET_CODE (XEXP (x, 0)) == MULT
4768 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4769 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4770 return addr_cost->register_extend;
4771
4772 return addr_cost->register_offset;
4773 }
4774 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4775 return addr_cost->imm_offset;
4776
4777 return 0;
4778 }
4779
4780 static int
4781 	aarch64_register_move_cost (enum machine_mode mode,
4782 reg_class_t from, reg_class_t to)
4783 {
4784 const struct cpu_regmove_cost *regmove_cost
4785 = aarch64_tune_params->regmove_cost;
4786
4787 if (from == GENERAL_REGS && to == GENERAL_REGS)
4788 return regmove_cost->GP2GP;
4789 else if (from == GENERAL_REGS)
4790 return regmove_cost->GP2FP;
4791 else if (to == GENERAL_REGS)
4792 return regmove_cost->FP2GP;
4793
4794 /* When AdvSIMD instructions are disabled it is not possible to move
4795 a 128-bit value directly between Q registers. This is handled in
4796 secondary reload. A general register is used as a scratch to move
4797 the upper DI value and the lower DI value is moved directly,
4798 hence the cost is the sum of three moves. */
4799
4800 	  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4801 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4802
4803 return regmove_cost->FP2FP;
4804 }
4805
4806 static int
4807 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4808 reg_class_t rclass ATTRIBUTE_UNUSED,
4809 bool in ATTRIBUTE_UNUSED)
4810 {
4811 return aarch64_tune_params->memmov_cost;
4812 }
4813
4814 /* Vectorizer cost model target hooks. */
4815
4816 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4817 static int
4818 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4819 tree vectype,
4820 int misalign ATTRIBUTE_UNUSED)
4821 {
4822 unsigned elements;
4823
4824 switch (type_of_cost)
4825 {
4826 case scalar_stmt:
4827 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4828
4829 case scalar_load:
4830 return aarch64_tune_params->vec_costs->scalar_load_cost;
4831
4832 case scalar_store:
4833 return aarch64_tune_params->vec_costs->scalar_store_cost;
4834
4835 case vector_stmt:
4836 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4837
4838 case vector_load:
4839 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4840
4841 case vector_store:
4842 return aarch64_tune_params->vec_costs->vec_store_cost;
4843
4844 case vec_to_scalar:
4845 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4846
4847 case scalar_to_vec:
4848 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4849
4850 case unaligned_load:
4851 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4852
4853 case unaligned_store:
4854 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4855
4856 case cond_branch_taken:
4857 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4858
4859 case cond_branch_not_taken:
4860 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4861
4862 case vec_perm:
4863 case vec_promote_demote:
4864 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4865
4866 case vec_construct:
4867 elements = TYPE_VECTOR_SUBPARTS (vectype);
4868 return elements / 2 + 1;
4869
4870 default:
4871 gcc_unreachable ();
4872 }
4873 }
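/* For instance (illustrative), constructing a V4SI vector from scalars is
   costed as elements / 2 + 1 = 4 / 2 + 1 = 3 units, on the assumption that
   pairs of elements can be combined before being inserted; all other entries
   simply come from the per-CPU vec_costs table selected by the tuning.  */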
4874
4875 /* Implement targetm.vectorize.add_stmt_cost. */
4876 static unsigned
4877 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4878 struct _stmt_vec_info *stmt_info, int misalign,
4879 enum vect_cost_model_location where)
4880 {
4881 unsigned *cost = (unsigned *) data;
4882 unsigned retval = 0;
4883
4884 if (flag_vect_cost_model)
4885 {
4886 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4887 int stmt_cost =
4888 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4889
4890 /* Statements in an inner loop relative to the loop being
4891 vectorized are weighted more heavily. The value here is
4892 a function (linear for now) of the loop nest level. */
4893 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4894 {
4895 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4896 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4897 unsigned nest_level = loop_depth (loop);
4898
4899 count *= nest_level;
4900 }
4901
4902 retval = (unsigned) (count * stmt_cost);
4903 cost[where] += retval;
4904 }
4905
4906 return retval;
4907 }
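/* Example of the weighting above (illustrative numbers): a vector_stmt whose
   base cost from aarch64_builtin_vectorization_cost is 1, located in a loop
   of depth 2 inside the body of the loop being vectorized, has its count
   scaled by that nest level, so it contributes 2 * 1 = 2 to the vect_body
   bucket.  */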
4908
4909 static void initialize_aarch64_code_model (void);
4910
4911 /* Parse the architecture extension string. */
4912
4913 static void
4914 aarch64_parse_extension (char *str)
4915 {
4916 /* The extension string is parsed left to right. */
4917 const struct aarch64_option_extension *opt = NULL;
4918
4919 /* Flag to say whether we are adding or removing an extension. */
4920 int adding_ext = -1;
4921
4922 while (str != NULL && *str != 0)
4923 {
4924 char *ext;
4925 size_t len;
4926
4927 str++;
4928 ext = strchr (str, '+');
4929
4930 if (ext != NULL)
4931 len = ext - str;
4932 else
4933 len = strlen (str);
4934
4935 if (len >= 2 && strncmp (str, "no", 2) == 0)
4936 {
4937 adding_ext = 0;
4938 len -= 2;
4939 str += 2;
4940 }
4941 else if (len > 0)
4942 adding_ext = 1;
4943
4944 if (len == 0)
4945 {
4946 error ("missing feature modifier after %qs", "+no");
4947 return;
4948 }
4949
4950 /* Scan over the extensions table trying to find an exact match. */
4951 for (opt = all_extensions; opt->name != NULL; opt++)
4952 {
4953 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4954 {
4955 /* Add or remove the extension. */
4956 if (adding_ext)
4957 aarch64_isa_flags |= opt->flags_on;
4958 else
4959 aarch64_isa_flags &= ~(opt->flags_off);
4960 break;
4961 }
4962 }
4963
4964 if (opt->name == NULL)
4965 {
4966 /* Extension not found in list. */
4967 error ("unknown feature modifier %qs", str);
4968 return;
4969 }
4970
4971 str = ext;
4972 	    }
4973
4974 return;
4975 }
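/* Illustrative example (assuming "fp" and "simd" are entries in
   all_extensions): for the suffix "+fp+nosimd" the loop above first parses
   "fp" with adding_ext == 1 and ORs in its flags_on bits, then parses "simd"
   with the "no" prefix (adding_ext == 0) and clears its flags_off bits from
   aarch64_isa_flags.  An unknown name such as "+foo" produces the
   "unknown feature modifier" error.  */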
4976
4977 /* Parse the ARCH string. */
4978
4979 static void
4980 aarch64_parse_arch (void)
4981 {
4982 char *ext;
4983 const struct processor *arch;
4984 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4985 size_t len;
4986
4987 strcpy (str, aarch64_arch_string);
4988
4989 ext = strchr (str, '+');
4990
4991 if (ext != NULL)
4992 len = ext - str;
4993 else
4994 len = strlen (str);
4995
4996 if (len == 0)
4997 {
4998 error ("missing arch name in -march=%qs", str);
4999 return;
5000 }
5001
5002 /* Loop through the list of supported ARCHs to find a match. */
5003 for (arch = all_architectures; arch->name != NULL; arch++)
5004 {
5005 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5006 {
5007 selected_arch = arch;
5008 aarch64_isa_flags = selected_arch->flags;
5009 selected_cpu = &all_cores[selected_arch->core];
5010
5011 if (ext != NULL)
5012 {
5013 /* ARCH string contains at least one extension. */
5014 aarch64_parse_extension (ext);
5015 }
5016
5017 return;
5018 }
5019 }
5020
5021 /* ARCH name not found in list. */
5022 error ("unknown value %qs for -march", str);
5023 return;
5024 }
5025
5026 /* Parse the CPU string. */
5027
5028 static void
5029 aarch64_parse_cpu (void)
5030 {
5031 char *ext;
5032 const struct processor *cpu;
5033 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5034 size_t len;
5035
5036 strcpy (str, aarch64_cpu_string);
5037
5038 ext = strchr (str, '+');
5039
5040 if (ext != NULL)
5041 len = ext - str;
5042 else
5043 len = strlen (str);
5044
5045 if (len == 0)
5046 {
5047 error ("missing cpu name in -mcpu=%qs", str);
5048 return;
5049 }
5050
5051 /* Loop through the list of supported CPUs to find a match. */
5052 for (cpu = all_cores; cpu->name != NULL; cpu++)
5053 {
5054 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5055 {
5056 selected_cpu = cpu;
5057 aarch64_isa_flags = selected_cpu->flags;
5058
5059 if (ext != NULL)
5060 {
5061 /* CPU string contains at least one extension. */
5062 aarch64_parse_extension (ext);
5063 }
5064
5065 return;
5066 }
5067 }
5068
5069 /* CPU name not found in list. */
5070 error ("unknown value %qs for -mcpu", str);
5071 return;
5072 }
5073
5074 /* Parse the TUNE string. */
5075
5076 static void
5077 aarch64_parse_tune (void)
5078 {
5079 const struct processor *cpu;
5080 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5081 strcpy (str, aarch64_tune_string);
5082
5083 /* Loop through the list of supported CPUs to find a match. */
5084 for (cpu = all_cores; cpu->name != NULL; cpu++)
5085 {
5086 if (strcmp (cpu->name, str) == 0)
5087 {
5088 selected_tune = cpu;
5089 return;
5090 }
5091 }
5092
5093 /* CPU name not found in list. */
5094 error ("unknown value %qs for -mtune", str);
5095 return;
5096 }
5097
5098
5099 /* Implement TARGET_OPTION_OVERRIDE. */
5100
5101 static void
5102 aarch64_override_options (void)
5103 {
5104 	  /* -march wins over -mcpu, so when -march is given, -mcpu is ignored and the
5105 	     CPU is taken from the architecture; otherwise the architecture remains
5106 	     undefined.  -mtune can be used with either -march or -mcpu.  */
5107
5108 if (aarch64_arch_string)
5109 {
5110 aarch64_parse_arch ();
5111 aarch64_cpu_string = NULL;
5112 }
5113
5114 if (aarch64_cpu_string)
5115 {
5116 aarch64_parse_cpu ();
5117 selected_arch = NULL;
5118 }
5119
5120 if (aarch64_tune_string)
5121 {
5122 aarch64_parse_tune ();
5123 }
5124
5125 initialize_aarch64_code_model ();
5126
5127 aarch64_build_bitmask_table ();
5128
5129 /* This target defaults to strict volatile bitfields. */
5130 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5131 flag_strict_volatile_bitfields = 1;
5132
5133 /* If the user did not specify a processor, choose the default
5134 one for them. This will be the CPU set during configuration using
5135 --with-cpu, otherwise it is "generic". */
5136 if (!selected_cpu)
5137 {
5138 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5139 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5140 }
5141
5142 gcc_assert (selected_cpu);
5143
5144 	  /* The selected cpu may be an architecture, so look up tuning by core ID.  */
5145 if (!selected_tune)
5146 selected_tune = &all_cores[selected_cpu->core];
5147
5148 aarch64_tune_flags = selected_tune->flags;
5149 aarch64_tune = selected_tune->core;
5150 aarch64_tune_params = selected_tune->tune;
5151
5152 aarch64_override_options_after_change ();
5153 }
5154
5155 /* Implement targetm.override_options_after_change. */
5156
5157 static void
5158 aarch64_override_options_after_change (void)
5159 {
5160 faked_omit_frame_pointer = false;
5161
5162 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5163 that aarch64_frame_pointer_required will be called. We need to remember
5164 whether flag_omit_frame_pointer was turned on normally or just faked. */
5165
5166 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5167 {
5168 flag_omit_frame_pointer = true;
5169 faked_omit_frame_pointer = true;
5170 }
5171 }
5172
5173 static struct machine_function *
5174 aarch64_init_machine_status (void)
5175 {
5176 struct machine_function *machine;
5177 machine = ggc_alloc_cleared_machine_function ();
5178 return machine;
5179 }
5180
5181 void
5182 aarch64_init_expanders (void)
5183 {
5184 init_machine_status = aarch64_init_machine_status;
5185 }
5186
5187 	/* Select the code model to use, validating it against -fpic/-fPIC.  */
5188 static void
5189 initialize_aarch64_code_model (void)
5190 {
5191 if (flag_pic)
5192 {
5193 switch (aarch64_cmodel_var)
5194 {
5195 case AARCH64_CMODEL_TINY:
5196 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5197 break;
5198 case AARCH64_CMODEL_SMALL:
5199 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5200 break;
5201 case AARCH64_CMODEL_LARGE:
5202 sorry ("code model %qs with -f%s", "large",
5203 	               flag_pic > 1 ? "PIC" : "pic");
 	          break;
5204 	        default:
5205 gcc_unreachable ();
5206 }
5207 }
5208 else
5209 aarch64_cmodel = aarch64_cmodel_var;
5210 }
5211
5212 /* Return true if SYMBOL_REF X binds locally. */
5213
5214 static bool
5215 aarch64_symbol_binds_local_p (const_rtx x)
5216 {
5217 return (SYMBOL_REF_DECL (x)
5218 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5219 : SYMBOL_REF_LOCAL_P (x));
5220 }
5221
5222 	/* Return true if SYMBOL_REF X is thread local.  */
5223 static bool
5224 aarch64_tls_symbol_p (rtx x)
5225 {
5226 if (! TARGET_HAVE_TLS)
5227 return false;
5228
5229 if (GET_CODE (x) != SYMBOL_REF)
5230 return false;
5231
5232 return SYMBOL_REF_TLS_MODEL (x) != 0;
5233 }
5234
5235 /* Classify a TLS symbol into one of the TLS kinds. */
5236 enum aarch64_symbol_type
5237 aarch64_classify_tls_symbol (rtx x)
5238 {
5239 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5240
5241 switch (tls_kind)
5242 {
5243 case TLS_MODEL_GLOBAL_DYNAMIC:
5244 case TLS_MODEL_LOCAL_DYNAMIC:
5245 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5246
5247 case TLS_MODEL_INITIAL_EXEC:
5248 return SYMBOL_SMALL_GOTTPREL;
5249
5250 case TLS_MODEL_LOCAL_EXEC:
5251 return SYMBOL_SMALL_TPREL;
5252
5253 case TLS_MODEL_EMULATED:
5254 case TLS_MODEL_NONE:
5255 return SYMBOL_FORCE_TO_MEM;
5256
5257 default:
5258 gcc_unreachable ();
5259 }
5260 }
5261
5262 /* Return the method that should be used to access SYMBOL_REF or
5263 LABEL_REF X in context CONTEXT. */
5264
5265 enum aarch64_symbol_type
5266 aarch64_classify_symbol (rtx x,
5267 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5268 {
5269 if (GET_CODE (x) == LABEL_REF)
5270 {
5271 switch (aarch64_cmodel)
5272 {
5273 case AARCH64_CMODEL_LARGE:
5274 return SYMBOL_FORCE_TO_MEM;
5275
5276 case AARCH64_CMODEL_TINY_PIC:
5277 case AARCH64_CMODEL_TINY:
5278 return SYMBOL_TINY_ABSOLUTE;
5279
5280 case AARCH64_CMODEL_SMALL_PIC:
5281 case AARCH64_CMODEL_SMALL:
5282 return SYMBOL_SMALL_ABSOLUTE;
5283
5284 default:
5285 gcc_unreachable ();
5286 }
5287 }
5288
5289 if (GET_CODE (x) == SYMBOL_REF)
5290 {
5291 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5292 || CONSTANT_POOL_ADDRESS_P (x))
5293 return SYMBOL_FORCE_TO_MEM;
5294
5295 if (aarch64_tls_symbol_p (x))
5296 return aarch64_classify_tls_symbol (x);
5297
5298 switch (aarch64_cmodel)
5299 {
5300 case AARCH64_CMODEL_TINY:
5301 if (SYMBOL_REF_WEAK (x))
5302 return SYMBOL_FORCE_TO_MEM;
5303 return SYMBOL_TINY_ABSOLUTE;
5304
5305 case AARCH64_CMODEL_SMALL:
5306 if (SYMBOL_REF_WEAK (x))
5307 return SYMBOL_FORCE_TO_MEM;
5308 return SYMBOL_SMALL_ABSOLUTE;
5309
5310 case AARCH64_CMODEL_TINY_PIC:
5311 if (!aarch64_symbol_binds_local_p (x))
5312 return SYMBOL_TINY_GOT;
5313 return SYMBOL_TINY_ABSOLUTE;
5314
5315 case AARCH64_CMODEL_SMALL_PIC:
5316 if (!aarch64_symbol_binds_local_p (x))
5317 return SYMBOL_SMALL_GOT;
5318 return SYMBOL_SMALL_ABSOLUTE;
5319
5320 default:
5321 gcc_unreachable ();
5322 }
5323 }
5324
5325 /* By default push everything into the constant pool. */
5326 return SYMBOL_FORCE_TO_MEM;
5327 }
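/* For example (illustrative): under -mcmodel=small -fPIC, a global symbol
   that does not bind locally is classified as SYMBOL_SMALL_GOT and accessed
   through the GOT, whereas a local (or static) symbol is
   SYMBOL_SMALL_ABSOLUTE and reached with an adrp/add :lo12: pair; any symbol
   under -mcmodel=large is forced into the literal pool
   (SYMBOL_FORCE_TO_MEM).  */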
5328
5329 bool
5330 aarch64_constant_address_p (rtx x)
5331 {
5332 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5333 }
5334
5335 bool
5336 aarch64_legitimate_pic_operand_p (rtx x)
5337 {
5338 if (GET_CODE (x) == SYMBOL_REF
5339 || (GET_CODE (x) == CONST
5340 && GET_CODE (XEXP (x, 0)) == PLUS
5341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5342 return false;
5343
5344 return true;
5345 }
5346
5347 	/* Return true if X holds either a valid quarter-precision immediate
5348 	   or the floating-point constant +0.0.  */
5349 static bool
5350 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5351 {
5352 if (!CONST_DOUBLE_P (x))
5353 return false;
5354
5355 	  /* TODO: We could handle moving 0.0 to a TFmode register,
5356 	     but first we would like to refactor the movtf_aarch64
5357 	     pattern to be more amenable to splitting moves properly and
5358 	     to gate correctly on TARGET_SIMD.  For now, reject all
5359 	     constants that are not destined for SFmode or DFmode registers.  */
5360 if (!(mode == SFmode || mode == DFmode))
5361 return false;
5362
5363 if (aarch64_float_const_zero_rtx_p (x))
5364 return true;
5365 return aarch64_float_const_representable_p (x);
5366 }
5367
5368 static bool
5369 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5370 {
5371 /* Do not allow vector struct mode constants. We could support
5372 0 and -1 easily, but they need support in aarch64-simd.md. */
5373 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5374 return false;
5375
5376 /* This could probably go away because
5377 we now decompose CONST_INTs according to expand_mov_immediate. */
5378 if ((GET_CODE (x) == CONST_VECTOR
5379 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5380 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5381 return !targetm.cannot_force_const_mem (mode, x);
5382
5383 if (GET_CODE (x) == HIGH
5384 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5385 return true;
5386
5387 return aarch64_constant_address_p (x);
5388 }
5389
5390 rtx
5391 aarch64_load_tp (rtx target)
5392 {
5393 if (!target
5394 || GET_MODE (target) != Pmode
5395 || !register_operand (target, Pmode))
5396 target = gen_reg_rtx (Pmode);
5397
5398 /* Can return in any reg. */
5399 emit_insn (gen_aarch64_load_tp_hard (target));
5400 return target;
5401 }
5402
5403 /* On AAPCS systems, this is the "struct __va_list". */
5404 static GTY(()) tree va_list_type;
5405
5406 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5407 Return the type to use as __builtin_va_list.
5408
5409 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5410
5411 struct __va_list
5412 {
5413 void *__stack;
5414 void *__gr_top;
5415 void *__vr_top;
5416 int __gr_offs;
5417 int __vr_offs;
5418 }; */
5419
5420 static tree
5421 aarch64_build_builtin_va_list (void)
5422 {
5423 tree va_list_name;
5424 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5425
5426 /* Create the type. */
5427 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5428 /* Give it the required name. */
5429 va_list_name = build_decl (BUILTINS_LOCATION,
5430 TYPE_DECL,
5431 get_identifier ("__va_list"),
5432 va_list_type);
5433 DECL_ARTIFICIAL (va_list_name) = 1;
5434 TYPE_NAME (va_list_type) = va_list_name;
5435 TYPE_STUB_DECL (va_list_type) = va_list_name;
5436
5437 /* Create the fields. */
5438 f_stack = build_decl (BUILTINS_LOCATION,
5439 FIELD_DECL, get_identifier ("__stack"),
5440 ptr_type_node);
5441 f_grtop = build_decl (BUILTINS_LOCATION,
5442 FIELD_DECL, get_identifier ("__gr_top"),
5443 ptr_type_node);
5444 f_vrtop = build_decl (BUILTINS_LOCATION,
5445 FIELD_DECL, get_identifier ("__vr_top"),
5446 ptr_type_node);
5447 f_groff = build_decl (BUILTINS_LOCATION,
5448 FIELD_DECL, get_identifier ("__gr_offs"),
5449 integer_type_node);
5450 f_vroff = build_decl (BUILTINS_LOCATION,
5451 FIELD_DECL, get_identifier ("__vr_offs"),
5452 integer_type_node);
5453
5454 DECL_ARTIFICIAL (f_stack) = 1;
5455 DECL_ARTIFICIAL (f_grtop) = 1;
5456 DECL_ARTIFICIAL (f_vrtop) = 1;
5457 DECL_ARTIFICIAL (f_groff) = 1;
5458 DECL_ARTIFICIAL (f_vroff) = 1;
5459
5460 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5461 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5462 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5463 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5464 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5465
5466 TYPE_FIELDS (va_list_type) = f_stack;
5467 DECL_CHAIN (f_stack) = f_grtop;
5468 DECL_CHAIN (f_grtop) = f_vrtop;
5469 DECL_CHAIN (f_vrtop) = f_groff;
5470 DECL_CHAIN (f_groff) = f_vroff;
5471
5472 /* Compute its layout. */
5473 layout_type (va_list_type);
5474
5475 return va_list_type;
5476 }
5477
5478 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5479 static void
5480 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5481 {
5482 const CUMULATIVE_ARGS *cum;
5483 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5484 tree stack, grtop, vrtop, groff, vroff;
5485 tree t;
5486 int gr_save_area_size;
5487 int vr_save_area_size;
5488 int vr_offset;
5489
5490 cum = &crtl->args.info;
5491 gr_save_area_size
5492 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5493 vr_save_area_size
5494 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5495
5496 if (TARGET_GENERAL_REGS_ONLY)
5497 {
5498 if (cum->aapcs_nvrn > 0)
5499 sorry ("%qs and floating point or vector arguments",
5500 "-mgeneral-regs-only");
5501 vr_save_area_size = 0;
5502 }
5503
5504 f_stack = TYPE_FIELDS (va_list_type_node);
5505 f_grtop = DECL_CHAIN (f_stack);
5506 f_vrtop = DECL_CHAIN (f_grtop);
5507 f_groff = DECL_CHAIN (f_vrtop);
5508 f_vroff = DECL_CHAIN (f_groff);
5509
5510 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5511 NULL_TREE);
5512 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5513 NULL_TREE);
5514 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5515 NULL_TREE);
5516 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5517 NULL_TREE);
5518 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5519 NULL_TREE);
5520
5521 /* Emit code to initialize STACK, which points to the next varargs stack
5522 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5523 by named arguments. STACK is 8-byte aligned. */
5524 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5525 if (cum->aapcs_stack_size > 0)
5526 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5527 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5528 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5529
5530 /* Emit code to initialize GRTOP, the top of the GR save area.
5531 virtual_incoming_args_rtx should have been 16 byte aligned. */
5532 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5533 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5534 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5535
5536 /* Emit code to initialize VRTOP, the top of the VR save area.
5537 This address is gr_save_area_bytes below GRTOP, rounded
5538 down to the next 16-byte boundary. */
5539 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5540 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5541 STACK_BOUNDARY / BITS_PER_UNIT);
5542
5543 if (vr_offset)
5544 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5545 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5546 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5547
5548 /* Emit code to initialize GROFF, the offset from GRTOP of the
5549 next GPR argument. */
5550 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5551 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5553
5554 	  /* Likewise emit code to initialize VROFF, the offset from VRTOP
5555 of the next VR argument. */
5556 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5557 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5558 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5559 }
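/* Worked example for the offsets set up above (illustrative, assuming the
   usual AAPCS64 values NUM_ARG_REGS == 8, NUM_FP_ARG_REGS == 8,
   UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16): if three general registers
   and one vector register were consumed by named arguments, then
   gr_save_area_size = (8 - 3) * 8 = 40 and vr_save_area_size
   = (8 - 1) * 16 = 112, so __gr_offs is initialized to -40 and __vr_offs to
   -112, while __vr_top sits AARCH64_ROUND_UP (40, 16) = 48 bytes below
   __gr_top.  */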
5560
5561 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5562
5563 static tree
5564 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5565 gimple_seq *post_p ATTRIBUTE_UNUSED)
5566 {
5567 tree addr;
5568 bool indirect_p;
5569 bool is_ha; /* is HFA or HVA. */
5570 bool dw_align; /* double-word align. */
5571 enum machine_mode ag_mode = VOIDmode;
5572 int nregs;
5573 enum machine_mode mode;
5574
5575 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5576 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5577 HOST_WIDE_INT size, rsize, adjust, align;
5578 tree t, u, cond1, cond2;
5579
5580 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5581 if (indirect_p)
5582 type = build_pointer_type (type);
5583
5584 mode = TYPE_MODE (type);
5585
5586 f_stack = TYPE_FIELDS (va_list_type_node);
5587 f_grtop = DECL_CHAIN (f_stack);
5588 f_vrtop = DECL_CHAIN (f_grtop);
5589 f_groff = DECL_CHAIN (f_vrtop);
5590 f_vroff = DECL_CHAIN (f_groff);
5591
5592 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5593 f_stack, NULL_TREE);
5594 size = int_size_in_bytes (type);
5595 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5596
5597 dw_align = false;
5598 adjust = 0;
5599 if (aarch64_vfp_is_call_or_return_candidate (mode,
5600 type,
5601 &ag_mode,
5602 &nregs,
5603 &is_ha))
5604 {
5605 /* TYPE passed in fp/simd registers. */
5606 if (TARGET_GENERAL_REGS_ONLY)
5607 sorry ("%qs and floating point or vector arguments",
5608 "-mgeneral-regs-only");
5609
5610 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5611 unshare_expr (valist), f_vrtop, NULL_TREE);
5612 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5613 unshare_expr (valist), f_vroff, NULL_TREE);
5614
5615 rsize = nregs * UNITS_PER_VREG;
5616
5617 if (is_ha)
5618 {
5619 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5620 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5621 }
5622 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5623 && size < UNITS_PER_VREG)
5624 {
5625 adjust = UNITS_PER_VREG - size;
5626 }
5627 }
5628 else
5629 {
5630 /* TYPE passed in general registers. */
5631 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5632 unshare_expr (valist), f_grtop, NULL_TREE);
5633 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5634 unshare_expr (valist), f_groff, NULL_TREE);
5635 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5636 nregs = rsize / UNITS_PER_WORD;
5637
5638 if (align > 8)
5639 dw_align = true;
5640
5641 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5642 && size < UNITS_PER_WORD)
5643 {
5644 adjust = UNITS_PER_WORD - size;
5645 }
5646 }
5647
5648 /* Get a local temporary for the field value. */
5649 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5650
5651 /* Emit code to branch if off >= 0. */
5652 t = build2 (GE_EXPR, boolean_type_node, off,
5653 build_int_cst (TREE_TYPE (off), 0));
5654 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5655
5656 if (dw_align)
5657 {
5658 /* Emit: offs = (offs + 15) & -16. */
5659 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5660 build_int_cst (TREE_TYPE (off), 15));
5661 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5662 build_int_cst (TREE_TYPE (off), -16));
5663 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5664 }
5665 else
5666 roundup = NULL;
5667
5668 /* Update ap.__[g|v]r_offs */
5669 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5670 build_int_cst (TREE_TYPE (off), rsize));
5671 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5672
5673 /* String up. */
5674 if (roundup)
5675 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5676
5677 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5678 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5679 build_int_cst (TREE_TYPE (f_off), 0));
5680 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5681
5682 /* String up: make sure the assignment happens before the use. */
5683 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5684 COND_EXPR_ELSE (cond1) = t;
5685
5686 /* Prepare the trees handling the argument that is passed on the stack;
5687 	     the top-level node will be stored in ON_STACK.  */
5688 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5689 if (align > 8)
5690 {
5691 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5692 t = fold_convert (intDI_type_node, arg);
5693 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5694 build_int_cst (TREE_TYPE (t), 15));
5695 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5696 build_int_cst (TREE_TYPE (t), -16));
5697 t = fold_convert (TREE_TYPE (arg), t);
5698 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5699 }
5700 else
5701 roundup = NULL;
5702 /* Advance ap.__stack */
5703 t = fold_convert (intDI_type_node, arg);
5704 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5705 build_int_cst (TREE_TYPE (t), size + 7));
5706 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5707 build_int_cst (TREE_TYPE (t), -8));
5708 t = fold_convert (TREE_TYPE (arg), t);
5709 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5710 /* String up roundup and advance. */
5711 if (roundup)
5712 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5713 /* String up with arg */
5714 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5715 /* Big-endianness related address adjustment. */
5716 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5717 && size < UNITS_PER_WORD)
5718 {
5719 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5720 size_int (UNITS_PER_WORD - size));
5721 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5722 }
5723
5724 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5725 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5726
5727 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5728 t = off;
5729 if (adjust)
5730 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5731 build_int_cst (TREE_TYPE (off), adjust));
5732
5733 t = fold_convert (sizetype, t);
5734 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5735
5736 if (is_ha)
5737 {
5738 /* type ha; // treat as "struct {ftype field[n];}"
5739 ... [computing offs]
5740 for (i = 0; i <nregs; ++i, offs += 16)
5741 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5742 return ha; */
5743 int i;
5744 tree tmp_ha, field_t, field_ptr_t;
5745
5746 /* Declare a local variable. */
5747 tmp_ha = create_tmp_var_raw (type, "ha");
5748 gimple_add_tmp_var (tmp_ha);
5749
5750 /* Establish the base type. */
5751 switch (ag_mode)
5752 {
5753 case SFmode:
5754 field_t = float_type_node;
5755 field_ptr_t = float_ptr_type_node;
5756 break;
5757 case DFmode:
5758 field_t = double_type_node;
5759 field_ptr_t = double_ptr_type_node;
5760 break;
5761 case TFmode:
5762 field_t = long_double_type_node;
5763 field_ptr_t = long_double_ptr_type_node;
5764 break;
5765 	        /* Half-precision and quad-precision floats are not fully supported yet.
5766 	           Enable the following code once that support is complete.  We still need
5767 	           to find the correct type node for __fp16 *.  */
5768 #if 0
5769 case HFmode:
5770 field_t = float_type_node;
5771 field_ptr_t = float_ptr_type_node;
5772 break;
5773 #endif
5774 case V2SImode:
5775 case V4SImode:
5776 {
5777 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5778 field_t = build_vector_type_for_mode (innertype, ag_mode);
5779 field_ptr_t = build_pointer_type (field_t);
5780 }
5781 break;
5782 default:
5783 gcc_assert (0);
5784 }
5785
5786 	      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5787 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5788 addr = t;
5789 t = fold_convert (field_ptr_t, addr);
5790 t = build2 (MODIFY_EXPR, field_t,
5791 build1 (INDIRECT_REF, field_t, tmp_ha),
5792 build1 (INDIRECT_REF, field_t, t));
5793
5794 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5795 for (i = 1; i < nregs; ++i)
5796 {
5797 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5798 u = fold_convert (field_ptr_t, addr);
5799 u = build2 (MODIFY_EXPR, field_t,
5800 build2 (MEM_REF, field_t, tmp_ha,
5801 build_int_cst (field_ptr_t,
5802 (i *
5803 int_size_in_bytes (field_t)))),
5804 build1 (INDIRECT_REF, field_t, u));
5805 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5806 }
5807
5808 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5809 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5810 }
5811
5812 COND_EXPR_ELSE (cond2) = t;
5813 addr = fold_convert (build_pointer_type (type), cond1);
5814 addr = build_va_arg_indirect_ref (addr);
5815
5816 if (indirect_p)
5817 addr = build_va_arg_indirect_ref (addr);
5818
5819 return addr;
5820 }
5821
5822 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5823
5824 static void
5825 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5826 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5827 int no_rtl)
5828 {
5829 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5830 CUMULATIVE_ARGS local_cum;
5831 int gr_saved, vr_saved;
5832
5833 /* The caller has advanced CUM up to, but not beyond, the last named
5834 argument. Advance a local copy of CUM past the last "real" named
5835 argument, to find out how many registers are left over. */
5836 local_cum = *cum;
5837 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5838
5839 	  /* Find out how many registers we need to save.  */
5840 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5841 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5842
5843 if (TARGET_GENERAL_REGS_ONLY)
5844 {
5845 if (local_cum.aapcs_nvrn > 0)
5846 sorry ("%qs and floating point or vector arguments",
5847 "-mgeneral-regs-only");
5848 vr_saved = 0;
5849 }
5850
5851 if (!no_rtl)
5852 {
5853 if (gr_saved > 0)
5854 {
5855 rtx ptr, mem;
5856
5857 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5858 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5859 - gr_saved * UNITS_PER_WORD);
5860 mem = gen_frame_mem (BLKmode, ptr);
5861 set_mem_alias_set (mem, get_varargs_alias_set ());
5862
5863 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5864 mem, gr_saved);
5865 }
5866 if (vr_saved > 0)
5867 {
5868 /* We can't use move_block_from_reg, because it will use
5869 the wrong mode, storing D regs only. */
5870 enum machine_mode mode = TImode;
5871 int off, i;
5872
5873 /* Set OFF to the offset from virtual_incoming_args_rtx of
5874 the first vector register. The VR save area lies below
5875 the GR one, and is aligned to 16 bytes. */
5876 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5877 STACK_BOUNDARY / BITS_PER_UNIT);
5878 off -= vr_saved * UNITS_PER_VREG;
5879
5880 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5881 {
5882 rtx ptr, mem;
5883
5884 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5885 mem = gen_frame_mem (mode, ptr);
5886 set_mem_alias_set (mem, get_varargs_alias_set ());
5887 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5888 off += UNITS_PER_VREG;
5889 }
5890 }
5891 }
5892
5893 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5894 any complication of having crtl->args.pretend_args_size changed. */
5895 cfun->machine->saved_varargs_size
5896 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5897 STACK_BOUNDARY / BITS_PER_UNIT)
5898 + vr_saved * UNITS_PER_VREG);
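  /* Illustrative example (not from the original sources): for a variadic
     function such as "int f (int a, ...)", the single named argument uses
     one core register, so gr_saved == 7 and vr_saved == 8 when the FP/SIMD
     registers are available, and saved_varargs_size becomes
     AARCH64_ROUND_UP (7 * 8, 16) + 8 * 16 == 64 + 128 == 192 bytes.  */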
5899 }
5900
5901 static void
5902 aarch64_conditional_register_usage (void)
5903 {
5904 int i;
5905 if (!TARGET_FLOAT)
5906 {
5907 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5908 {
5909 fixed_regs[i] = 1;
5910 call_used_regs[i] = 1;
5911 }
5912 }
5913 }
5914
5915 /* Walk down the type tree of TYPE counting consecutive base elements.
5916 If *MODEP is VOIDmode, then set it to the first valid floating point
5917 type. If a non-floating point type is found, or if a floating point
5918 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5919 otherwise return the count in the sub-tree. */
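/* A few illustrative inputs and the counts this function is expected to
   produce for them, assuming *MODEP starts out as VOIDmode.  The struct
   names below are made up for this sketch and int32x4_t is the arm_neon.h
   vector type; the block is intentionally not compiled.  */
#if 0
struct hfa3   { float x, y, z; };     /* Count 3, *MODEP == SFmode.  */
struct hva2   { int32x4_t a, b; };    /* Count 2, *MODEP == V4SImode.  */
struct mixed  { float x; double y; }; /* -1: element modes do not match.  */
struct toobig { double d[5]; };       /* Count 5; rejected later as an HFA
                                         because it exceeds HA_MAX_NUM_FLDS.  */
#endif
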
5920 static int
5921 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5922 {
5923 enum machine_mode mode;
5924 HOST_WIDE_INT size;
5925
5926 switch (TREE_CODE (type))
5927 {
5928 case REAL_TYPE:
5929 mode = TYPE_MODE (type);
5930 if (mode != DFmode && mode != SFmode && mode != TFmode)
5931 return -1;
5932
5933 if (*modep == VOIDmode)
5934 *modep = mode;
5935
5936 if (*modep == mode)
5937 return 1;
5938
5939 break;
5940
5941 case COMPLEX_TYPE:
5942 mode = TYPE_MODE (TREE_TYPE (type));
5943 if (mode != DFmode && mode != SFmode && mode != TFmode)
5944 return -1;
5945
5946 if (*modep == VOIDmode)
5947 *modep = mode;
5948
5949 if (*modep == mode)
5950 return 2;
5951
5952 break;
5953
5954 case VECTOR_TYPE:
5955 /* Use V2SImode and V4SImode as representatives of all 64-bit
5956 and 128-bit vector types. */
5957 size = int_size_in_bytes (type);
5958 switch (size)
5959 {
5960 case 8:
5961 mode = V2SImode;
5962 break;
5963 case 16:
5964 mode = V4SImode;
5965 break;
5966 default:
5967 return -1;
5968 }
5969
5970 if (*modep == VOIDmode)
5971 *modep = mode;
5972
5973 /* Vector modes are considered to be opaque: two vectors are
5974 equivalent for the purposes of being homogeneous aggregates
5975 if they are the same size. */
5976 if (*modep == mode)
5977 return 1;
5978
5979 break;
5980
5981 case ARRAY_TYPE:
5982 {
5983 int count;
5984 tree index = TYPE_DOMAIN (type);
5985
5986 /* Can't handle incomplete types. */
5987 if (!COMPLETE_TYPE_P (type))
5988 return -1;
5989
5990 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5991 if (count == -1
5992 || !index
5993 || !TYPE_MAX_VALUE (index)
5994 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5995 || !TYPE_MIN_VALUE (index)
5996 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5997 || count < 0)
5998 return -1;
5999
6000 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
6001 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
6002
6003 /* There must be no padding. */
6004 if (!host_integerp (TYPE_SIZE (type), 1)
6005 || (tree_low_cst (TYPE_SIZE (type), 1)
6006 != count * GET_MODE_BITSIZE (*modep)))
6007 return -1;
6008
6009 return count;
6010 }
6011
6012 case RECORD_TYPE:
6013 {
6014 int count = 0;
6015 int sub_count;
6016 tree field;
6017
6018 /* Can't handle incomplete types. */
6019 if (!COMPLETE_TYPE_P (type))
6020 return -1;
6021
6022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6023 {
6024 if (TREE_CODE (field) != FIELD_DECL)
6025 continue;
6026
6027 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6028 if (sub_count < 0)
6029 return -1;
6030 count += sub_count;
6031 }
6032
6033 /* There must be no padding. */
6034 if (!host_integerp (TYPE_SIZE (type), 1)
6035 || (tree_low_cst (TYPE_SIZE (type), 1)
6036 != count * GET_MODE_BITSIZE (*modep)))
6037 return -1;
6038
6039 return count;
6040 }
6041
6042 case UNION_TYPE:
6043 case QUAL_UNION_TYPE:
6044 {
6045 /* These aren't very interesting except in a degenerate case. */
6046 int count = 0;
6047 int sub_count;
6048 tree field;
6049
6050 /* Can't handle incomplete types. */
6051 if (!COMPLETE_TYPE_P (type))
6052 return -1;
6053
6054 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6055 {
6056 if (TREE_CODE (field) != FIELD_DECL)
6057 continue;
6058
6059 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6060 if (sub_count < 0)
6061 return -1;
6062 count = count > sub_count ? count : sub_count;
6063 }
6064
6065 /* There must be no padding. */
6066 if (!host_integerp (TYPE_SIZE (type), 1)
6067 || (tree_low_cst (TYPE_SIZE (type), 1)
6068 != count * GET_MODE_BITSIZE (*modep)))
6069 return -1;
6070
6071 return count;
6072 }
6073
6074 default:
6075 break;
6076 }
6077
6078 return -1;
6079 }
6080
6081 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6082 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6083 array types. The C99 floating-point complex types are also considered
6084 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6085 types, which are GCC extensions and out of the scope of AAPCS64, are
6086 treated as composite types here as well.
6087
6088 Note that MODE itself is not sufficient in determining whether a type
6089 is such a composite type or not. This is because
6090 stor-layout.c:compute_record_mode may have already changed the MODE
6091 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6092 structure with only one field may have its MODE set to the mode of the
6093 field. Also an integer mode whose size matches the size of the
6094 RECORD_TYPE type may be used to substitute the original mode
6095 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6096 solely relied on. */
6097
6098 static bool
6099 aarch64_composite_type_p (const_tree type,
6100 enum machine_mode mode)
6101 {
6102 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6103 return true;
6104
6105 if (mode == BLKmode
6106 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6107 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6108 return true;
6109
6110 return false;
6111 }
6112
6113 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6114 type as described in AAPCS64 \S 4.1.2.
6115
6116 See the comment above aarch64_composite_type_p for the notes on MODE. */
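/* For example, GNU vector types declared with __attribute__ ((vector_size (8)))
   or __attribute__ ((vector_size (16))), and the arm_neon.h types such as
   int8x8_t and int32x4_t, are short vectors; a 32-byte vector type is not.  */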
6117
6118 static bool
6119 aarch64_short_vector_p (const_tree type,
6120 enum machine_mode mode)
6121 {
6122 HOST_WIDE_INT size = -1;
6123
6124 if (type && TREE_CODE (type) == VECTOR_TYPE)
6125 size = int_size_in_bytes (type);
6126 else if (!aarch64_composite_type_p (type, mode)
6127 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6129 size = GET_MODE_SIZE (mode);
6130
6131 return (size == 8 || size == 16) ? true : false;
6132 }
6133
6134 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6135 shall be passed or returned in simd/fp register(s) (providing these
6136 parameter passing registers are available).
6137
6138 Upon successful return, *COUNT returns the number of needed registers,
6139 *BASE_MODE returns the mode of the individual register and when IS_HA
6140 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6141 floating-point aggregate or a homogeneous short-vector aggregate. */
6142
6143 static bool
6144 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6145 const_tree type,
6146 enum machine_mode *base_mode,
6147 int *count,
6148 bool *is_ha)
6149 {
6150 enum machine_mode new_mode = VOIDmode;
6151 bool composite_p = aarch64_composite_type_p (type, mode);
6152
6153 if (is_ha != NULL) *is_ha = false;
6154
6155 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6156 || aarch64_short_vector_p (type, mode))
6157 {
6158 *count = 1;
6159 new_mode = mode;
6160 }
6161 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6162 {
6163 if (is_ha != NULL) *is_ha = true;
6164 *count = 2;
6165 new_mode = GET_MODE_INNER (mode);
6166 }
6167 else if (type && composite_p)
6168 {
6169 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6170
6171 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6172 {
6173 if (is_ha != NULL) *is_ha = true;
6174 *count = ag_count;
6175 }
6176 else
6177 return false;
6178 }
6179 else
6180 return false;
6181
6182 *base_mode = new_mode;
6183 return true;
6184 }
6185
6186 /* Implement TARGET_STRUCT_VALUE_RTX. */
6187
6188 static rtx
6189 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6190 int incoming ATTRIBUTE_UNUSED)
6191 {
6192 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6193 }
6194
6195 /* Implements target hook vector_mode_supported_p. */
6196 static bool
6197 aarch64_vector_mode_supported_p (enum machine_mode mode)
6198 {
6199 if (TARGET_SIMD
6200 && (mode == V4SImode || mode == V8HImode
6201 || mode == V16QImode || mode == V2DImode
6202 || mode == V2SImode || mode == V4HImode
6203 || mode == V8QImode || mode == V2SFmode
6204 || mode == V4SFmode || mode == V2DFmode))
6205 return true;
6206
6207 return false;
6208 }
6209
6210 /* Return appropriate SIMD container
6211 for MODE within a vector of WIDTH bits. */
6212 static enum machine_mode
6213 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6214 {
6215 gcc_assert (width == 64 || width == 128);
6216 if (TARGET_SIMD)
6217 {
6218 if (width == 128)
6219 switch (mode)
6220 {
6221 case DFmode:
6222 return V2DFmode;
6223 case SFmode:
6224 return V4SFmode;
6225 case SImode:
6226 return V4SImode;
6227 case HImode:
6228 return V8HImode;
6229 case QImode:
6230 return V16QImode;
6231 case DImode:
6232 return V2DImode;
6233 default:
6234 break;
6235 }
6236 else
6237 switch (mode)
6238 {
6239 case SFmode:
6240 return V2SFmode;
6241 case SImode:
6242 return V2SImode;
6243 case HImode:
6244 return V4HImode;
6245 case QImode:
6246 return V8QImode;
6247 default:
6248 break;
6249 }
6250 }
6251 return word_mode;
6252 }
6253
6254 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6255 static enum machine_mode
6256 aarch64_preferred_simd_mode (enum machine_mode mode)
6257 {
6258 return aarch64_simd_container_mode (mode, 128);
6259 }
6260
6261 /* Return the bitmask of possible vector sizes for the vectorizer
6262 to iterate over. */
6263 static unsigned int
6264 aarch64_autovectorize_vector_sizes (void)
6265 {
6266 return (16 | 8);
6267 }
6268
6269 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6270 vector types in order to conform to the AAPCS64 (see "Procedure
6271 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6272 qualify for emission with the mangled names defined in that document,
6273 a vector type must not only be of the correct mode but also be
6274 composed of AdvSIMD vector element types (e.g.
6275 __builtin_aarch64_simd_qi); these types are registered by
6276 aarch64_init_simd_builtins (). In other words, vector types defined
6277 in other ways, e.g. via the vector_size attribute, will get default
6278 mangled names. */
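/* For instance, with the table below a C++ function declared as
   "void f (int32x4_t)" is mangled as "_Z1f11__Int32x4_t", while an
   equivalent user-defined vector_size (16) type keeps the default
   mangling.  */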
6279 typedef struct
6280 {
6281 enum machine_mode mode;
6282 const char *element_type_name;
6283 const char *mangled_name;
6284 } aarch64_simd_mangle_map_entry;
6285
6286 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6287 /* 64-bit containerized types. */
6288 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6289 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6290 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6291 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6292 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6293 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6294 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6295 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6296 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6297 /* 128-bit containerized types. */
6298 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6299 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6300 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6301 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6302 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6303 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6304 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6305 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6306 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6307 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6308 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6309 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6310 { VOIDmode, NULL, NULL }
6311 };
6312
6313 /* Implement TARGET_MANGLE_TYPE. */
6314
6315 static const char *
6316 aarch64_mangle_type (const_tree type)
6317 {
6318 /* The AArch64 ABI documents say that "__va_list" has to be
6319 mangled as if it is in the "std" namespace. */
6320 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6321 return "St9__va_list";
6322
6323 /* Check the mode of the vector type, and the name of the vector
6324 element type, against the table. */
6325 if (TREE_CODE (type) == VECTOR_TYPE)
6326 {
6327 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6328
6329 while (pos->mode != VOIDmode)
6330 {
6331 tree elt_type = TREE_TYPE (type);
6332
6333 if (pos->mode == TYPE_MODE (type)
6334 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6335 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6336 pos->element_type_name))
6337 return pos->mangled_name;
6338
6339 pos++;
6340 }
6341 }
6342
6343 /* Use the default mangling. */
6344 return NULL;
6345 }
6346
6347 /* Return the equivalent letter for size. */
6348 static char
6349 sizetochar (int size)
6350 {
6351 switch (size)
6352 {
6353 case 64: return 'd';
6354 case 32: return 's';
6355 case 16: return 'h';
6356 case 8 : return 'b';
6357 default: gcc_unreachable ();
6358 }
6359 }
6360
6361 /* Return true iff X is a uniform vector of floating-point
6362 constants, and the constant can be represented in
6363 quarter-precision form. Note, as aarch64_float_const_representable_p
6364 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6365 static bool
6366 aarch64_vect_float_const_representable_p (rtx x)
6367 {
6368 int i = 0;
6369 REAL_VALUE_TYPE r0, ri;
6370 rtx x0, xi;
6371
6372 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6373 return false;
6374
6375 x0 = CONST_VECTOR_ELT (x, 0);
6376 if (!CONST_DOUBLE_P (x0))
6377 return false;
6378
6379 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6380
6381 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6382 {
6383 xi = CONST_VECTOR_ELT (x, i);
6384 if (!CONST_DOUBLE_P (xi))
6385 return false;
6386
6387 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6388 if (!REAL_VALUES_EQUAL (r0, ri))
6389 return false;
6390 }
6391
6392 return aarch64_float_const_representable_p (x0);
6393 }
6394
6395 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise. If INFO is nonnull, use it to describe how the immediate should be generated. */
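/* Worked example (illustrative only): a V4SImode CONST_VECTOR whose elements
   are all 0x00ab0000 splats to the per-lane byte pattern { 00, 00, ab, 00 }
   (least significant byte first), which matches the CLASS 2 CHECK below, so
   INFO is filled in with value 0xab, element_width 32, shift 16 and
   mvn == false -- i.e. the constant can be built with
   "movi v0.4s, 0xab, lsl 16".  */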
6396 bool
6397 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6398 struct simd_immediate_info *info)
6399 {
6400 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6401 matches = 1; \
6402 for (i = 0; i < idx; i += (STRIDE)) \
6403 if (!(TEST)) \
6404 matches = 0; \
6405 if (matches) \
6406 { \
6407 immtype = (CLASS); \
6408 elsize = (ELSIZE); \
6409 eshift = (SHIFT); \
6410 emvn = (NEG); \
6411 break; \
6412 }
6413
6414 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6415 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6416 unsigned char bytes[16];
6417 int immtype = -1, matches;
6418 unsigned int invmask = inverse ? 0xff : 0;
6419 int eshift, emvn;
6420
6421 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6422 {
6423 if (! (aarch64_simd_imm_zero_p (op, mode)
6424 || aarch64_vect_float_const_representable_p (op)))
6425 return false;
6426
6427 if (info)
6428 {
6429 info->value = CONST_VECTOR_ELT (op, 0);
6430 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6431 info->mvn = false;
6432 info->shift = 0;
6433 }
6434
6435 return true;
6436 }
6437
6438 /* Splat vector constant out into a byte vector. */
6439 for (i = 0; i < n_elts; i++)
6440 {
6441 rtx el = CONST_VECTOR_ELT (op, i);
6442 unsigned HOST_WIDE_INT elpart;
6443 unsigned int part, parts;
6444
6445 if (GET_CODE (el) == CONST_INT)
6446 {
6447 elpart = INTVAL (el);
6448 parts = 1;
6449 }
6450 else if (GET_CODE (el) == CONST_DOUBLE)
6451 {
6452 elpart = CONST_DOUBLE_LOW (el);
6453 parts = 2;
6454 }
6455 else
6456 gcc_unreachable ();
6457
6458 for (part = 0; part < parts; part++)
6459 {
6460 unsigned int byte;
6461 for (byte = 0; byte < innersize; byte++)
6462 {
6463 bytes[idx++] = (elpart & 0xff) ^ invmask;
6464 elpart >>= BITS_PER_UNIT;
6465 }
6466 if (GET_CODE (el) == CONST_DOUBLE)
6467 elpart = CONST_DOUBLE_HIGH (el);
6468 }
6469 }
6470
6471 /* Sanity check. */
6472 gcc_assert (idx == GET_MODE_SIZE (mode));
6473
6474 do
6475 {
6476 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6477 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6478
6479 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6480 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6481
6482 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6483 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6484
6485 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6486 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6487
6488 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6489
6490 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6491
6492 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6493 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6494
6495 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6496 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6497
6498 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6499 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6500
6501 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6502 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6503
6504 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6505
6506 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6507
6508 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6509 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6510
6511 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6512 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6513
6514 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6515 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6516
6517 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6518 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6519
6520 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6521
6522 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6523 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6524 }
6525 while (0);
6526
6527 if (immtype == -1)
6528 return false;
6529
6530 if (info)
6531 {
6532 info->element_width = elsize;
6533 info->mvn = emvn != 0;
6534 info->shift = eshift;
6535
6536 unsigned HOST_WIDE_INT imm = 0;
6537
6538 if (immtype >= 12 && immtype <= 15)
6539 info->msl = true;
6540
6541 /* Un-invert bytes of recognized vector, if necessary. */
6542 if (invmask != 0)
6543 for (i = 0; i < idx; i++)
6544 bytes[i] ^= invmask;
6545
6546 if (immtype == 17)
6547 {
6548 /* FIXME: Broken on 32-bit H_W_I hosts. */
6549 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6550
6551 for (i = 0; i < 8; i++)
6552 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6553 << (i * BITS_PER_UNIT);
6554
6555
6556 info->value = GEN_INT (imm);
6557 }
6558 else
6559 {
6560 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6561 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6562
6563 /* Construct 'abcdefgh' because the assembler cannot handle
6564 generic constants. */
6565 if (info->mvn)
6566 imm = ~imm;
6567 imm = (imm >> info->shift) & 0xff;
6568 info->value = GEN_INT (imm);
6569 }
6570 }
6571
6572 return true;
6573 #undef CHECK
6574 }
6575
6576 static bool
6577 aarch64_const_vec_all_same_int_p (rtx x,
6578 HOST_WIDE_INT minval,
6579 HOST_WIDE_INT maxval)
6580 {
6581 HOST_WIDE_INT firstval;
6582 int count, i;
6583
6584 if (GET_CODE (x) != CONST_VECTOR
6585 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6586 return false;
6587
6588 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6589 if (firstval < minval || firstval > maxval)
6590 return false;
6591
6592 count = CONST_VECTOR_NUNITS (x);
6593 for (i = 1; i < count; i++)
6594 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6595 return false;
6596
6597 return true;
6598 }
6599
6600 /* Check if immediate shift constants are within range. */
6601 bool
6602 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6603 {
6604 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6605 if (left)
6606 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6607 else
6608 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6609 }
6610
6611 /* Return true if X is a uniform vector where all elements
6612 are either the floating-point constant 0.0 or the
6613 integer constant 0. */
6614 bool
6615 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6616 {
6617 return x == CONST0_RTX (mode);
6618 }
6619
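/* Return true if X is a 64-bit scalar immediate in which every byte is
   either 0x00 or 0xff, i.e. the form accepted by the 64-bit AdvSIMD MOVI.
   For example, 0x00ff00ffff00ff00 is accepted while 0x00ff00ff00ff00fe is
   rejected because of its 0xfe byte.  */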
6620 bool
6621 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6622 {
6623 HOST_WIDE_INT imm = INTVAL (x);
6624 int i;
6625
6626 for (i = 0; i < 8; i++)
6627 {
6628 unsigned int byte = imm & 0xff;
6629 if (byte != 0xff && byte != 0)
6630 return false;
6631 imm >>= 8;
6632 }
6633
6634 return true;
6635 }
6636
6637 bool
6638 aarch64_mov_operand_p (rtx x,
6639 enum aarch64_symbol_context context,
6640 enum machine_mode mode)
6641 {
6642 if (GET_CODE (x) == HIGH
6643 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6644 return true;
6645
6646 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6647 return true;
6648
6649 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6650 return true;
6651
6652 return aarch64_classify_symbolic_expression (x, context)
6653 == SYMBOL_TINY_ABSOLUTE;
6654 }
6655
6656 /* Return a const_int vector of VAL. */
6657 rtx
6658 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6659 {
6660 int nunits = GET_MODE_NUNITS (mode);
6661 rtvec v = rtvec_alloc (nunits);
6662 int i;
6663
6664 for (i = 0; i < nunits; i++)
6665 RTVEC_ELT (v, i) = GEN_INT (val);
6666
6667 return gen_rtx_CONST_VECTOR (mode, v);
6668 }
6669
6670 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6671
6672 bool
6673 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6674 {
6675 enum machine_mode vmode;
6676
6677 gcc_assert (!VECTOR_MODE_P (mode));
6678 vmode = aarch64_preferred_simd_mode (mode);
6679 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6680 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6681 }
6682
6683 /* Construct and return a PARALLEL RTX vector. */
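/* E.g. for V4SImode this returns (parallel [0 1]) when HIGH is false and
   (parallel [2 3]) when HIGH is true.  */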
6684 rtx
6685 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6686 {
6687 int nunits = GET_MODE_NUNITS (mode);
6688 rtvec v = rtvec_alloc (nunits / 2);
6689 int base = high ? nunits / 2 : 0;
6690 rtx t1;
6691 int i;
6692
6693 for (i = 0; i < nunits / 2; i++)
6694 RTVEC_ELT (v, i) = GEN_INT (base + i);
6695
6696 t1 = gen_rtx_PARALLEL (mode, v);
6697 return t1;
6698 }
6699
6700 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6701 HIGH (exclusive). */
6702 void
6703 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6704 {
6705 HOST_WIDE_INT lane;
6706 gcc_assert (GET_CODE (operand) == CONST_INT);
6707 lane = INTVAL (operand);
6708
6709 if (lane < low || lane >= high)
6710 error ("lane out of range");
6711 }
6712
6713 void
6714 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6715 {
6716 gcc_assert (GET_CODE (operand) == CONST_INT);
6717 HOST_WIDE_INT lane = INTVAL (operand);
6718
6719 if (lane < low || lane >= high)
6720 error ("constant out of range");
6721 }
6722
6723 /* Emit code to reinterpret one AdvSIMD type as another,
6724 without altering bits. */
6725 void
6726 aarch64_simd_reinterpret (rtx dest, rtx src)
6727 {
6728 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6729 }
6730
6731 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6732 registers). */
6733 void
6734 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6735 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6736 rtx op1)
6737 {
6738 rtx mem = gen_rtx_MEM (mode, destaddr);
6739 rtx tmp1 = gen_reg_rtx (mode);
6740 rtx tmp2 = gen_reg_rtx (mode);
6741
6742 emit_insn (intfn (tmp1, op1, tmp2));
6743
6744 emit_move_insn (mem, tmp1);
6745 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6746 emit_move_insn (mem, tmp2);
6747 }
6748
6749 /* Return TRUE if OP is a valid vector addressing mode. */
6750 bool
6751 aarch64_simd_mem_operand_p (rtx op)
6752 {
6753 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6754 || GET_CODE (XEXP (op, 0)) == REG);
6755 }
6756
6757 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6758 not to early-clobber SRC registers in the process.
6759
6760 We assume that the operands described by SRC and DEST represent a
6761 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6762 number of components into which the copy has been decomposed. */
6763 void
6764 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6765 rtx *src, unsigned int count)
6766 {
6767 unsigned int i;
6768
6769 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6770 || REGNO (operands[0]) < REGNO (operands[1]))
6771 {
6772 for (i = 0; i < count; i++)
6773 {
6774 operands[2 * i] = dest[i];
6775 operands[2 * i + 1] = src[i];
6776 }
6777 }
6778 else
6779 {
6780 for (i = 0; i < count; i++)
6781 {
6782 operands[2 * i] = dest[count - i - 1];
6783 operands[2 * i + 1] = src[count - i - 1];
6784 }
6785 }
6786 }
6787
6788 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6789 one of VSTRUCT modes: OI, CI or XI. */
6790 int
6791 aarch64_simd_attr_length_move (rtx insn)
6792 {
6793 enum machine_mode mode;
6794
6795 extract_insn_cached (insn);
6796
6797 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6798 {
6799 mode = GET_MODE (recog_data.operand[0]);
6800 switch (mode)
6801 {
6802 case OImode:
6803 return 8;
6804 case CImode:
6805 return 12;
6806 case XImode:
6807 return 16;
6808 default:
6809 gcc_unreachable ();
6810 }
6811 }
6812 return 4;
6813 }
6814
6815 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6816 alignment of a vector to 128 bits. */
6817 static HOST_WIDE_INT
6818 aarch64_simd_vector_alignment (const_tree type)
6819 {
6820 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6821 return MIN (align, 128);
6822 }
6823
6824 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6825 static bool
6826 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6827 {
6828 if (is_packed)
6829 return false;
6830
6831 /* We guarantee alignment for vectors up to 128 bits. */
6832 if (tree_int_cst_compare (TYPE_SIZE (type),
6833 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6834 return false;
6835
6836 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6837 return true;
6838 }
6839
6840 /* If VALS is a vector constant that can be loaded into a register
6841 using DUP, generate instructions to do so and return an RTX to
6842 assign to the register. Otherwise return NULL_RTX. */
6843 static rtx
6844 aarch64_simd_dup_constant (rtx vals)
6845 {
6846 enum machine_mode mode = GET_MODE (vals);
6847 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6848 int n_elts = GET_MODE_NUNITS (mode);
6849 bool all_same = true;
6850 rtx x;
6851 int i;
6852
6853 if (GET_CODE (vals) != CONST_VECTOR)
6854 return NULL_RTX;
6855
6856 for (i = 1; i < n_elts; ++i)
6857 {
6858 x = CONST_VECTOR_ELT (vals, i);
6859 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6860 all_same = false;
6861 }
6862
6863 if (!all_same)
6864 return NULL_RTX;
6865
6866 /* We can load this constant by using DUP and a constant in a
6867 single ARM register. This will be cheaper than a vector
6868 load. */
6869 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6870 return gen_rtx_VEC_DUPLICATE (mode, x);
6871 }
6872
6873
6874 /* Generate code to load VALS, which is a PARALLEL containing only
6875 constants (for vec_init) or CONST_VECTOR, efficiently into a
6876 register. Returns an RTX to copy into the register, or NULL_RTX
6877 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6878 static rtx
6879 aarch64_simd_make_constant (rtx vals)
6880 {
6881 enum machine_mode mode = GET_MODE (vals);
6882 rtx const_dup;
6883 rtx const_vec = NULL_RTX;
6884 int n_elts = GET_MODE_NUNITS (mode);
6885 int n_const = 0;
6886 int i;
6887
6888 if (GET_CODE (vals) == CONST_VECTOR)
6889 const_vec = vals;
6890 else if (GET_CODE (vals) == PARALLEL)
6891 {
6892 /* A CONST_VECTOR must contain only CONST_INTs and
6893 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6894 Only store valid constants in a CONST_VECTOR. */
6895 for (i = 0; i < n_elts; ++i)
6896 {
6897 rtx x = XVECEXP (vals, 0, i);
6898 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6899 n_const++;
6900 }
6901 if (n_const == n_elts)
6902 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6903 }
6904 else
6905 gcc_unreachable ();
6906
6907 if (const_vec != NULL_RTX
6908 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6909 /* Load using MOVI/MVNI. */
6910 return const_vec;
6911 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6912 /* Loaded using DUP. */
6913 return const_dup;
6914 else if (const_vec != NULL_RTX)
6915 /* Load from constant pool. We cannot take advantage of single-cycle
6916 LD1 because we need a PC-relative addressing mode. */
6917 return const_vec;
6918 else
6919 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6920 We cannot construct an initializer. */
6921 return NULL_RTX;
6922 }
6923
6924 void
6925 aarch64_expand_vector_init (rtx target, rtx vals)
6926 {
6927 enum machine_mode mode = GET_MODE (target);
6928 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6929 int n_elts = GET_MODE_NUNITS (mode);
6930 int n_var = 0, one_var = -1;
6931 bool all_same = true;
6932 rtx x, mem;
6933 int i;
6934
6935 x = XVECEXP (vals, 0, 0);
6936 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6937 n_var = 1, one_var = 0;
6938
6939 for (i = 1; i < n_elts; ++i)
6940 {
6941 x = XVECEXP (vals, 0, i);
6942 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6943 ++n_var, one_var = i;
6944
6945 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6946 all_same = false;
6947 }
6948
6949 if (n_var == 0)
6950 {
6951 rtx constant = aarch64_simd_make_constant (vals);
6952 if (constant != NULL_RTX)
6953 {
6954 emit_move_insn (target, constant);
6955 return;
6956 }
6957 }
6958
6959 /* Splat a single non-constant element if we can. */
6960 if (all_same)
6961 {
6962 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6963 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6964 return;
6965 }
6966
6967 /* One field is non-constant. Load constant then overwrite varying
6968 field. This is more efficient than using the stack. */
6969 if (n_var == 1)
6970 {
6971 rtx copy = copy_rtx (vals);
6972 rtx index = GEN_INT (one_var);
6973 enum insn_code icode;
6974
6975 /* Load constant part of vector, substitute neighboring value for
6976 varying element. */
6977 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6978 aarch64_expand_vector_init (target, copy);
6979
6980 /* Insert variable. */
6981 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6982 icode = optab_handler (vec_set_optab, mode);
6983 gcc_assert (icode != CODE_FOR_nothing);
6984 emit_insn (GEN_FCN (icode) (target, x, index));
6985 return;
6986 }
6987
6988 /* Construct the vector in memory one field at a time
6989 and load the whole vector. */
6990 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6991 for (i = 0; i < n_elts; i++)
6992 emit_move_insn (adjust_address_nv (mem, inner_mode,
6993 i * GET_MODE_SIZE (inner_mode)),
6994 XVECEXP (vals, 0, i));
6995 emit_move_insn (target, mem);
6996
6997 }
6998
6999 static unsigned HOST_WIDE_INT
7000 aarch64_shift_truncation_mask (enum machine_mode mode)
7001 {
7002 return
7003 (aarch64_vector_mode_supported_p (mode)
7004 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7005 }
7006
7007 #ifndef TLS_SECTION_ASM_FLAG
7008 #define TLS_SECTION_ASM_FLAG 'T'
7009 #endif
7010
7011 void
7012 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7013 tree decl ATTRIBUTE_UNUSED)
7014 {
7015 char flagchars[10], *f = flagchars;
7016
7017 /* If we have already declared this section, we can use an
7018 abbreviated form to switch back to it -- unless this section is
7019 part of a COMDAT group, in which case GAS requires the full
7020 declaration every time. */
7021 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7022 && (flags & SECTION_DECLARED))
7023 {
7024 fprintf (asm_out_file, "\t.section\t%s\n", name);
7025 return;
7026 }
7027
7028 if (!(flags & SECTION_DEBUG))
7029 *f++ = 'a';
7030 if (flags & SECTION_WRITE)
7031 *f++ = 'w';
7032 if (flags & SECTION_CODE)
7033 *f++ = 'x';
7034 if (flags & SECTION_SMALL)
7035 *f++ = 's';
7036 if (flags & SECTION_MERGE)
7037 *f++ = 'M';
7038 if (flags & SECTION_STRINGS)
7039 *f++ = 'S';
7040 if (flags & SECTION_TLS)
7041 *f++ = TLS_SECTION_ASM_FLAG;
7042 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7043 *f++ = 'G';
7044 *f = '\0';
7045
7046 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7047
7048 if (!(flags & SECTION_NOTYPE))
7049 {
7050 const char *type;
7051 const char *format;
7052
7053 if (flags & SECTION_BSS)
7054 type = "nobits";
7055 else
7056 type = "progbits";
7057
7058 #ifdef TYPE_OPERAND_FMT
7059 format = "," TYPE_OPERAND_FMT;
7060 #else
7061 format = ",@%s";
7062 #endif
7063
7064 fprintf (asm_out_file, format, type);
7065
7066 if (flags & SECTION_ENTSIZE)
7067 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7068 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7069 {
7070 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7071 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7072 else
7073 fprintf (asm_out_file, ",%s,comdat",
7074 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7075 }
7076 }
7077
7078 putc ('\n', asm_out_file);
7079 }
7080
7081 /* Select a format to encode pointers in exception handling data. */
7082 int
7083 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7084 {
7085 int type;
7086 switch (aarch64_cmodel)
7087 {
7088 case AARCH64_CMODEL_TINY:
7089 case AARCH64_CMODEL_TINY_PIC:
7090 case AARCH64_CMODEL_SMALL:
7091 case AARCH64_CMODEL_SMALL_PIC:
7092 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7093 for everything. */
7094 type = DW_EH_PE_sdata4;
7095 break;
7096 default:
7097 /* No assumptions here. 8-byte relocs required. */
7098 type = DW_EH_PE_sdata8;
7099 break;
7100 }
7101 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7102 }
7103
7104 /* Emit load exclusive. */
7105
7106 static void
7107 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7108 rtx mem, rtx model_rtx)
7109 {
7110 rtx (*gen) (rtx, rtx, rtx);
7111
7112 switch (mode)
7113 {
7114 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7115 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7116 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7117 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7118 default:
7119 gcc_unreachable ();
7120 }
7121
7122 emit_insn (gen (rval, mem, model_rtx));
7123 }
7124
7125 /* Emit store exclusive. */
7126
7127 static void
7128 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7129 rtx rval, rtx mem, rtx model_rtx)
7130 {
7131 rtx (*gen) (rtx, rtx, rtx, rtx);
7132
7133 switch (mode)
7134 {
7135 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7136 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7137 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7138 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7139 default:
7140 gcc_unreachable ();
7141 }
7142
7143 emit_insn (gen (bval, rval, mem, model_rtx));
7144 }
7145
7146 /* Emit jump pattern INSN as a jump instruction and mark it as very unlikely to be taken. */
7147
7148 static void
7149 aarch64_emit_unlikely_jump (rtx insn)
7150 {
7151 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7152
7153 insn = emit_jump_insn (insn);
7154 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7155 }
7156
7157 /* Expand a compare and swap pattern. */
7158
7159 void
7160 aarch64_expand_compare_and_swap (rtx operands[])
7161 {
7162 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7163 enum machine_mode mode, cmp_mode;
7164 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7165
7166 bval = operands[0];
7167 rval = operands[1];
7168 mem = operands[2];
7169 oldval = operands[3];
7170 newval = operands[4];
7171 is_weak = operands[5];
7172 mod_s = operands[6];
7173 mod_f = operands[7];
7174 mode = GET_MODE (mem);
7175 cmp_mode = mode;
7176
7177 /* Normally the succ memory model must be stronger than fail, but in the
7178 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7179 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7180
7181 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7182 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7183 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7184
7185 switch (mode)
7186 {
7187 case QImode:
7188 case HImode:
7189 /* For short modes, we're going to perform the comparison in SImode,
7190 so do the zero-extension now. */
7191 cmp_mode = SImode;
7192 rval = gen_reg_rtx (SImode);
7193 oldval = convert_modes (SImode, mode, oldval, true);
7194 /* Fall through. */
7195
7196 case SImode:
7197 case DImode:
7198 /* Force the value into a register if needed. */
7199 if (!aarch64_plus_operand (oldval, mode))
7200 oldval = force_reg (cmp_mode, oldval);
7201 break;
7202
7203 default:
7204 gcc_unreachable ();
7205 }
7206
7207 switch (mode)
7208 {
7209 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7210 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7211 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7212 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7213 default:
7214 gcc_unreachable ();
7215 }
7216
7217 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7218
7219 if (mode == QImode || mode == HImode)
7220 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7221
7222 x = gen_rtx_REG (CCmode, CC_REGNUM);
7223 x = gen_rtx_EQ (SImode, x, const0_rtx);
7224 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7225 }
7226
7227 /* Split a compare and swap pattern. */
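/* For reference, for a strong compare-and-swap the split below emits a loop
   of roughly this shape (illustrative pseudo-assembly; the exact
   load/store-exclusive variants depend on the memory model operands):

     retry:  ldxr   rval, [mem]
             cmp    rval, oldval
             b.ne   done
             stxr   scratch, newval, [mem]
             cbnz   scratch, retry
     done:                                                                   */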
7228
7229 void
7230 aarch64_split_compare_and_swap (rtx operands[])
7231 {
7232 rtx rval, mem, oldval, newval, scratch;
7233 enum machine_mode mode;
7234 bool is_weak;
7235 rtx label1, label2, x, cond;
7236
7237 rval = operands[0];
7238 mem = operands[1];
7239 oldval = operands[2];
7240 newval = operands[3];
7241 is_weak = (operands[4] != const0_rtx);
7242 scratch = operands[7];
7243 mode = GET_MODE (mem);
7244
7245 label1 = NULL_RTX;
7246 if (!is_weak)
7247 {
7248 label1 = gen_label_rtx ();
7249 emit_label (label1);
7250 }
7251 label2 = gen_label_rtx ();
7252
7253 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7254
7255 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7256 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7257 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7258 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7259 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7260
7261 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7262
7263 if (!is_weak)
7264 {
7265 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7266 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7267 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7268 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7269 }
7270 else
7271 {
7272 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7273 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7274 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7275 }
7276
7277 emit_label (label2);
7278 }
7279
7280 /* Split an atomic operation. */
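/* For reference, for CODE == PLUS the split below emits a loop of roughly
   this shape (illustrative pseudo-assembly):

     retry:  ldxr   old_out, [mem]
             add    new_out, old_out, value
             stxr   cond, new_out, [mem]
             cbnz   cond, retry                                              */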
7281
7282 void
7283 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7284 rtx value, rtx model_rtx, rtx cond)
7285 {
7286 enum machine_mode mode = GET_MODE (mem);
7287 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7288 rtx label, x;
7289
7290 label = gen_label_rtx ();
7291 emit_label (label);
7292
7293 if (new_out)
7294 new_out = gen_lowpart (wmode, new_out);
7295 if (old_out)
7296 old_out = gen_lowpart (wmode, old_out);
7297 else
7298 old_out = new_out;
7299 value = simplify_gen_subreg (wmode, value, mode, 0);
7300
7301 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7302
7303 switch (code)
7304 {
7305 case SET:
7306 new_out = value;
7307 break;
7308
7309 case NOT:
7310 x = gen_rtx_AND (wmode, old_out, value);
7311 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7312 x = gen_rtx_NOT (wmode, new_out);
7313 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7314 break;
7315
7316 case MINUS:
7317 if (CONST_INT_P (value))
7318 {
7319 value = GEN_INT (-INTVAL (value));
7320 code = PLUS;
7321 }
7322 /* Fall through. */
7323
7324 default:
7325 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7326 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7327 break;
7328 }
7329
7330 aarch64_emit_store_exclusive (mode, cond, mem,
7331 gen_lowpart (mode, new_out), model_rtx);
7332
7333 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7334 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7335 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7336 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7337 }
7338
7339 static void
7340 aarch64_print_extension (void)
7341 {
7342 const struct aarch64_option_extension *opt = NULL;
7343
7344 for (opt = all_extensions; opt->name != NULL; opt++)
7345 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7346 asm_fprintf (asm_out_file, "+%s", opt->name);
7347
7348 asm_fprintf (asm_out_file, "\n");
7349 }
7350
7351 static void
7352 aarch64_start_file (void)
7353 {
7354 if (selected_arch)
7355 {
7356 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7357 aarch64_print_extension ();
7358 }
7359 else if (selected_cpu)
7360 {
7361 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7362 aarch64_print_extension ();
7363 }
7364 default_file_start ();
7365 }
7366
7367 /* Target hook for c_mode_for_suffix. */
7368 static enum machine_mode
7369 aarch64_c_mode_for_suffix (char suffix)
7370 {
7371 if (suffix == 'q')
7372 return TFmode;
7373
7374 return VOIDmode;
7375 }
7376
7377 /* We can only represent floating point constants which will fit in
7378 "quarter-precision" values. These values are characterised by
7379 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7380 by:
7381
7382 (-1)^s * (n/16) * 2^r
7383
7384 Where:
7385 's' is the sign bit.
7386 'n' is an integer in the range 16 <= n <= 31.
7387 'r' is an integer in the range -3 <= r <= 4. */
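/* Worked example (illustrative only): 1.25 is representable, since
   1.25 = (+1) * (20/16) * 2^0 with n = 20 and r = 0, whereas 0.1 is not,
   because no n in [16, 31] and r in [-3, 4] give n/16 * 2^r == 0.1.  */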
7388
7389 /* Return true iff X can be represented by a quarter-precision
7390 floating point immediate operand. Note, we cannot represent 0.0. */
7391 bool
7392 aarch64_float_const_representable_p (rtx x)
7393 {
7394 /* This represents our current view of how many bits
7395 make up the mantissa. */
7396 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7397 int exponent;
7398 unsigned HOST_WIDE_INT mantissa, mask;
7399 HOST_WIDE_INT m1, m2;
7400 REAL_VALUE_TYPE r, m;
7401
7402 if (!CONST_DOUBLE_P (x))
7403 return false;
7404
7405 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7406
7407 /* We cannot represent infinities, NaNs or +/-zero. We won't
7408 know if we have +zero until we analyse the mantissa, but we
7409 can reject the other invalid values. */
7410 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7411 || REAL_VALUE_MINUS_ZERO (r))
7412 return false;
7413
7414 /* Extract exponent. */
7415 r = real_value_abs (&r);
7416 exponent = REAL_EXP (&r);
7417
7418 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7419 highest (sign) bit, with a fixed binary point at bit point_pos.
7420 m1 holds the low part of the mantissa, m2 the high part.
7421 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7422 bits for the mantissa, this can fail (low bits will be lost). */
7423 real_ldexp (&m, &r, point_pos - exponent);
7424 REAL_VALUE_TO_INT (&m1, &m2, m);
7425
7426 /* If the low part of the mantissa has bits set we cannot represent
7427 the value. */
7428 if (m1 != 0)
7429 return false;
7430 /* We have rejected the lower HOST_WIDE_INT, so update our
7431 understanding of how many bits lie in the mantissa and
7432 look only at the high HOST_WIDE_INT. */
7433 mantissa = m2;
7434 point_pos -= HOST_BITS_PER_WIDE_INT;
7435
7436 /* We can only represent values with a mantissa of the form 1.xxxx. */
7437 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7438 if ((mantissa & mask) != 0)
7439 return false;
7440
7441 /* Having filtered unrepresentable values, we may now remove all
7442 but the highest 5 bits. */
7443 mantissa >>= point_pos - 5;
7444
7445 /* We cannot represent the value 0.0, so reject it. This is handled
7446 elsewhere. */
7447 if (mantissa == 0)
7448 return false;
7449
7450 /* Then, as bit 4 is always set, we can mask it off, leaving
7451 the mantissa in the range [0, 15]. */
7452 mantissa &= ~(1 << 4);
7453 gcc_assert (mantissa <= 15);
7454
7455 /* GCC internally does not use IEEE754-like encoding (where normalized
7456 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
7457 Our mantissa values are shifted 4 places to the left relative to
7458 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7459 by 5 places to correct for GCC's representation. */
7460 exponent = 5 - exponent;
7461
7462 return (exponent >= 0 && exponent <= 7);
7463 }
7464
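/* Return the asm template for an AdvSIMD immediate move of CONST_VECTOR,
   whose mode is MODE, into a register of WIDTH bits.  Example outputs
   (illustrative only): a V4SImode vector with all elements 0x00ab0000 gives
   "movi v0.4s, 0xab, lsl 16", while a V2DFmode vector of a representable
   constant such as 1.0 uses the "fmov" form.  */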
7465 char*
7466 aarch64_output_simd_mov_immediate (rtx const_vector,
7467 enum machine_mode mode,
7468 unsigned width)
7469 {
7470 bool is_valid;
7471 static char templ[40];
7472 const char *mnemonic;
7473 const char *shift_op;
7474 unsigned int lane_count = 0;
7475 char element_char;
7476
7477 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7478
7479 /* This will return true to show const_vector is legal for use as either
7480 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7481 also update INFO to show how the immediate should be generated. */
7482 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7483 gcc_assert (is_valid);
7484
7485 element_char = sizetochar (info.element_width);
7486 lane_count = width / info.element_width;
7487
7488 mode = GET_MODE_INNER (mode);
7489 if (mode == SFmode || mode == DFmode)
7490 {
7491 gcc_assert (info.shift == 0 && ! info.mvn);
7492 if (aarch64_float_const_zero_rtx_p (info.value))
7493 info.value = GEN_INT (0);
7494 else
7495 {
7496 #define buf_size 20
7497 REAL_VALUE_TYPE r;
7498 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7499 char float_buf[buf_size] = {'\0'};
7500 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7501 #undef buf_size
7502
7503 if (lane_count == 1)
7504 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7505 else
7506 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7507 lane_count, element_char, float_buf);
7508 return templ;
7509 }
7510 }
7511
7512 mnemonic = info.mvn ? "mvni" : "movi";
7513 shift_op = info.msl ? "msl" : "lsl";
7514
7515 if (lane_count == 1)
7516 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7517 mnemonic, UINTVAL (info.value));
7518 else if (info.shift)
7519 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7520 ", %s %d", mnemonic, lane_count, element_char,
7521 UINTVAL (info.value), shift_op, info.shift);
7522 else
7523 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7524 mnemonic, lane_count, element_char, UINTVAL (info.value));
7525 return templ;
7526 }
7527
7528 char*
7529 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7530 enum machine_mode mode)
7531 {
7532 enum machine_mode vmode;
7533
7534 gcc_assert (!VECTOR_MODE_P (mode));
7535 vmode = aarch64_simd_container_mode (mode, 64);
7536 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7537 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7538 }
7539
7540 /* Split operands into moves from op[1] + op[2] into op[0]. */
7541
7542 void
7543 aarch64_split_combinev16qi (rtx operands[3])
7544 {
7545 unsigned int dest = REGNO (operands[0]);
7546 unsigned int src1 = REGNO (operands[1]);
7547 unsigned int src2 = REGNO (operands[2]);
7548 enum machine_mode halfmode = GET_MODE (operands[1]);
7549 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7550 rtx destlo, desthi;
7551
7552 gcc_assert (halfmode == V16QImode);
7553
7554 if (src1 == dest && src2 == dest + halfregs)
7555 {
7556 /* No-op move. Can't split to nothing; emit something. */
7557 emit_note (NOTE_INSN_DELETED);
7558 return;
7559 }
7560
7561 /* Preserve register attributes for variable tracking. */
7562 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7563 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7564 GET_MODE_SIZE (halfmode));
7565
7566 /* Special case of reversed high/low parts. */
7567 if (reg_overlap_mentioned_p (operands[2], destlo)
7568 && reg_overlap_mentioned_p (operands[1], desthi))
7569 {
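      /* Swap the two halves in place with the classic three-XOR trick so
         that no scratch register is needed.  */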
7570 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7571 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7572 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7573 }
7574 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7575 {
7576 /* Try to avoid unnecessary moves if part of the result
7577 is in the right place already. */
7578 if (src1 != dest)
7579 emit_move_insn (destlo, operands[1]);
7580 if (src2 != dest + halfregs)
7581 emit_move_insn (desthi, operands[2]);
7582 }
7583 else
7584 {
7585 if (src2 != dest + halfregs)
7586 emit_move_insn (desthi, operands[2]);
7587 if (src1 != dest)
7588 emit_move_insn (destlo, operands[1]);
7589 }
7590 }
7591
7592 /* vec_perm support. */
7593
7594 #define MAX_VECT_LEN 16
7595
7596 struct expand_vec_perm_d
7597 {
7598 rtx target, op0, op1;
7599 unsigned char perm[MAX_VECT_LEN];
7600 enum machine_mode vmode;
7601 unsigned char nelt;
7602 bool one_vector_p;
7603 bool testing_p;
7604 };
7605
7606 /* Generate a variable permutation. */
7607
7608 static void
7609 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7610 {
7611 enum machine_mode vmode = GET_MODE (target);
7612 bool one_vector_p = rtx_equal_p (op0, op1);
7613
7614 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7615 gcc_checking_assert (GET_MODE (op0) == vmode);
7616 gcc_checking_assert (GET_MODE (op1) == vmode);
7617 gcc_checking_assert (GET_MODE (sel) == vmode);
7618 gcc_checking_assert (TARGET_SIMD);
7619
7620 if (one_vector_p)
7621 {
7622 if (vmode == V8QImode)
7623 {
7624 /* Expand the argument to a V16QI mode by duplicating it. */
7625 rtx pair = gen_reg_rtx (V16QImode);
7626 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7627 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7628 }
7629 else
7630 {
7631 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7632 }
7633 }
7634 else
7635 {
7636 rtx pair;
7637
7638 if (vmode == V8QImode)
7639 {
7640 pair = gen_reg_rtx (V16QImode);
7641 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7642 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7643 }
7644 else
7645 {
7646 pair = gen_reg_rtx (OImode);
7647 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7648 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7649 }
7650 }
7651 }
7652
7653 void
7654 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7655 {
7656 enum machine_mode vmode = GET_MODE (target);
7657 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7658 bool one_vector_p = rtx_equal_p (op0, op1);
7659 rtx rmask[MAX_VECT_LEN], mask;
7660
7661 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7662
7663 /* The TBL instruction does not use a modulo index, so we must take care
7664 of that ourselves. */
7665 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7666 for (i = 0; i < nelt; ++i)
7667 rmask[i] = mask;
7668 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7669 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7670
7671 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7672 }
7673
7674 /* Recognize patterns suitable for the TRN instructions. */
7675 static bool
7676 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7677 {
7678 unsigned int i, odd, mask, nelt = d->nelt;
7679 rtx out, in0, in1, x;
7680 rtx (*gen) (rtx, rtx, rtx);
7681 enum machine_mode vmode = d->vmode;
7682
7683 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7684 return false;
7685
7686 /* Note that these are little-endian tests.
7687 We correct for big-endian later. */
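  /* E.g. for V4SImode, trn1 corresponds to the permutation { 0, 4, 2, 6 }
     and trn2 to { 1, 5, 3, 7 }.  */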
7688 if (d->perm[0] == 0)
7689 odd = 0;
7690 else if (d->perm[0] == 1)
7691 odd = 1;
7692 else
7693 return false;
7694 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7695
7696 for (i = 0; i < nelt; i += 2)
7697 {
7698 if (d->perm[i] != i + odd)
7699 return false;
7700 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7701 return false;
7702 }
7703
7704 /* Success! */
7705 if (d->testing_p)
7706 return true;
7707
7708 in0 = d->op0;
7709 in1 = d->op1;
7710 if (BYTES_BIG_ENDIAN)
7711 {
7712 x = in0, in0 = in1, in1 = x;
7713 odd = !odd;
7714 }
7715 out = d->target;
7716
7717 if (odd)
7718 {
7719 switch (vmode)
7720 {
7721 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7722 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7723 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7724 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7725 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7726 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7727 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7728 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7729 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7730 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7731 default:
7732 return false;
7733 }
7734 }
7735 else
7736 {
7737 switch (vmode)
7738 {
7739 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7740 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7741 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7742 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7743 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7744 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7745 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7746 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7747 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7748 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7749 default:
7750 return false;
7751 }
7752 }
7753
7754 emit_insn (gen (out, in0, in1));
7755 return true;
7756 }
7757
7758 /* Recognize patterns suitable for the UZP instructions. */
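/* For example, for V4SImode inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   UZP1 gives {a0, a2, b0, b2} and UZP2 gives {a1, a3, b1, b3}, i.e. the
   selectors {0, 2, 4, 6} and {1, 3, 5, 7}. */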
7759 static bool
7760 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7761 {
7762 unsigned int i, odd, mask, nelt = d->nelt;
7763 rtx out, in0, in1, x;
7764 rtx (*gen) (rtx, rtx, rtx);
7765 enum machine_mode vmode = d->vmode;
7766
7767 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7768 return false;
7769
7770 /* Note that these are little-endian tests.
7771 We correct for big-endian later. */
7772 if (d->perm[0] == 0)
7773 odd = 0;
7774 else if (d->perm[0] == 1)
7775 odd = 1;
7776 else
7777 return false;
7778 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7779
7780 for (i = 0; i < nelt; i++)
7781 {
7782 unsigned elt = (i * 2 + odd) & mask;
7783 if (d->perm[i] != elt)
7784 return false;
7785 }
7786
7787 /* Success! */
7788 if (d->testing_p)
7789 return true;
7790
7791 in0 = d->op0;
7792 in1 = d->op1;
7793 if (BYTES_BIG_ENDIAN)
7794 {
7795 x = in0, in0 = in1, in1 = x;
7796 odd = !odd;
7797 }
7798 out = d->target;
7799
7800 if (odd)
7801 {
7802 switch (vmode)
7803 {
7804 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7805 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7806 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7807 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7808 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7809 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7810 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7811 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7812 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7813 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7814 default:
7815 return false;
7816 }
7817 }
7818 else
7819 {
7820 switch (vmode)
7821 {
7822 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7823 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7824 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7825 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7826 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7827 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7828 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7829 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7830 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7831 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7832 default:
7833 return false;
7834 }
7835 }
7836
7837 emit_insn (gen (out, in0, in1));
7838 return true;
7839 }
7840
7841 /* Recognize patterns suitable for the ZIP instructions. */
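/* For example, for V4SImode inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   ZIP1 gives {a0, b0, a1, b1} and ZIP2 gives {a2, b2, a3, b3}, i.e. the
   selectors {0, 4, 1, 5} and {2, 6, 3, 7}. */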
7842 static bool
7843 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7844 {
7845 unsigned int i, high, mask, nelt = d->nelt;
7846 rtx out, in0, in1, x;
7847 rtx (*gen) (rtx, rtx, rtx);
7848 enum machine_mode vmode = d->vmode;
7849
7850 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7851 return false;
7852
7853 /* Note that these are little-endian tests.
7854 We correct for big-endian later. */
7855 high = nelt / 2;
7856 if (d->perm[0] == high)
7857 /* Do nothing. */
7858 ;
7859 else if (d->perm[0] == 0)
7860 high = 0;
7861 else
7862 return false;
7863 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7864
7865 for (i = 0; i < nelt / 2; i++)
7866 {
7867 unsigned elt = (i + high) & mask;
7868 if (d->perm[i * 2] != elt)
7869 return false;
7870 elt = (elt + nelt) & mask;
7871 if (d->perm[i * 2 + 1] != elt)
7872 return false;
7873 }
7874
7875 /* Success! */
7876 if (d->testing_p)
7877 return true;
7878
7879 in0 = d->op0;
7880 in1 = d->op1;
7881 if (BYTES_BIG_ENDIAN)
7882 {
7883 x = in0, in0 = in1, in1 = x;
7884 high = !high;
7885 }
7886 out = d->target;
7887
7888 if (high)
7889 {
7890 switch (vmode)
7891 {
7892 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7893 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7894 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7895 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7896 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7897 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7898 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7899 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7900 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7901 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7902 default:
7903 return false;
7904 }
7905 }
7906 else
7907 {
7908 switch (vmode)
7909 {
7910 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7911 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7912 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7913 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7914 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7915 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7916 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7917 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7918 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7919 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7920 default:
7921 return false;
7922 }
7923 }
7924
7925 emit_insn (gen (out, in0, in1));
7926 return true;
7927 }
7928
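/* Expand the constant permutation D via the TBL instruction, using the
   permutation indices themselves as the byte selector. Only QImode
   element vectors are handled here; the generic code retries the other
   modes with their elements lowered to QImode. */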
7929 static bool
7930 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7931 {
7932 rtx rperm[MAX_VECT_LEN], sel;
7933 enum machine_mode vmode = d->vmode;
7934 unsigned int i, nelt = d->nelt;
7935
7936 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7937 numbering of elements for big-endian, we must reverse the order. */
7938 if (BYTES_BIG_ENDIAN)
7939 return false;
7940
7941 if (d->testing_p)
7942 return true;
7943
7944 /* Generic code will try constant permutation twice: once with the
7945 original mode and again with the elements lowered to QImode.
7946 So wait, and don't do the selector expansion ourselves. */
7947 if (vmode != V8QImode && vmode != V16QImode)
7948 return false;
7949
7950 for (i = 0; i < nelt; ++i)
7951 rperm[i] = GEN_INT (d->perm[i]);
7952 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7953 sel = force_reg (vmode, sel);
7954
7955 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7956 return true;
7957 }
7958
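/* Try to expand the constant permutation described by D, first with the
   dedicated ZIP/UZP/TRN recognizers and finally with a generic TBL.
   Return true on success. */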
7959 static bool
7960 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7961 {
7962 /* The pattern matching functions above are written to look for a small
7963 number to begin the sequence (0, 1, N/2). If we begin with an index
7964 from the second operand, we can swap the operands. */
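/* For example, with nelt == 4 the permutation {4, 0, 5, 1} becomes
   {0, 4, 1, 5} once the operands are exchanged, which ZIP1 can then
   match. */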
7965 if (d->perm[0] >= d->nelt)
7966 {
7967 unsigned i, nelt = d->nelt;
7968 rtx x;
7969
7970 for (i = 0; i < nelt; ++i)
7971 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7972
7973 x = d->op0;
7974 d->op0 = d->op1;
7975 d->op1 = x;
7976 }
7977
7978 if (TARGET_SIMD)
7979 {
7980 if (aarch64_evpc_zip (d))
7981 return true;
7982 else if (aarch64_evpc_uzp (d))
7983 return true;
7984 else if (aarch64_evpc_trn (d))
7985 return true;
7986 return aarch64_evpc_tbl (d);
7987 }
7988 return false;
7989 }
7990
7991 /* Expand a vec_perm_const pattern. */
7992
7993 bool
7994 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7995 {
7996 struct expand_vec_perm_d d;
7997 int i, nelt, which;
7998
7999 d.target = target;
8000 d.op0 = op0;
8001 d.op1 = op1;
8002
8003 d.vmode = GET_MODE (target);
8004 gcc_assert (VECTOR_MODE_P (d.vmode));
8005 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8006 d.testing_p = false;
8007
8008 for (i = which = 0; i < nelt; ++i)
8009 {
8010 rtx e = XVECEXP (sel, 0, i);
8011 int ei = INTVAL (e) & (2 * nelt - 1);
8012 which |= (ei < nelt ? 1 : 2);
8013 d.perm[i] = ei;
8014 }
8015
8016 switch (which)
8017 {
8018 default:
8019 gcc_unreachable ();
8020
8021 case 3:
8022 d.one_vector_p = false;
8023 if (!rtx_equal_p (op0, op1))
8024 break;
8025
8026 /* The elements of PERM reference both operands, but the two
8027 operands are identical. Allow easier matching of the
8028 permutation by folding it onto the single
8029 input vector. */
8030 /* Fall Through. */
8031 case 2:
8032 for (i = 0; i < nelt; ++i)
8033 d.perm[i] &= nelt - 1;
8034 d.op0 = op1;
8035 d.one_vector_p = true;
8036 break;
8037
8038 case 1:
8039 d.op1 = op0;
8040 d.one_vector_p = true;
8041 break;
8042 }
8043
8044 return aarch64_expand_vec_perm_const_1 (&d);
8045 }
8046
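/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. Return true if the
   permutation SEL is supported for vectors of mode VMODE, by trial
   expanding it on scratch registers inside a discarded insn sequence. */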
8047 static bool
8048 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8049 const unsigned char *sel)
8050 {
8051 struct expand_vec_perm_d d;
8052 unsigned int i, nelt, which;
8053 bool ret;
8054
8055 d.vmode = vmode;
8056 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8057 d.testing_p = true;
8058 memcpy (d.perm, sel, nelt);
8059
8060 /* Calculate whether all elements are in one vector. */
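/* WHICH ends up as 1 if only the first vector is referenced, 2 if only
   the second, and 3 if both are referenced. */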
8061 for (i = which = 0; i < nelt; ++i)
8062 {
8063 unsigned char e = d.perm[i];
8064 gcc_assert (e < 2 * nelt);
8065 which |= (e < nelt ? 1 : 2);
8066 }
8067
8068 /* If all elements are from the second vector, reindex as if from the
8069 first vector. */
8070 if (which == 2)
8071 for (i = 0; i < nelt; ++i)
8072 d.perm[i] -= nelt;
8073
8074 /* Check whether the mask can be applied to a single vector. */
8075 d.one_vector_p = (which != 3);
8076
8077 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8078 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8079 if (!d.one_vector_p)
8080 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8081
8082 start_sequence ();
8083 ret = aarch64_expand_vec_perm_const_1 (&d);
8084 end_sequence ();
8085
8086 return ret;
8087 }
8088
8089 #undef TARGET_ADDRESS_COST
8090 #define TARGET_ADDRESS_COST aarch64_address_cost
8091
8092 /* This hook determines whether unnamed bitfields affect the alignment
8093 of the containing structure. The hook returns true if the structure
8094 should inherit the alignment requirements of an unnamed bitfield's
8095 type. */
8096 #undef TARGET_ALIGN_ANON_BITFIELD
8097 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8098
8099 #undef TARGET_ASM_ALIGNED_DI_OP
8100 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8101
8102 #undef TARGET_ASM_ALIGNED_HI_OP
8103 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8104
8105 #undef TARGET_ASM_ALIGNED_SI_OP
8106 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8107
8108 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8109 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8110 hook_bool_const_tree_hwi_hwi_const_tree_true
8111
8112 #undef TARGET_ASM_FILE_START
8113 #define TARGET_ASM_FILE_START aarch64_start_file
8114
8115 #undef TARGET_ASM_OUTPUT_MI_THUNK
8116 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8117
8118 #undef TARGET_ASM_SELECT_RTX_SECTION
8119 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8120
8121 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8122 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8123
8124 #undef TARGET_BUILD_BUILTIN_VA_LIST
8125 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8126
8127 #undef TARGET_CALLEE_COPIES
8128 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8129
8130 #undef TARGET_CAN_ELIMINATE
8131 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8132
8133 #undef TARGET_CANNOT_FORCE_CONST_MEM
8134 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8135
8136 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8137 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8138
8139 /* Only the least significant bit is used for initialization guard
8140 variables. */
8141 #undef TARGET_CXX_GUARD_MASK_BIT
8142 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8143
8144 #undef TARGET_C_MODE_FOR_SUFFIX
8145 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8146
8147 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8148 #undef TARGET_DEFAULT_TARGET_FLAGS
8149 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8150 #endif
8151
8152 #undef TARGET_CLASS_MAX_NREGS
8153 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8154
8155 #undef TARGET_BUILTIN_DECL
8156 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8157
8158 #undef TARGET_EXPAND_BUILTIN
8159 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8160
8161 #undef TARGET_EXPAND_BUILTIN_VA_START
8162 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8163
8164 #undef TARGET_FOLD_BUILTIN
8165 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8166
8167 #undef TARGET_FUNCTION_ARG
8168 #define TARGET_FUNCTION_ARG aarch64_function_arg
8169
8170 #undef TARGET_FUNCTION_ARG_ADVANCE
8171 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8172
8173 #undef TARGET_FUNCTION_ARG_BOUNDARY
8174 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8175
8176 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8177 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8178
8179 #undef TARGET_FUNCTION_VALUE
8180 #define TARGET_FUNCTION_VALUE aarch64_function_value
8181
8182 #undef TARGET_FUNCTION_VALUE_REGNO_P
8183 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8184
8185 #undef TARGET_FRAME_POINTER_REQUIRED
8186 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8187
8188 #undef TARGET_GIMPLE_FOLD_BUILTIN
8189 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8190
8191 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8192 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8193
8194 #undef TARGET_INIT_BUILTINS
8195 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8196
8197 #undef TARGET_LEGITIMATE_ADDRESS_P
8198 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8199
8200 #undef TARGET_LEGITIMATE_CONSTANT_P
8201 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8202
8203 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8204 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8205
8206 #undef TARGET_MANGLE_TYPE
8207 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8208
8209 #undef TARGET_MEMORY_MOVE_COST
8210 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8211
8212 #undef TARGET_MUST_PASS_IN_STACK
8213 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8214
8215 /* This target hook should return true if accesses to volatile bitfields
8216 should use the narrowest mode possible. It should return false if these
8217 accesses should use the bitfield container type. */
8218 #undef TARGET_NARROW_VOLATILE_BITFIELD
8219 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8220
8221 #undef TARGET_OPTION_OVERRIDE
8222 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8223
8224 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8225 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8226 aarch64_override_options_after_change
8227
8228 #undef TARGET_PASS_BY_REFERENCE
8229 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8230
8231 #undef TARGET_PREFERRED_RELOAD_CLASS
8232 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8233
8234 #undef TARGET_SECONDARY_RELOAD
8235 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8236
8237 #undef TARGET_SHIFT_TRUNCATION_MASK
8238 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8239
8240 #undef TARGET_SETUP_INCOMING_VARARGS
8241 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8242
8243 #undef TARGET_STRUCT_VALUE_RTX
8244 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8245
8246 #undef TARGET_REGISTER_MOVE_COST
8247 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8248
8249 #undef TARGET_RETURN_IN_MEMORY
8250 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8251
8252 #undef TARGET_RETURN_IN_MSB
8253 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8254
8255 #undef TARGET_RTX_COSTS
8256 #define TARGET_RTX_COSTS aarch64_rtx_costs
8257
8258 #undef TARGET_TRAMPOLINE_INIT
8259 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8260
8261 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8262 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8263
8264 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8265 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8266
8267 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8268 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8269
8270 #undef TARGET_VECTORIZE_ADD_STMT_COST
8271 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8272
8273 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8274 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8275 aarch64_builtin_vectorization_cost
8276
8277 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8278 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8279
8280 #undef TARGET_VECTORIZE_BUILTINS
8281 #define TARGET_VECTORIZE_BUILTINS
8282
8283 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8284 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8285 aarch64_builtin_vectorized_function
8286
8287 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8288 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8289 aarch64_autovectorize_vector_sizes
8290
8291 /* Section anchor support. */
8292
8293 #undef TARGET_MIN_ANCHOR_OFFSET
8294 #define TARGET_MIN_ANCHOR_OFFSET -256
8295
8296 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8297 byte offset; we can do much more for larger data types, but have no way
8298 to determine the size of the access. We assume accesses are aligned. */
8299 #undef TARGET_MAX_ANCHOR_OFFSET
8300 #define TARGET_MAX_ANCHOR_OFFSET 4095
8301
8302 #undef TARGET_VECTOR_ALIGNMENT
8303 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8304
8305 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8306 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8307 aarch64_simd_vector_alignment_reachable
8308
8309 /* vec_perm support. */
8310
8311 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8312 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8313 aarch64_vectorize_vec_perm_const_ok
8314
8315
8316 #undef TARGET_FIXED_CONDITION_CODE_REGS
8317 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8318
8319 struct gcc_target targetm = TARGET_INITIALIZER;
8320
8321 #include "gt-aarch64.h"