1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
69
70 /* Classifies an address.
71
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
74
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
77
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
80
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
83
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
86
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
89
90 ADDRESS_SYMBOLIC
91 A constant symbolic address, in pc-relative literal pool. */
92
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
101 };
102
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
109 };
110
111 struct simd_immediate_info
112 {
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
118 };
119
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
122
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
127
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
141
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
144
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
147
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
150
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
153
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
156
157 /* Tuning parameters. */
158
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
164
165 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166 __extension__
167 #endif
168
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
173 {
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
179 };
180
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
185 {
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
193 };
194
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
200 {
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213 };
214
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
219 {
220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4)
225 };
226
227 static const struct tune_params cortexa53_tunings =
228 {
229 &cortexa53_extra_costs,
230 &generic_addrcost_table,
231 &generic_regmove_cost,
232 &generic_vector_cost,
233 NAMED_PARAM (memmov_cost, 4)
234 };
235
236 /* A processor implementing AArch64. */
237 struct processor
238 {
239 const char *const name;
240 enum aarch64_processor core;
241 const char *arch;
242 const unsigned long flags;
243 const struct tune_params *const tune;
244 };
245
246 /* Processor cores implementing AArch64. */
247 static const struct processor all_cores[] =
248 {
249 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
250 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
251 #include "aarch64-cores.def"
252 #undef AARCH64_CORE
253 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
254 {NULL, aarch64_none, NULL, 0, NULL}
255 };
256
257 /* Architectures implementing AArch64. */
258 static const struct processor all_architectures[] =
259 {
260 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
261 {NAME, CORE, #ARCH, FLAGS, NULL},
262 #include "aarch64-arches.def"
263 #undef AARCH64_ARCH
264 {NULL, aarch64_none, NULL, 0, NULL}
265 };
266
267 /* Target specification. These are populated as command-line arguments
268 are processed, or NULL if not specified. */
269 static const struct processor *selected_arch;
270 static const struct processor *selected_cpu;
271 static const struct processor *selected_tune;
272
273 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
274
275 /* An ISA extension in the co-processor and main instruction set space. */
276 struct aarch64_option_extension
277 {
278 const char *const name;
279 const unsigned long flags_on;
280 const unsigned long flags_off;
281 };
282
283 /* ISA extensions in AArch64. */
284 static const struct aarch64_option_extension all_extensions[] =
285 {
286 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
287 {NAME, FLAGS_ON, FLAGS_OFF},
288 #include "aarch64-option-extensions.def"
289 #undef AARCH64_OPT_EXTENSION
290 {NULL, 0, 0}
291 };
292
293 /* Used to track the size of an address when generating a pre/post
294 increment address. */
295 static enum machine_mode aarch64_memory_reference_mode;
296
297 /* Used to force GTY into this file. */
298 static GTY(()) int gty_dummy;
299
300 /* A table of valid AArch64 "bitmask immediate" values for
301 logical instructions. */
302
303 #define AARCH64_NUM_BITMASKS 5334
304 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
305
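/* For illustration only (not the code GCC uses to populate the table
   above): an AArch64 bitmask immediate is a contiguous run of set bits,
   rotated within an element of 2, 4, 8, 16, 32 or 64 bits and then
   replicated across the register.  The standalone sketch below simply
   enumerates those (element width, run length, rotation) combinations;
   their count, the sum of e * (e - 1) over the element widths, is 5334
   and matches AARCH64_NUM_BITMASKS.  */
#if 0
#include <stdint.h>
#include <stdio.h>

/* Replicate across 64 bits an element of WIDTH bits containing a run of
   ONES set bits rotated right by ROT.  */
static uint64_t
bitmask_pattern (unsigned width, unsigned ones, unsigned rot)
{
  uint64_t elt = (1ULL << ones) - 1;    /* ONES < WIDTH <= 64.  */
  if (rot)
    elt = (elt >> rot) | (elt << (width - rot));
  if (width < 64)
    elt &= (1ULL << width) - 1;
  uint64_t val = 0;
  for (unsigned i = 0; i < 64; i += width)
    val |= elt << i;
  return val;
}

int
main (void)
{
  unsigned count = 0;
  for (unsigned width = 2; width <= 64; width *= 2)
    for (unsigned ones = 1; ones < width; ones++)
      for (unsigned rot = 0; rot < width; rot++)
        {
          (void) bitmask_pattern (width, ones, rot);
          count++;
        }
  printf ("%u\n", count);  /* 2*1 + 4*3 + 8*7 + 16*15 + 32*31 + 64*63 = 5334.  */
  return 0;
}
#endif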
306 /* Did we set flag_omit_frame_pointer just so
307 aarch64_frame_pointer_required would be called? */
308 static bool faked_omit_frame_pointer;
309
310 typedef enum aarch64_cond_code
311 {
312 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
313 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
314 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
315 }
316 aarch64_cc;
317
318 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
319
320 /* The condition codes of the processor, and the inverse function. */
321 static const char * const aarch64_condition_codes[] =
322 {
323 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
324 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
325 };
326
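/* The enum above follows the A64 condition encoding, in which a condition
   and its logical inverse differ only in bit 0; that is why
   AARCH64_INVERSE_CONDITION_CODE is a single XOR.  A standalone sketch
   (illustration only) that prints the resulting pairs using the same
   name table:  */
#if 0
#include <stdio.h>

int
main (void)
{
  static const char *const names[] =
    {
      "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
      "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
    };
  int cc;

  for (cc = 0; cc < 16; cc++)
    /* Prints eq <-> ne, cs <-> cc, ..., gt <-> le, al <-> nv.  */
    printf ("%s <-> %s\n", names[cc], names[cc ^ 1]);
  return 0;
}
#endif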
327 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
328 unsigned
329 aarch64_dbx_register_number (unsigned regno)
330 {
331 if (GP_REGNUM_P (regno))
332 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
333 else if (regno == SP_REGNUM)
334 return AARCH64_DWARF_SP;
335 else if (FP_REGNUM_P (regno))
336 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
337
338 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
339 equivalent DWARF register. */
340 return DWARF_FRAME_REGISTERS;
341 }
342
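/* A minimal standalone sketch of the mapping above, assuming the usual
   AArch64 DWARF numbering behind AARCH64_DWARF_R0, AARCH64_DWARF_SP and
   AARCH64_DWARF_V0 (x0-x30 -> 0-30, sp -> 31, v0-v31 -> 64-95); the real
   constants live elsewhere, so the 0/31/64 bases here are an assumption.  */
#if 0
/* IS_VECTOR selects the v-register file; IDX is the register index.  */
static unsigned
sketch_dwarf_number (int is_vector, unsigned idx)
{
  if (is_vector && idx <= 31)
    return 64 + idx;            /* v0-v31 -> 64-95.  */
  if (!is_vector && idx <= 30)
    return 0 + idx;             /* x0-x30 -> 0-30.  */
  if (!is_vector && idx == 31)
    return 31;                  /* sp -> 31.  */
  return ~0U;                   /* No equivalent DWARF register.  */
}
#endif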
343 /* Return TRUE if MODE is any of the large INT modes. */
344 static bool
345 aarch64_vect_struct_mode_p (enum machine_mode mode)
346 {
347 return mode == OImode || mode == CImode || mode == XImode;
348 }
349
350 /* Return TRUE if MODE is any of the vector modes. */
351 static bool
352 aarch64_vector_mode_p (enum machine_mode mode)
353 {
354 return aarch64_vector_mode_supported_p (mode)
355 || aarch64_vect_struct_mode_p (mode);
356 }
357
358 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
359 static bool
360 aarch64_array_mode_supported_p (enum machine_mode mode,
361 unsigned HOST_WIDE_INT nelems)
362 {
363 if (TARGET_SIMD
364 && AARCH64_VALID_SIMD_QREG_MODE (mode)
365 && (nelems >= 2 && nelems <= 4))
366 return true;
367
368 return false;
369 }
370
371 /* Implement HARD_REGNO_NREGS. */
372
373 int
374 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
375 {
376 switch (aarch64_regno_regclass (regno))
377 {
378 case FP_REGS:
379 case FP_LO_REGS:
380 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
381 default:
382 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
383 }
384 gcc_unreachable ();
385 }
386
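/* The function above is a ceiling division of the mode size by the
   register size.  A standalone sketch with a worked example, assuming
   the AArch64 values UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16 bytes:  */
#if 0
/* Number of REG_BYTES-sized registers needed to hold MODE_BYTES bytes.  */
static int
sketch_nregs (int mode_bytes, int reg_bytes)
{
  return (mode_bytes + reg_bytes - 1) / reg_bytes;
}

/* A 16-byte value such as TImode or TFmode needs
     sketch_nregs (16, 8)  == 2  general registers, but
     sketch_nregs (16, 16) == 1  vector register.  */
#endif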
387 /* Implement HARD_REGNO_MODE_OK. */
388
389 int
390 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
391 {
392 if (GET_MODE_CLASS (mode) == MODE_CC)
393 return regno == CC_REGNUM;
394
395 if (regno == SP_REGNUM)
396 /* The purpose of comparing with ptr_mode is to support the
397 global register variable associated with the stack pointer
398 register via the syntax of asm ("wsp") in ILP32. */
399 return mode == Pmode || mode == ptr_mode;
400
401 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
402 return mode == Pmode;
403
404 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
405 return 1;
406
407 if (FP_REGNUM_P (regno))
408 {
409 if (aarch64_vect_struct_mode_p (mode))
410 return
411 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
412 else
413 return 1;
414 }
415
416 return 0;
417 }
418
419 /* Return true if calls to DECL should be treated as
420 long-calls (i.e. called via a register). */
421 static bool
422 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
423 {
424 return false;
425 }
426
427 /* Return true if calls to symbol-ref SYM should be treated as
428 long-calls (i.e. called via a register). */
429 bool
430 aarch64_is_long_call_p (rtx sym)
431 {
432 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
433 }
434
435 /* Return true if the offsets to a zero/sign-extract operation
436 represent an expression that matches an extend operation. The
437 operands represent the parameters from
438
439 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
440 bool
441 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
442 rtx extract_imm)
443 {
444 HOST_WIDE_INT mult_val, extract_val;
445
446 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
447 return false;
448
449 mult_val = INTVAL (mult_imm);
450 extract_val = INTVAL (extract_imm);
451
452 if (extract_val > 8
453 && extract_val < GET_MODE_BITSIZE (mode)
454 && exact_log2 (extract_val & ~7) > 0
455 && (extract_val & 7) <= 4
456 && mult_val == (1 << (extract_val & 7)))
457 return true;
458
459 return false;
460 }
461
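/* A concrete instance of the check above, as a standalone sketch
   (illustration only): with mult_imm == 8 and extract_imm == 11 we have
   extract_val & ~7 == 8 (a power of two), extract_val & 7 == 3 <= 4 and
   mult_val == 1 << 3, so (extract (mult reg 8) 11 0) is accepted; it
   extracts the low 8 bits of reg shifted left by 3, i.e. it behaves like
   an extend of a byte combined with a left shift by 3.  */
#if 0
#include <stdbool.h>

static bool
sketch_is_extend_from_extract (int mode_bits, long long mult_val,
                               long long extract_val)
{
  long long hi = extract_val & ~7;      /* >= 8 whenever extract_val > 8.  */

  return (extract_val > 8
          && extract_val < mode_bits
          && (hi & (hi - 1)) == 0       /* i.e. exact_log2 (hi) > 0.  */
          && (extract_val & 7) <= 4
          && mult_val == (1 << (extract_val & 7)));
}

/* sketch_is_extend_from_extract (64, 8, 11) == true   (extend + shift by 3)
   sketch_is_extend_from_extract (64, 4, 11) == false  (shift amounts differ)  */
#endif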
462 /* Emit an insn that's a simple single-set. Both the operands must be
463 known to be valid. */
464 inline static rtx
465 emit_set_insn (rtx x, rtx y)
466 {
467 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
468 }
469
470 /* X and Y are two things to compare using CODE. Emit the compare insn and
471 return the rtx for register 0 in the proper mode. */
472 rtx
473 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
474 {
475 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
476 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
477
478 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
479 return cc_reg;
480 }
481
482 /* Build the SYMBOL_REF for __tls_get_addr. */
483
484 static GTY(()) rtx tls_get_addr_libfunc;
485
486 rtx
487 aarch64_tls_get_addr (void)
488 {
489 if (!tls_get_addr_libfunc)
490 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
491 return tls_get_addr_libfunc;
492 }
493
494 /* Return the TLS model to use for ADDR. */
495
496 static enum tls_model
497 tls_symbolic_operand_type (rtx addr)
498 {
499 enum tls_model tls_kind = TLS_MODEL_NONE;
500 rtx sym, addend;
501
502 if (GET_CODE (addr) == CONST)
503 {
504 split_const (addr, &sym, &addend);
505 if (GET_CODE (sym) == SYMBOL_REF)
506 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
507 }
508 else if (GET_CODE (addr) == SYMBOL_REF)
509 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
510
511 return tls_kind;
512 }
513
514 /* We allow LO_SUMs in our legitimate addresses so that combine
515 can take care of combining addresses where necessary, but for
516 generation purposes we generate the address
517 as:
518 RTL Absolute
519 tmp = hi (symbol_ref); adrp x1, foo
520 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
521 nop
522
523 PIC TLS
524 adrp x1, :got:foo adrp tmp, :tlsgd:foo
525 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
526 bl __tls_get_addr
527 nop
528
529 Load TLS symbol, depending on TLS mechanism and TLS access model.
530
531 Global Dynamic - Traditional TLS:
532 adrp tmp, :tlsgd:imm
533 add dest, tmp, #:tlsgd_lo12:imm
534 bl __tls_get_addr
535
536 Global Dynamic - TLS Descriptors:
537 adrp dest, :tlsdesc:imm
538 ldr tmp, [dest, #:tlsdesc_lo12:imm]
539 add dest, dest, #:tlsdesc_lo12:imm
540 blr tmp
541 mrs tp, tpidr_el0
542 add dest, dest, tp
543
544 Initial Exec:
545 mrs tp, tpidr_el0
546 adrp tmp, :gottprel:imm
547 ldr dest, [tmp, #:gottprel_lo12:imm]
548 add dest, dest, tp
549
550 Local Exec:
551 mrs tp, tpidr_el0
552 add t0, tp, #:tprel_hi12:imm
553 add t0, #:tprel_lo12_nc:imm
554 */
555
556 static void
557 aarch64_load_symref_appropriately (rtx dest, rtx imm,
558 enum aarch64_symbol_type type)
559 {
560 switch (type)
561 {
562 case SYMBOL_SMALL_ABSOLUTE:
563 {
564 /* In ILP32, the mode of dest can be either SImode or DImode. */
565 rtx tmp_reg = dest;
566 enum machine_mode mode = GET_MODE (dest);
567
568 gcc_assert (mode == Pmode || mode == ptr_mode);
569
570 if (can_create_pseudo_p ())
571 tmp_reg = gen_reg_rtx (mode);
572
573 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
574 emit_insn (gen_add_losym (dest, tmp_reg, imm));
575 return;
576 }
577
578 case SYMBOL_TINY_ABSOLUTE:
579 emit_insn (gen_rtx_SET (Pmode, dest, imm));
580 return;
581
582 case SYMBOL_SMALL_GOT:
583 {
584 /* In ILP32, the mode of dest can be either SImode or DImode,
585 while the got entry is always of SImode size. The mode of
586 dest depends on how dest is used: if dest is assigned to a
587 pointer (e.g. in memory), it has SImode; it may have
588 DImode if dest is dereferenced to access the memory.
589 This is why we have to handle three different ldr_got_small
590 patterns here (two patterns for ILP32). */
591 rtx tmp_reg = dest;
592 enum machine_mode mode = GET_MODE (dest);
593
594 if (can_create_pseudo_p ())
595 tmp_reg = gen_reg_rtx (mode);
596
597 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
598 if (mode == ptr_mode)
599 {
600 if (mode == DImode)
601 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
602 else
603 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
604 }
605 else
606 {
607 gcc_assert (mode == Pmode);
608 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
609 }
610
611 return;
612 }
613
614 case SYMBOL_SMALL_TLSGD:
615 {
616 rtx insns;
617 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
618
619 start_sequence ();
620 emit_call_insn (gen_tlsgd_small (result, imm));
621 insns = get_insns ();
622 end_sequence ();
623
624 RTL_CONST_CALL_P (insns) = 1;
625 emit_libcall_block (insns, dest, result, imm);
626 return;
627 }
628
629 case SYMBOL_SMALL_TLSDESC:
630 {
631 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
632 rtx tp;
633
634 emit_insn (gen_tlsdesc_small (imm));
635 tp = aarch64_load_tp (NULL);
636 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
638 return;
639 }
640
641 case SYMBOL_SMALL_GOTTPREL:
642 {
643 rtx tmp_reg = gen_reg_rtx (Pmode);
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsie_small (tmp_reg, imm));
646 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
647 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
648 return;
649 }
650
651 case SYMBOL_SMALL_TPREL:
652 {
653 rtx tp = aarch64_load_tp (NULL);
654 emit_insn (gen_tlsle_small (dest, tp, imm));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
657 }
658
659 case SYMBOL_TINY_GOT:
660 emit_insn (gen_ldr_got_tiny (dest, imm));
661 return;
662
663 default:
664 gcc_unreachable ();
665 }
666 }
667
668 /* Emit a move from SRC to DEST. Assume that the move expanders can
669 handle all moves if !can_create_pseudo_p (). The distinction is
670 important because, unlike emit_move_insn, the move expanders know
671 how to force Pmode objects into the constant pool even when the
672 constant pool address is not itself legitimate. */
673 static rtx
674 aarch64_emit_move (rtx dest, rtx src)
675 {
676 return (can_create_pseudo_p ()
677 ? emit_move_insn (dest, src)
678 : emit_move_insn_1 (dest, src));
679 }
680
681 void
682 aarch64_split_128bit_move (rtx dst, rtx src)
683 {
684 rtx low_dst;
685
686 enum machine_mode src_mode = GET_MODE (src);
687 enum machine_mode dst_mode = GET_MODE (dst);
688 int src_regno = REGNO (src);
689 int dst_regno = REGNO (dst);
690
691 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
692
693 if (REG_P (dst) && REG_P (src))
694 {
695 gcc_assert (src_mode == TImode || src_mode == TFmode);
696
697 /* Handle r -> w, w -> r. */
698 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
699 {
700 switch (src_mode) {
701 case TImode:
702 emit_insn
703 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
704 emit_insn
705 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
706 return;
707 case TFmode:
708 emit_insn
709 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
710 emit_insn
711 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
712 return;
713 default:
714 gcc_unreachable ();
715 }
716 }
717 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
718 {
719 switch (src_mode) {
720 case TImode:
721 emit_insn
722 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
723 emit_insn
724 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
725 return;
726 case TFmode:
727 emit_insn
728 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
729 emit_insn
730 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
731 return;
732 default:
733 gcc_unreachable ();
734 }
735 }
736 /* Fall through to r -> r cases. */
737 }
738
739 switch (dst_mode) {
740 case TImode:
741 low_dst = gen_lowpart (word_mode, dst);
742 if (REG_P (low_dst)
743 && reg_overlap_mentioned_p (low_dst, src))
744 {
745 aarch64_emit_move (gen_highpart (word_mode, dst),
746 gen_highpart_mode (word_mode, TImode, src));
747 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
748 }
749 else
750 {
751 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
752 aarch64_emit_move (gen_highpart (word_mode, dst),
753 gen_highpart_mode (word_mode, TImode, src));
754 }
755 return;
756 case TFmode:
757 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
758 gen_rtx_REG (DFmode, src_regno));
759 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
760 gen_rtx_REG (DFmode, src_regno + 1));
761 return;
762 default:
763 gcc_unreachable ();
764 }
765 }
766
767 bool
768 aarch64_split_128bit_move_p (rtx dst, rtx src)
769 {
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
772 }
773
774 /* Split a complex SIMD combine. */
775
776 void
777 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
778 {
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
781
782 gcc_assert (VECTOR_MODE_P (dst_mode));
783
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
785 {
786 rtx (*gen) (rtx, rtx, rtx);
787
788 switch (src_mode)
789 {
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
810 }
811
812 emit_insn (gen (dst, src1, src2));
813 return;
814 }
815 }
816
817 /* Split a complex SIMD move. */
818
819 void
820 aarch64_split_simd_move (rtx dst, rtx src)
821 {
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
824
825 gcc_assert (VECTOR_MODE_P (dst_mode));
826
827 if (REG_P (dst) && REG_P (src))
828 {
829 rtx (*gen) (rtx, rtx);
830
831 gcc_assert (VECTOR_MODE_P (src_mode));
832
833 switch (src_mode)
834 {
835 case V16QImode:
836 gen = gen_aarch64_split_simd_movv16qi;
837 break;
838 case V8HImode:
839 gen = gen_aarch64_split_simd_movv8hi;
840 break;
841 case V4SImode:
842 gen = gen_aarch64_split_simd_movv4si;
843 break;
844 case V2DImode:
845 gen = gen_aarch64_split_simd_movv2di;
846 break;
847 case V4SFmode:
848 gen = gen_aarch64_split_simd_movv4sf;
849 break;
850 case V2DFmode:
851 gen = gen_aarch64_split_simd_movv2df;
852 break;
853 default:
854 gcc_unreachable ();
855 }
856
857 emit_insn (gen (dst, src));
858 return;
859 }
860 }
861
862 static rtx
863 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
864 {
865 if (can_create_pseudo_p ())
866 return force_reg (mode, value);
867 else
868 {
869 x = aarch64_emit_move (x, value);
870 return x;
871 }
872 }
873
874
875 static rtx
876 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
877 {
878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
879 {
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
888 }
889 return plus_constant (mode, reg, offset);
890 }
891
892 void
893 aarch64_expand_mov_immediate (rtx dest, rtx imm)
894 {
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
903
904 gcc_assert (mode == SImode || mode == DImode);
905
906 /* Check on what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
910 {
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
913
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
917
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
920 {
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
924 {
925 gcc_assert(can_create_pseudo_p ());
926 base = aarch64_force_temporary (mode, dest, base);
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
930 }
931 mem = force_const_mem (ptr_mode, imm);
932 gcc_assert (mem);
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
937
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
942 case SYMBOL_TINY_GOT:
943 if (offset != const0_rtx)
944 {
945 gcc_assert(can_create_pseudo_p ());
946 base = aarch64_force_temporary (mode, dest, base);
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
950 }
951 /* FALLTHRU */
952
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
955 case SYMBOL_TINY_ABSOLUTE:
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
958
959 default:
960 gcc_unreachable ();
961 }
962 }
963
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
965 {
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
968 }
969
970 if (!CONST_INT_P (imm))
971 {
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
975 {
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
979 }
980
981 return;
982 }
983
984 if (mode == SImode)
985 {
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
993 }
994
995 /* Remaining cases are all for DImode. */
996
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
999
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
1003
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1005 {
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1010 }
1011
1012 if (one_match == 2)
1013 {
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1016 {
1017 if ((val & mask) != mask)
1018 {
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1023 }
1024 }
1025 gcc_unreachable ();
1026 }
1027
1028 if (zero_match == 2)
1029 goto simple_sequence;
1030
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1033 {
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1035
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1037 {
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1044 }
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1046 {
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1048
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1054 }
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1056 {
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1058
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1064 }
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1066 {
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1068
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1074 }
1075 }
1076
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1080 {
1081 int j;
1082 mask = 0xffff;
1083
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1086 {
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1093 }
1094
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1096 {
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1098 {
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1104 }
1105 }
1106 }
1107
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1110 {
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1112 {
1113 int j;
1114
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1117 {
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1124 }
1125 }
1126 else if ((val & aarch64_bitmasks[i]) == val)
1127 {
1128 int j;
1129
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1132 {
1133
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1140 }
1141 }
1142 }
1143
1144 simple_sequence:
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1148 {
1149 if ((val & mask) != 0)
1150 {
1151 if (first)
1152 {
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1156 }
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
1160 }
1161 }
1162 }
1163
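/* The DImode path above starts by classifying the four 16-bit chunks of
   the value: zero_match and one_match select between the MOVZ-based
   simple sequence, the MOVN-style two-instruction case when two chunks
   are all-ones, and the ADD/ORR/AND combinations tried in between.  A
   standalone sketch (illustration only) of the chunk classification and
   of the length of the fallback simple sequence:  */
#if 0
#include <stdint.h>

/* Return the number of instructions the "simple_sequence" fallback would
   use for VAL: one MOVZ for the first non-zero 16-bit chunk plus one MOVK
   for each further non-zero chunk.  (The value zero is caught earlier, by
   the aarch64_move_imm test.)  */
static int
sketch_simple_sequence_length (uint64_t val)
{
  int zero_match = 0, one_match = 0;
  int i;

  for (i = 0; i < 64; i += 16)
    {
      unsigned chunk = (val >> i) & 0xffff;
      if (chunk == 0)
        zero_match++;
      else if (chunk == 0xffff)
        one_match++;
    }
  /* The real code does better when one_match == 2, or when an ADD/ORR/AND
     of two easy immediates reaches VAL.  */
  (void) one_match;
  return val == 0 ? 1 : 4 - zero_match;
}

/* sketch_simple_sequence_length (0x0000ffff00001234ULL) == 2  (MOVZ + MOVK)
   sketch_simple_sequence_length (0x1234567890abcdefULL) == 4  (MOVZ + 3 MOVKs)  */
#endif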
1164 static bool
1165 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1166 {
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1170
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls). */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1176
1177 return true;
1178 }
1179
1180 /* Implement TARGET_PASS_BY_REFERENCE. */
1181
1182 static bool
1183 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1187 {
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1191
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1195
1196 if (type)
1197 {
1198 /* Arrays always passed by reference. */
1199 if (TREE_CODE (type) == ARRAY_TYPE)
1200 return true;
1201 /* Other aggregates based on their size. */
1202 if (AGGREGATE_TYPE_P (type))
1203 size = int_size_in_bytes (type);
1204 }
1205
1206 /* Variable sized arguments are always returned by reference. */
1207 if (size < 0)
1208 return true;
1209
1210 /* Can this be a candidate to be passed in fp/simd register(s)? */
1211 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1212 &dummymode, &nregs,
1213 NULL))
1214 return false;
1215
1216 /* Arguments which are variable sized or larger than 2 registers are
1217 passed by reference unless they are a homogeneous floating-point
1218 aggregate. */
1219 return size > 2 * UNITS_PER_WORD;
1220 }
1221
1222 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1223 static bool
1224 aarch64_return_in_msb (const_tree valtype)
1225 {
1226 enum machine_mode dummy_mode;
1227 int dummy_int;
1228
1229 /* Never happens in little-endian mode. */
1230 if (!BYTES_BIG_ENDIAN)
1231 return false;
1232
1233 /* Only composite types smaller than or equal to 16 bytes can
1234 be potentially returned in registers. */
1235 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1236 || int_size_in_bytes (valtype) <= 0
1237 || int_size_in_bytes (valtype) > 16)
1238 return false;
1239
1240 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1241 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1242 is always passed/returned in the least significant bits of fp/simd
1243 register(s). */
1244 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1245 &dummy_mode, &dummy_int, NULL))
1246 return false;
1247
1248 return true;
1249 }
1250
1251 /* Implement TARGET_FUNCTION_VALUE.
1252 Define how to find the value returned by a function. */
1253
1254 static rtx
1255 aarch64_function_value (const_tree type, const_tree func,
1256 bool outgoing ATTRIBUTE_UNUSED)
1257 {
1258 enum machine_mode mode;
1259 int unsignedp;
1260 int count;
1261 enum machine_mode ag_mode;
1262
1263 mode = TYPE_MODE (type);
1264 if (INTEGRAL_TYPE_P (type))
1265 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1266
1267 if (aarch64_return_in_msb (type))
1268 {
1269 HOST_WIDE_INT size = int_size_in_bytes (type);
1270
1271 if (size % UNITS_PER_WORD != 0)
1272 {
1273 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1274 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1275 }
1276 }
1277
1278 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1279 &ag_mode, &count, NULL))
1280 {
1281 if (!aarch64_composite_type_p (type, mode))
1282 {
1283 gcc_assert (count == 1 && mode == ag_mode);
1284 return gen_rtx_REG (mode, V0_REGNUM);
1285 }
1286 else
1287 {
1288 int i;
1289 rtx par;
1290
1291 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1292 for (i = 0; i < count; i++)
1293 {
1294 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1295 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1296 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1297 XVECEXP (par, 0, i) = tmp;
1298 }
1299 return par;
1300 }
1301 }
1302 else
1303 return gen_rtx_REG (mode, R0_REGNUM);
1304 }
1305
1306 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1307 Return true if REGNO is the number of a hard register in which the values
1308 of called function may come back. */
1309
1310 static bool
1311 aarch64_function_value_regno_p (const unsigned int regno)
1312 {
1313 /* A maximum of 16 bytes can be returned in the general registers. Examples
1314 of 16-byte return values are: 128-bit integers and 16-byte small
1315 structures (excluding homogeneous floating-point aggregates). */
1316 if (regno == R0_REGNUM || regno == R1_REGNUM)
1317 return true;
1318
1319 /* Up to four fp/simd registers can return a function value, e.g. a
1320 homogeneous floating-point aggregate having four members. */
1321 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1322 return !TARGET_GENERAL_REGS_ONLY;
1323
1324 return false;
1325 }
1326
1327 /* Implement TARGET_RETURN_IN_MEMORY.
1328
1329 If the type T of the result of a function is such that
1330 void func (T arg)
1331 would require that arg be passed as a value in a register (or set of
1332 registers) according to the parameter passing rules, then the result
1333 is returned in the same registers as would be used for such an
1334 argument. */
1335
1336 static bool
1337 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1338 {
1339 HOST_WIDE_INT size;
1340 enum machine_mode ag_mode;
1341 int count;
1342
1343 if (!AGGREGATE_TYPE_P (type)
1344 && TREE_CODE (type) != COMPLEX_TYPE
1345 && TREE_CODE (type) != VECTOR_TYPE)
1346 /* Simple scalar types always returned in registers. */
1347 return false;
1348
1349 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1350 type,
1351 &ag_mode,
1352 &count,
1353 NULL))
1354 return false;
1355
1356 /* Types larger than 2 registers returned in memory. */
1357 size = int_size_in_bytes (type);
1358 return (size < 0 || size > 2 * UNITS_PER_WORD);
1359 }
1360
1361 static bool
1362 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1363 const_tree type, int *nregs)
1364 {
1365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1366 return aarch64_vfp_is_call_or_return_candidate (mode,
1367 type,
1368 &pcum->aapcs_vfp_rmode,
1369 nregs,
1370 NULL);
1371 }
1372
1373 /* Given MODE and TYPE of a function argument, return the alignment in
1374 bits. The idea is to suppress any stronger alignment requested by
1375 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1376 This is a helper function for local use only. */
1377
1378 static unsigned int
1379 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1380 {
1381 unsigned int alignment;
1382
1383 if (type)
1384 {
1385 if (!integer_zerop (TYPE_SIZE (type)))
1386 {
1387 if (TYPE_MODE (type) == mode)
1388 alignment = TYPE_ALIGN (type);
1389 else
1390 alignment = GET_MODE_ALIGNMENT (mode);
1391 }
1392 else
1393 alignment = 0;
1394 }
1395 else
1396 alignment = GET_MODE_ALIGNMENT (mode);
1397
1398 return alignment;
1399 }
1400
1401 /* Layout a function argument according to the AAPCS64 rules. The rule
1402 numbers refer to the rule numbers in the AAPCS64. */
1403
1404 static void
1405 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1406 const_tree type,
1407 bool named ATTRIBUTE_UNUSED)
1408 {
1409 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1410 int ncrn, nvrn, nregs;
1411 bool allocate_ncrn, allocate_nvrn;
1412
1413 /* We need to do this once per argument. */
1414 if (pcum->aapcs_arg_processed)
1415 return;
1416
1417 pcum->aapcs_arg_processed = true;
1418
1419 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1420 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1421 mode,
1422 type,
1423 &nregs);
1424
1425 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1426 The following code thus handles passing by SIMD/FP registers first. */
1427
1428 nvrn = pcum->aapcs_nvrn;
1429
1430 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1431 and homogeneous short-vector aggregates (HVA). */
1432 if (allocate_nvrn)
1433 {
1434 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1435 {
1436 pcum->aapcs_nextnvrn = nvrn + nregs;
1437 if (!aarch64_composite_type_p (type, mode))
1438 {
1439 gcc_assert (nregs == 1);
1440 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1441 }
1442 else
1443 {
1444 rtx par;
1445 int i;
1446 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1447 for (i = 0; i < nregs; i++)
1448 {
1449 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1450 V0_REGNUM + nvrn + i);
1451 tmp = gen_rtx_EXPR_LIST
1452 (VOIDmode, tmp,
1453 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1454 XVECEXP (par, 0, i) = tmp;
1455 }
1456 pcum->aapcs_reg = par;
1457 }
1458 return;
1459 }
1460 else
1461 {
1462 /* C.3 NSRN is set to 8. */
1463 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1464 goto on_stack;
1465 }
1466 }
1467
1468 ncrn = pcum->aapcs_ncrn;
1469 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1470 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1471
1472
1473 /* C6 - C9, though the sign and zero extension semantics are
1474 handled elsewhere. This is the case where the argument fits
1475 entirely in general registers. */
1476 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1477 {
1478 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1479
1480 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1481
1482 /* C.8 if the argument has an alignment of 16 then the NGRN is
1483 rounded up to the next even number. */
1484 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1485 {
1486 ++ncrn;
1487 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1488 }
1489 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1490 A reg is still generated for it, but the caller should be smart
1491 enough not to use it. */
1492 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1493 {
1494 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1495 }
1496 else
1497 {
1498 rtx par;
1499 int i;
1500
1501 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1502 for (i = 0; i < nregs; i++)
1503 {
1504 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1505 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1506 GEN_INT (i * UNITS_PER_WORD));
1507 XVECEXP (par, 0, i) = tmp;
1508 }
1509 pcum->aapcs_reg = par;
1510 }
1511
1512 pcum->aapcs_nextncrn = ncrn + nregs;
1513 return;
1514 }
1515
1516 /* C.11 */
1517 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1518
1519 /* The argument is passed on stack; record the needed number of words for
1520 this argument (we can re-use NREGS) and align the total size if
1521 necessary. */
1522 on_stack:
1523 pcum->aapcs_stack_words = nregs;
1524 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1525 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1526 16 / UNITS_PER_WORD) + 1;
1527 return;
1528 }
1529
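/* A condensed standalone model (illustration only, not the GCC code
   itself) of the core-register part of the rules above: x0-x7 are the
   eight argument registers (NUM_ARG_REGS), rule C.8 rounds an odd NGRN
   up to the next even number for 16-byte-aligned two-register arguments,
   and anything that does not fit sets NGRN to 8 and goes to the stack.  */
#if 0
#include <stdbool.h>

struct sketch_state { int ngrn; int stack_words; };

/* Allocate an integral argument of SIZE bytes and ALIGN-byte alignment.
   Returns true and the first register index via *FIRST_REG when it is
   passed in x[NGRN]..., false when it is passed on the stack.  */
static bool
sketch_alloc_core_arg (struct sketch_state *st, int size, int align,
                       int *first_reg)
{
  int nregs = (size + 7) / 8;

  /* C.8: a 16-byte-aligned two-register argument starts at an even NGRN.  */
  if (nregs == 2 && align == 16 && (st->ngrn & 1))
    st->ngrn++;

  if (st->ngrn + nregs <= 8)
    {
      *first_reg = st->ngrn;
      st->ngrn += nregs;
      return true;
    }

  /* C.11: no core registers are available for later arguments.  */
  st->ngrn = 8;
  st->stack_words += nregs;
  return false;
}

/* Starting from {0, 0}: a long (8 bytes) lands in x0; a following
   __int128 (16 bytes, 16-byte aligned) would start at x1 but is rounded
   up by C.8 and is passed in x2/x3.  */
#endif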
1530 /* Implement TARGET_FUNCTION_ARG. */
1531
1532 static rtx
1533 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1534 const_tree type, bool named)
1535 {
1536 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1537 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1538
1539 if (mode == VOIDmode)
1540 return NULL_RTX;
1541
1542 aarch64_layout_arg (pcum_v, mode, type, named);
1543 return pcum->aapcs_reg;
1544 }
1545
1546 void
1547 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1548 const_tree fntype ATTRIBUTE_UNUSED,
1549 rtx libname ATTRIBUTE_UNUSED,
1550 const_tree fndecl ATTRIBUTE_UNUSED,
1551 unsigned n_named ATTRIBUTE_UNUSED)
1552 {
1553 pcum->aapcs_ncrn = 0;
1554 pcum->aapcs_nvrn = 0;
1555 pcum->aapcs_nextncrn = 0;
1556 pcum->aapcs_nextnvrn = 0;
1557 pcum->pcs_variant = ARM_PCS_AAPCS64;
1558 pcum->aapcs_reg = NULL_RTX;
1559 pcum->aapcs_arg_processed = false;
1560 pcum->aapcs_stack_words = 0;
1561 pcum->aapcs_stack_size = 0;
1562
1563 return;
1564 }
1565
1566 static void
1567 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1568 enum machine_mode mode,
1569 const_tree type,
1570 bool named)
1571 {
1572 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1573 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1574 {
1575 aarch64_layout_arg (pcum_v, mode, type, named);
1576 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1577 != (pcum->aapcs_stack_words != 0));
1578 pcum->aapcs_arg_processed = false;
1579 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1580 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1581 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1582 pcum->aapcs_stack_words = 0;
1583 pcum->aapcs_reg = NULL_RTX;
1584 }
1585 }
1586
1587 bool
1588 aarch64_function_arg_regno_p (unsigned regno)
1589 {
1590 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1591 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1592 }
1593
1594 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1595 PARM_BOUNDARY bits of alignment, but will be given anything up
1596 to STACK_BOUNDARY bits if the type requires it. This makes sure
1597 that both before and after the layout of each argument, the Next
1598 Stacked Argument Address (NSAA) will have a minimum alignment of
1599 8 bytes. */
1600
1601 static unsigned int
1602 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1603 {
1604 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1605
1606 if (alignment < PARM_BOUNDARY)
1607 alignment = PARM_BOUNDARY;
1608 if (alignment > STACK_BOUNDARY)
1609 alignment = STACK_BOUNDARY;
1610 return alignment;
1611 }
1612
1613 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1614
1615 Return true if an argument passed on the stack should be padded upwards,
1616 i.e. if the least-significant byte of the stack slot has useful data.
1617
1618 Small aggregate types are placed in the lowest memory address.
1619
1620 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1621
1622 bool
1623 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1624 {
1625 /* On little-endian targets, the least significant byte of every stack
1626 argument is passed at the lowest byte address of the stack slot. */
1627 if (!BYTES_BIG_ENDIAN)
1628 return true;
1629
1630 /* Otherwise, integral, floating-point and pointer types are padded downward:
1631 the least significant byte of a stack argument is passed at the highest
1632 byte address of the stack slot. */
1633 if (type
1634 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1635 || POINTER_TYPE_P (type))
1636 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1637 return false;
1638
1639 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1640 return true;
1641 }
1642
1643 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1644
1645 It specifies padding for the last (may also be the only)
1646 element of a block move between registers and memory. Assuming
1647 the block is in memory, padding upward means that the last
1648 element is padded after its most significant byte, while in
1649 downward padding, the last element is padded at its least
1650 significant byte side.
1651
1652 Small aggregates and small complex types are always padded
1653 upwards.
1654
1655 We don't need to worry about homogeneous floating-point or
1656 short-vector aggregates; their move is not affected by the
1657 padding direction determined here. Regardless of endianness,
1658 each element of such an aggregate is put in the least
1659 significant bits of a fp/simd register.
1660
1661 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1662 register has useful data, and return the opposite if the most
1663 significant byte does. */
1664
1665 bool
1666 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1667 bool first ATTRIBUTE_UNUSED)
1668 {
1669
1670 /* Small composite types are always padded upward. */
1671 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1672 {
1673 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1674 : GET_MODE_SIZE (mode));
1675 if (size < 2 * UNITS_PER_WORD)
1676 return true;
1677 }
1678
1679 /* Otherwise, use the default padding. */
1680 return !BYTES_BIG_ENDIAN;
1681 }
1682
1683 static enum machine_mode
1684 aarch64_libgcc_cmp_return_mode (void)
1685 {
1686 return SImode;
1687 }
1688
1689 static bool
1690 aarch64_frame_pointer_required (void)
1691 {
1692 /* If the function contains dynamic stack allocations, we need to
1693 use the frame pointer to access the static parts of the frame. */
1694 if (cfun->calls_alloca)
1695 return true;
1696
1697 /* We may have turned flag_omit_frame_pointer on just so that this
1698 function would be called; if we did, we also set the
1699 'faked_omit_frame_pointer' flag, which we check here.
1700 If flag_omit_frame_pointer was set in the normal way (not faked), then
1701 we return false (no frame pointer required) in all cases. */
1702
1703 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1704 return false;
1705 else if (flag_omit_leaf_frame_pointer)
1706 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1707 return true;
1708 }
1709
1710 /* Mark the registers that need to be saved by the callee and calculate
1711 the size of the callee-saved registers area and frame record (both FP
1712 and LR may be omitted). */
1713 static void
1714 aarch64_layout_frame (void)
1715 {
1716 HOST_WIDE_INT offset = 0;
1717 int regno;
1718
1719 if (reload_completed && cfun->machine->frame.laid_out)
1720 return;
1721
1722 cfun->machine->frame.fp_lr_offset = 0;
1723
1724 /* First mark all the registers that really need to be saved... */
1725 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1726 cfun->machine->frame.reg_offset[regno] = -1;
1727
1728 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1729 cfun->machine->frame.reg_offset[regno] = -1;
1730
1731 /* ... that includes the eh data registers (if needed)... */
1732 if (crtl->calls_eh_return)
1733 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1734 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1735
1736 /* ... and any callee saved register that dataflow says is live. */
1737 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno)
1739 && !call_used_regs[regno])
1740 cfun->machine->frame.reg_offset[regno] = 0;
1741
1742 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1743 if (df_regs_ever_live_p (regno)
1744 && !call_used_regs[regno])
1745 cfun->machine->frame.reg_offset[regno] = 0;
1746
1747 if (frame_pointer_needed)
1748 {
1749 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1750 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1751 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1752 }
1753
1754 /* Now assign stack slots for them. */
1755 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1756 if (cfun->machine->frame.reg_offset[regno] != -1)
1757 {
1758 cfun->machine->frame.reg_offset[regno] = offset;
1759 offset += UNITS_PER_WORD;
1760 }
1761
1762 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1763 if (cfun->machine->frame.reg_offset[regno] != -1)
1764 {
1765 cfun->machine->frame.reg_offset[regno] = offset;
1766 offset += UNITS_PER_WORD;
1767 }
1768
1769 if (frame_pointer_needed)
1770 {
1771 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1772 offset += UNITS_PER_WORD;
1773 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1774 }
1775
1776 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1777 {
1778 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1779 offset += UNITS_PER_WORD;
1780 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1781 }
1782
1783 cfun->machine->frame.padding0 =
1784 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1785 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1786
1787 cfun->machine->frame.saved_regs_size = offset;
1788 cfun->machine->frame.laid_out = true;
1789 }
1790
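/* A compressed standalone model (illustration only) of the slot
   assignment above: every saved register gets a word-sized slot, core
   registers first, then vector registers, then x29/x30 as needed, and
   the total is padded up to the 16-byte STACK_BOUNDARY.  */
#if 0
/* OFFSETS[i] receives the slot offset of the i-th saved register, in the
   save order described above; returns the padded size of the save area.  */
static long
sketch_layout_callee_saves (int nsaved, long *offsets)
{
  long offset = 0;
  int i;

  for (i = 0; i < nsaved; i++)
    {
      offsets[i] = offset;
      offset += 8;              /* UNITS_PER_WORD-sized slots.  */
    }
  /* Pad to STACK_BOUNDARY (16 bytes), as AARCH64_ROUND_UP does above.  */
  return (offset + 15) & ~15;
}

/* Saving x19, x20 and d8 gives slots 0, 8 and 16, and a save area of
   32 bytes (24 rounded up to a 16-byte boundary).  */
#endif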
1791 /* Make the last instruction frame-related and note that it performs
1792 the operation described by FRAME_PATTERN. */
1793
1794 static void
1795 aarch64_set_frame_expr (rtx frame_pattern)
1796 {
1797 rtx insn;
1798
1799 insn = get_last_insn ();
1800 RTX_FRAME_RELATED_P (insn) = 1;
1801 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1802 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1803 frame_pattern,
1804 REG_NOTES (insn));
1805 }
1806
1807 static bool
1808 aarch64_register_saved_on_entry (int regno)
1809 {
1810 return cfun->machine->frame.reg_offset[regno] != -1;
1811 }
1812
1813
1814 static void
1815 aarch64_save_or_restore_fprs (int start_offset, int increment,
1816 bool restore, rtx base_rtx)
1817
1818 {
1819 unsigned regno;
1820 unsigned regno2;
1821 rtx insn;
1822 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1823 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1824
1825
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1827 {
1828 if (aarch64_register_saved_on_entry (regno))
1829 {
1830 rtx mem;
1831 mem = gen_mem_ref (DFmode,
1832 plus_constant (Pmode,
1833 base_rtx,
1834 start_offset));
1835
1836 for (regno2 = regno + 1;
1837 regno2 <= V31_REGNUM
1838 && !aarch64_register_saved_on_entry (regno2);
1839 regno2++)
1840 {
1841 /* Empty loop. */
1842 }
1843 if (regno2 <= V31_REGNUM &&
1844 aarch64_register_saved_on_entry (regno2))
1845 {
1846 rtx mem2;
1847 /* Next highest register to be saved. */
1848 mem2 = gen_mem_ref (DFmode,
1849 plus_constant
1850 (Pmode,
1851 base_rtx,
1852 start_offset + increment));
1853 if (restore == false)
1854 {
1855 insn = emit_insn
1856 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1857 mem2, gen_rtx_REG (DFmode, regno2)));
1858
1859 }
1860 else
1861 {
1862 insn = emit_insn
1863 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1864 gen_rtx_REG (DFmode, regno2), mem2));
1865
1866 add_reg_note (insn, REG_CFA_RESTORE,
1867 gen_rtx_REG (DFmode, regno));
1868 add_reg_note (insn, REG_CFA_RESTORE,
1869 gen_rtx_REG (DFmode, regno2));
1870 }
1871
1872 /* The first part of a frame-related parallel insn
1873 is always assumed to be relevant to the frame
1874 calculations; subsequent parts are only
1875 frame-related if explicitly marked. */
1876 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1877 regno = regno2;
1878 start_offset += increment * 2;
1879 }
1880 else
1881 {
1882 if (restore == false)
1883 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1884 else
1885 {
1886 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1887 add_reg_note (insn, REG_CFA_RESTORE,
1888 gen_rtx_REG (DImode, regno));
1889 }
1890 start_offset += increment;
1891 }
1892 RTX_FRAME_RELATED_P (insn) = 1;
1893 }
1894 }
1895
1896 }
1897
1898
1899 /* Offset from the stack pointer at which the saves and
1900 restores have to happen. */
1901 static void
1902 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1903 bool restore)
1904 {
1905 rtx insn;
1906 rtx base_rtx = stack_pointer_rtx;
1907 HOST_WIDE_INT start_offset = offset;
1908 HOST_WIDE_INT increment = UNITS_PER_WORD;
1909 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1910 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1911 unsigned regno;
1912 unsigned regno2;
1913
1914 for (regno = R0_REGNUM; regno <= limit; regno++)
1915 {
1916 if (aarch64_register_saved_on_entry (regno))
1917 {
1918 rtx mem;
1919 mem = gen_mem_ref (Pmode,
1920 plus_constant (Pmode,
1921 base_rtx,
1922 start_offset));
1923
1924 for (regno2 = regno + 1;
1925 regno2 <= limit
1926 && !aarch64_register_saved_on_entry (regno2);
1927 regno2++)
1928 {
1929 /* Empty loop. */
1930 }
1931 if (regno2 <= limit &&
1932 aarch64_register_saved_on_entry (regno2))
1933 {
1934 rtx mem2;
1935 /* Next highest register to be saved. */
1936 mem2 = gen_mem_ref (Pmode,
1937 plus_constant
1938 (Pmode,
1939 base_rtx,
1940 start_offset + increment));
1941 if (restore == false)
1942 {
1943 insn = emit_insn
1944 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1945 mem2, gen_rtx_REG (DImode, regno2)));
1946
1947 }
1948 else
1949 {
1950 insn = emit_insn
1951 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1952 gen_rtx_REG (DImode, regno2), mem2));
1953
1954 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1955 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1956 }
1957
1958 /* The first part of a frame-related parallel insn
1959 is always assumed to be relevant to the frame
1960 calculations; subsequent parts are only
1961 frame-related if explicitly marked. */
1962 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1963 1)) = 1;
1964 regno = regno2;
1965 start_offset += increment * 2;
1966 }
1967 else
1968 {
1969 if (restore == false)
1970 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1971 else
1972 {
1973 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1974 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1975 }
1976 start_offset += increment;
1977 }
1978 RTX_FRAME_RELATED_P (insn) = 1;
1979 }
1980 }
1981
1982 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1983
1984 }
1985
1986 /* AArch64 stack frames generated by this compiler look like:
1987
1988 +-------------------------------+
1989 | |
1990 | incoming stack arguments |
1991 | |
1992 +-------------------------------+ <-- arg_pointer_rtx
1993 | |
1994 | callee-allocated save area |
1995 | for register varargs |
1996 | |
1997 +-------------------------------+
1998 | |
1999 | local variables |
2000 | |
2001 +-------------------------------+ <-- frame_pointer_rtx
2002 | |
2003 | callee-saved registers |
2004 | |
2005 +-------------------------------+
2006 | LR' |
2007 +-------------------------------+
2008 | FP' |
2009 P +-------------------------------+ <-- hard_frame_pointer_rtx
2010 | dynamic allocation |
2011 +-------------------------------+
2012 | |
2013 | outgoing stack arguments |
2014 | |
2015 +-------------------------------+ <-- stack_pointer_rtx
2016
2017 Dynamic stack allocations such as alloca insert data at point P.
2018 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2019 hard_frame_pointer_rtx unchanged. */
2020
2021 /* Generate the prologue instructions for entry into a function.
2022 Establish the stack frame by decreasing the stack pointer with a
2023 properly calculated size and, if necessary, create a frame record
2024 filled with the values of LR and previous frame pointer. The
2025 current FP is also set up if it is in use. */
2026
2027 void
2028 aarch64_expand_prologue (void)
2029 {
2030 /* sub sp, sp, #<frame_size>
2031 stp {fp, lr}, [sp, #<frame_size> - 16]
2032 add fp, sp, #<frame_size> - hardfp_offset
2033 stp {cs_reg}, [fp, #-16] etc.
2034
2035 sub sp, sp, <final_adjustment_if_any>
2036 */
2037 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2038 HOST_WIDE_INT frame_size, offset;
2039 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2040 rtx insn;
2041
2042 aarch64_layout_frame ();
2043 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2044 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2045 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2046 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2047 + crtl->outgoing_args_size);
2048 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2049 STACK_BOUNDARY / BITS_PER_UNIT);
2050
2051 if (flag_stack_usage_info)
2052 current_function_static_stack_size = frame_size;
2053
2054 fp_offset = (offset
2055 - original_frame_size
2056 - cfun->machine->frame.saved_regs_size);
2057
2058 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2059 if (offset >= 512)
2060 {
2061 /* When the frame has a large size, an initial decrease is done on
2062 the stack pointer to jump over the callee-allocated save area for
2063 register varargs, the local variable area and/or the callee-saved
2064 register area. This will allow the pre-index write-back
2065 store pair instructions to be used for setting up the stack frame
2066 efficiently. */
2067 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2068 if (offset >= 512)
2069 offset = cfun->machine->frame.saved_regs_size;
2070
2071 frame_size -= (offset + crtl->outgoing_args_size);
2072 fp_offset = 0;
2073
2074 if (frame_size >= 0x1000000)
2075 {
2076 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2077 emit_move_insn (op0, GEN_INT (-frame_size));
2078 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2079 aarch64_set_frame_expr (gen_rtx_SET
2080 (Pmode, stack_pointer_rtx,
2081 plus_constant (Pmode,
2082 stack_pointer_rtx,
2083 -frame_size)));
2084 }
2085 else if (frame_size > 0)
2086 {
2087 if ((frame_size & 0xfff) != frame_size)
2088 {
2089 insn = emit_insn (gen_add2_insn
2090 (stack_pointer_rtx,
2091 GEN_INT (-(frame_size
2092 & ~(HOST_WIDE_INT)0xfff))));
2093 RTX_FRAME_RELATED_P (insn) = 1;
2094 }
2095 if ((frame_size & 0xfff) != 0)
2096 {
2097 insn = emit_insn (gen_add2_insn
2098 (stack_pointer_rtx,
2099 GEN_INT (-(frame_size
2100 & (HOST_WIDE_INT)0xfff))));
2101 RTX_FRAME_RELATED_P (insn) = 1;
2102 }
2103 }
2104 }
2105 else
2106 frame_size = -1;
2107
2108 if (offset > 0)
2109 {
2110 /* If the frame pointer is needed, save it and LR first. Make the
2111 frame pointer point to the location of the old frame pointer on
2112 the stack. */
2113 if (frame_pointer_needed)
2114 {
2115 rtx mem_fp, mem_lr;
2116
2117 if (fp_offset)
2118 {
2119 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2120 GEN_INT (-offset)));
2121 RTX_FRAME_RELATED_P (insn) = 1;
2122 aarch64_set_frame_expr (gen_rtx_SET
2123 (Pmode, stack_pointer_rtx,
2124 gen_rtx_MINUS (Pmode,
2125 stack_pointer_rtx,
2126 GEN_INT (offset))));
2127 mem_fp = gen_frame_mem (DImode,
2128 plus_constant (Pmode,
2129 stack_pointer_rtx,
2130 fp_offset));
2131 mem_lr = gen_frame_mem (DImode,
2132 plus_constant (Pmode,
2133 stack_pointer_rtx,
2134 fp_offset
2135 + UNITS_PER_WORD));
2136 insn = emit_insn (gen_store_pairdi (mem_fp,
2137 hard_frame_pointer_rtx,
2138 mem_lr,
2139 gen_rtx_REG (DImode,
2140 LR_REGNUM)));
2141 }
2142 else
2143 {
2144 insn = emit_insn (gen_storewb_pairdi_di
2145 (stack_pointer_rtx, stack_pointer_rtx,
2146 hard_frame_pointer_rtx,
2147 gen_rtx_REG (DImode, LR_REGNUM),
2148 GEN_INT (-offset),
2149 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2150 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2151 }
2152
2153 /* The first part of a frame-related parallel insn is always
2154 assumed to be relevant to the frame calculations;
2155 subsequent parts are only frame-related if explicitly
2156 marked. */
2157 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2158 RTX_FRAME_RELATED_P (insn) = 1;
2159
2160 /* Set up frame pointer to point to the location of the
2161 previous frame pointer on the stack. */
2162 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2163 stack_pointer_rtx,
2164 GEN_INT (fp_offset)));
2165 aarch64_set_frame_expr (gen_rtx_SET
2166 (Pmode, hard_frame_pointer_rtx,
2167 plus_constant (Pmode,
2168 stack_pointer_rtx,
2169 fp_offset)));
2170 RTX_FRAME_RELATED_P (insn) = 1;
2171 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2172 hard_frame_pointer_rtx));
2173 }
2174 else
2175 {
2176 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2177 GEN_INT (-offset)));
2178 RTX_FRAME_RELATED_P (insn) = 1;
2179 }
2180
2181 aarch64_save_or_restore_callee_save_registers
2182 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2183 }
2184
2185 /* When offset >= 512, emit:
2186 sub sp, sp, #<outgoing_args_size> */
2187 if (frame_size > -1)
2188 {
2189 if (crtl->outgoing_args_size > 0)
2190 {
2191 insn = emit_insn (gen_add2_insn
2192 (stack_pointer_rtx,
2193 GEN_INT (- crtl->outgoing_args_size)));
2194 RTX_FRAME_RELATED_P (insn) = 1;
2195 }
2196 }
2197 }
2198
2199 /* Generate the epilogue instructions for returning from a function. */
2200 void
2201 aarch64_expand_epilogue (bool for_sibcall)
2202 {
2203 HOST_WIDE_INT original_frame_size, frame_size, offset;
2204 HOST_WIDE_INT fp_offset;
2205 rtx insn;
2206 rtx cfa_reg;
2207
2208 aarch64_layout_frame ();
2209 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2210 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2211 + crtl->outgoing_args_size);
2212 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2213 STACK_BOUNDARY / BITS_PER_UNIT);
2214
2215 fp_offset = (offset
2216 - original_frame_size
2217 - cfun->machine->frame.saved_regs_size);
2218
2219 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2220
2221 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2222 if (offset >= 512)
2223 {
2224 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2225 if (offset >= 512)
2226 offset = cfun->machine->frame.saved_regs_size;
2227
2228 frame_size -= (offset + crtl->outgoing_args_size);
2229 fp_offset = 0;
2230 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2231 {
2232 insn = emit_insn (gen_add2_insn
2233 (stack_pointer_rtx,
2234 GEN_INT (crtl->outgoing_args_size)));
2235 RTX_FRAME_RELATED_P (insn) = 1;
2236 }
2237 }
2238 else
2239 frame_size = -1;
2240
2241 /* If there were outgoing arguments or we've done dynamic stack
2242 allocation, then restore the stack pointer from the frame
2243 pointer. This is at most one insn and more efficient than using
2244 GCC's internal mechanism. */
2245 if (frame_pointer_needed
2246 && (crtl->outgoing_args_size || cfun->calls_alloca))
2247 {
2248 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2249 hard_frame_pointer_rtx,
2250 GEN_INT (- fp_offset)));
2251 RTX_FRAME_RELATED_P (insn) = 1;
2252 /* As SP is set to (FP - fp_offset), according to the rules in
2253 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2254 from the value of SP from now on. */
2255 cfa_reg = stack_pointer_rtx;
2256 }
2257
2258 aarch64_save_or_restore_callee_save_registers
2259 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2260
2261 /* Restore the frame pointer and lr if the frame pointer is needed. */
2262 if (offset > 0)
2263 {
2264 if (frame_pointer_needed)
2265 {
2266 rtx mem_fp, mem_lr;
2267
2268 if (fp_offset)
2269 {
2270 mem_fp = gen_frame_mem (DImode,
2271 plus_constant (Pmode,
2272 stack_pointer_rtx,
2273 fp_offset));
2274 mem_lr = gen_frame_mem (DImode,
2275 plus_constant (Pmode,
2276 stack_pointer_rtx,
2277 fp_offset
2278 + UNITS_PER_WORD));
2279 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2280 mem_fp,
2281 gen_rtx_REG (DImode,
2282 LR_REGNUM),
2283 mem_lr));
2284 }
2285 else
2286 {
2287 insn = emit_insn (gen_loadwb_pairdi_di
2288 (stack_pointer_rtx,
2289 stack_pointer_rtx,
2290 hard_frame_pointer_rtx,
2291 gen_rtx_REG (DImode, LR_REGNUM),
2292 GEN_INT (offset),
2293 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2294 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2295 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2296 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2297 plus_constant (Pmode, cfa_reg,
2298 offset))));
2299 }
2300
2301 /* The first part of a frame-related parallel insn
2302 is always assumed to be relevant to the frame
2303 calculations; subsequent parts are only
2304 frame-related if explicitly marked. */
2305 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2308 add_reg_note (insn, REG_CFA_RESTORE,
2309 gen_rtx_REG (DImode, LR_REGNUM));
2310
2311 if (fp_offset)
2312 {
2313 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2314 GEN_INT (offset)));
2315 RTX_FRAME_RELATED_P (insn) = 1;
2316 }
2317 }
2318 else
2319 {
2320 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2321 GEN_INT (offset)));
2322 RTX_FRAME_RELATED_P (insn) = 1;
2323 }
2324 }
2325
2326 /* Stack adjustment for exception handler. */
2327 if (crtl->calls_eh_return)
2328 {
2329 /* We need to unwind the stack by the offset computed by
2330 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2331 based on SP. Ideally we would update the SP and define the
2332 CFA along the lines of:
2333
2334 SP = SP + EH_RETURN_STACKADJ_RTX
2335 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2336
2337 However the dwarf emitter only understands a constant
2338 register offset.
2339
2340 The solution chosen here is to use the otherwise unused IP0
2341 as a temporary register to hold the current SP value. The
2342 CFA is described using IP0 then SP is modified. */
2343
2344 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2345
2346 insn = emit_move_insn (ip0, stack_pointer_rtx);
2347 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2348 RTX_FRAME_RELATED_P (insn) = 1;
2349
2350 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2351
2352 /* Ensure the assignment to IP0 does not get optimized away. */
2353 emit_use (ip0);
2354 }
2355
2356 if (frame_size > -1)
2357 {
2358 if (frame_size >= 0x1000000)
2359 {
2360 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2361 emit_move_insn (op0, GEN_INT (frame_size));
2362 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2363 aarch64_set_frame_expr (gen_rtx_SET
2364 (Pmode, stack_pointer_rtx,
2365 plus_constant (Pmode,
2366 stack_pointer_rtx,
2367 frame_size)));
2368 }
2369 else if (frame_size > 0)
2370 {
2371 if ((frame_size & 0xfff) != 0)
2372 {
2373 insn = emit_insn (gen_add2_insn
2374 (stack_pointer_rtx,
2375 GEN_INT ((frame_size
2376 & (HOST_WIDE_INT) 0xfff))));
2377 RTX_FRAME_RELATED_P (insn) = 1;
2378 }
2379 if ((frame_size & 0xfff) != frame_size)
2380 {
2381 insn = emit_insn (gen_add2_insn
2382 (stack_pointer_rtx,
2383 GEN_INT ((frame_size
2384 & ~ (HOST_WIDE_INT) 0xfff))));
2385 RTX_FRAME_RELATED_P (insn) = 1;
2386 }
2387 }
2388
2389 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2390 plus_constant (Pmode,
2391 stack_pointer_rtx,
2392 offset)));
2393 }
2394
2395 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2396 if (!for_sibcall)
2397 emit_jump_insn (ret_rtx);
2398 }
2399
2400 /* Return the place to copy the exception unwinding return address to.
2401 This will probably be a stack slot, but could (in theory) be the
2402 return register. */
2403 rtx
2404 aarch64_final_eh_return_addr (void)
2405 {
2406 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2407 aarch64_layout_frame ();
2408 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2409 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2410 + crtl->outgoing_args_size);
2411 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2412 STACK_BOUNDARY / BITS_PER_UNIT);
2413 fp_offset = offset
2414 - original_frame_size
2415 - cfun->machine->frame.saved_regs_size;
2416
2417 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2418 return gen_rtx_REG (DImode, LR_REGNUM);
2419
2420 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2421 result in a store to save LR introduced by builtin_eh_return () being
2422 incorrectly deleted because the alias is not detected.
2423 So in the calculation of the address to copy the exception unwinding
2424 return address to, we distinguish two cases.
2425 If FP is needed and fp_offset is 0, then SP = FP and hence we return
2426 an SP-relative location, since all the addresses are SP-relative in
2427 this case. This prevents the store from being optimized away.
2428 If fp_offset is not 0, then the addresses will be FP-relative and
2429 therefore we return an FP-relative location. */
2430
2431 if (frame_pointer_needed)
2432 {
2433 if (fp_offset)
2434 return gen_frame_mem (DImode,
2435 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2436 else
2437 return gen_frame_mem (DImode,
2438 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2439 }
2440
2441 /* If FP is not needed, we calculate the location of LR, which would be
2442 at the top of the saved registers block. */
2443
2444 return gen_frame_mem (DImode,
2445 plus_constant (Pmode,
2446 stack_pointer_rtx,
2447 fp_offset
2448 + cfun->machine->frame.saved_regs_size
2449 - 2 * UNITS_PER_WORD));
2450 }
2451
2452 /* Output code to build up a constant in a register. */
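/* Illustrative example (not taken from the original source): a value such
   as 0x123400005678 would typically be built as
       mov  xN, #0x5678
       movk xN, #0x1234, lsl #32
   i.e. one MOVZ (or MOVN) for the low 16 bits, followed by a MOVK for each
   further 16-bit chunk that differs from the fill pattern.  */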
2453 static void
2454 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2455 {
2456 if (aarch64_bitmask_imm (val, DImode))
2457 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2458 else
2459 {
2460 int i;
2461 int ncount = 0;
2462 int zcount = 0;
2463 HOST_WIDE_INT valp = val >> 16;
2464 HOST_WIDE_INT valm;
2465 HOST_WIDE_INT tval;
2466
2467 for (i = 16; i < 64; i += 16)
2468 {
2469 valm = (valp & 0xffff);
2470
2471 if (valm != 0)
2472 ++ zcount;
2473
2474 if (valm != 0xffff)
2475 ++ ncount;
2476
2477 valp >>= 16;
2478 }
2479
2480 /* zcount contains the number of additional MOVK instructions
2481 required if the constant is built up with an initial MOVZ instruction,
2482 while ncount is the number of MOVK instructions required if starting
2483 with a MOVN instruction. Choose the sequence that yields the
2484 fewer instructions, preferring the MOVZ sequence when the two
2485 counts are equal. */
2486 if (ncount < zcount)
2487 {
2488 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2489 GEN_INT ((~val) & 0xffff));
2490 tval = 0xffff;
2491 }
2492 else
2493 {
2494 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2495 GEN_INT (val & 0xffff));
2496 tval = 0;
2497 }
2498
2499 val >>= 16;
2500
2501 for (i = 16; i < 64; i += 16)
2502 {
2503 if ((val & 0xffff) != tval)
2504 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2505 GEN_INT (i), GEN_INT (val & 0xffff)));
2506 val >>= 16;
2507 }
2508 }
2509 }
2510
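/* Add DELTA to register REGNUM, using SCRATCHREG as a temporary when the
   addend cannot be encoded directly.  Deltas below 2^24 are split into a
   shifted add of DELTA / 4096 and a plain 12-bit add of the remainder;
   anything larger is built with aarch64_build_constant first.  As a rough,
   illustrative sequence (register names here are placeholders), DELTA =
   0x12345 becomes approximately:
       mov  scratch, #0x12
       add  this, this, scratch, lsl #12
       add  this, this, #0x345  */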
2511 static void
2512 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2513 {
2514 HOST_WIDE_INT mdelta = delta;
2515 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2516 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2517
2518 if (mdelta < 0)
2519 mdelta = -mdelta;
2520
2521 if (mdelta >= 4096 * 4096)
2522 {
2523 aarch64_build_constant (scratchreg, delta);
2524 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2525 }
2526 else if (mdelta > 0)
2527 {
2528 if (mdelta >= 4096)
2529 {
2530 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2531 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2532 if (delta < 0)
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2535 else
2536 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2537 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2538 }
2539 if (mdelta % 4096 != 0)
2540 {
2541 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2542 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2543 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2544 }
2545 }
2546 }
2547
2548 /* Output code to add DELTA to the first argument, and then jump
2549 to FUNCTION. Used for C++ multiple inheritance. */
2550 static void
2551 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2552 HOST_WIDE_INT delta,
2553 HOST_WIDE_INT vcall_offset,
2554 tree function)
2555 {
2556 /* The this pointer is always in x0. Note that this differs from
2557 ARM, where the this pointer may be bumped to r1 if r0 is required
2558 to return a pointer to an aggregate. On AArch64 a result value
2559 pointer will be in x8. */
2560 int this_regno = R0_REGNUM;
2561 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2562
2563 reload_completed = 1;
2564 emit_note (NOTE_INSN_PROLOGUE_END);
2565
2566 if (vcall_offset == 0)
2567 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2568 else
2569 {
2570 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2571
2572 this_rtx = gen_rtx_REG (Pmode, this_regno);
2573 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2574 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2575
2576 addr = this_rtx;
2577 if (delta != 0)
2578 {
2579 if (delta >= -256 && delta < 256)
2580 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2581 plus_constant (Pmode, this_rtx, delta));
2582 else
2583 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2584 }
2585
2586 if (Pmode == ptr_mode)
2587 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2588 else
2589 aarch64_emit_move (temp0,
2590 gen_rtx_ZERO_EXTEND (Pmode,
2591 gen_rtx_MEM (ptr_mode, addr)));
2592
2593 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2594 addr = plus_constant (Pmode, temp0, vcall_offset);
2595 else
2596 {
2597 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2598 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2599 }
2600
2601 if (Pmode == ptr_mode)
2602 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2603 else
2604 aarch64_emit_move (temp1,
2605 gen_rtx_SIGN_EXTEND (Pmode,
2606 gen_rtx_MEM (ptr_mode, addr)));
2607
2608 emit_insn (gen_add2_insn (this_rtx, temp1));
2609 }
2610
2611 /* Generate a tail call to the target function. */
2612 if (!TREE_USED (function))
2613 {
2614 assemble_external (function);
2615 TREE_USED (function) = 1;
2616 }
2617 funexp = XEXP (DECL_RTL (function), 0);
2618 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2619 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2620 SIBLING_CALL_P (insn) = 1;
2621
2622 insn = get_insns ();
2623 shorten_branches (insn);
2624 final_start_function (insn, file, 1);
2625 final (insn, file, 1);
2626 final_end_function ();
2627
2628 /* Stop pretending to be a post-reload pass. */
2629 reload_completed = 0;
2630 }
2631
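/* Subroutine of aarch64_tls_referenced_p, used as a for_each_rtx callback.
   Return nonzero if *X is a TLS symbol; return -1 for an UNSPEC_TLS so
   that its operands are not scanned.  */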
2632 static int
2633 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2634 {
2635 if (GET_CODE (*x) == SYMBOL_REF)
2636 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2637
2638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2639 TLS offsets, not real symbol references. */
2640 if (GET_CODE (*x) == UNSPEC
2641 && XINT (*x, 1) == UNSPEC_TLS)
2642 return -1;
2643
2644 return 0;
2645 }
2646
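/* Return true if X contains a reference to a thread-local symbol.  */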
2647 static bool
2648 aarch64_tls_referenced_p (rtx x)
2649 {
2650 if (!TARGET_HAVE_TLS)
2651 return false;
2652
2653 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2654 }
2655
2656
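/* Comparison function for qsort and bsearch over the table of bitmask
   immediates.  */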
2657 static int
2658 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2659 {
2660 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2661 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2662
2663 if (*imm1 < *imm2)
2664 return -1;
2665 if (*imm1 > *imm2)
2666 return +1;
2667 return 0;
2668 }
2669
2670
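/* Populate aarch64_bitmasks with every value representable as a "bitmask
   immediate": for each element size E in {2, 4, 8, 16, 32, 64}, a run of
   S consecutive ones (1 <= S < E) rotated right by R (0 <= R < E) and
   replicated across 64 bits.  As a worked example (for illustration only),
   E = 8, S = 3, R = 1 gives the element 0x83, which replicates to
   0x8383838383838383.  */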
2671 static void
2672 aarch64_build_bitmask_table (void)
2673 {
2674 unsigned HOST_WIDE_INT mask, imm;
2675 unsigned int log_e, e, s, r;
2676 unsigned int nimms = 0;
2677
2678 for (log_e = 1; log_e <= 6; log_e++)
2679 {
2680 e = 1 << log_e;
2681 if (e == 64)
2682 mask = ~(HOST_WIDE_INT) 0;
2683 else
2684 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2685 for (s = 1; s < e; s++)
2686 {
2687 for (r = 0; r < e; r++)
2688 {
2689 /* Set S consecutive bits to 1 (S < 64). */
2690 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2691 /* Rotate right by R. */
2692 if (r != 0)
2693 imm = ((imm >> r) | (imm << (e - r))) & mask;
2694 /* Replicate the constant across 64 bits, according to the element size. */
2695 switch (log_e) {
2696 case 1: imm |= (imm << 2);
2697 case 2: imm |= (imm << 4);
2698 case 3: imm |= (imm << 8);
2699 case 4: imm |= (imm << 16);
2700 case 5: imm |= (imm << 32);
2701 case 6:
2702 break;
2703 default:
2704 gcc_unreachable ();
2705 }
2706 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2707 aarch64_bitmasks[nimms++] = imm;
2708 }
2709 }
2710 }
2711
2712 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2713 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2714 aarch64_bitmasks_cmp);
2715 }
2716
2717
2718 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2719 a left shift of 0 or 12 bits. */
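/* For example (illustrative, not from the original source), 0xabc and
   0xabc000 are accepted, whereas 0x1001 is rejected because it has bits
   set in both the low and the shifted 12-bit fields.  */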
2720 bool
2721 aarch64_uimm12_shift (HOST_WIDE_INT val)
2722 {
2723 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2724 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2725 );
2726 }
2727
2728
2729 /* Return true if val is an immediate that can be loaded into a
2730 register by a MOVZ instruction. */
2731 static bool
2732 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2733 {
2734 if (GET_MODE_SIZE (mode) > 4)
2735 {
2736 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2737 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2738 return 1;
2739 }
2740 else
2741 {
2742 /* Ignore sign extension. */
2743 val &= (HOST_WIDE_INT) 0xffffffff;
2744 }
2745 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2746 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2747 }
2748
2749
2750 /* Return true if val is a valid bitmask immediate. */
2751 bool
2752 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2753 {
2754 if (GET_MODE_SIZE (mode) < 8)
2755 {
2756 /* Replicate bit pattern. */
2757 val &= (HOST_WIDE_INT) 0xffffffff;
2758 val |= val << 32;
2759 }
2760 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2761 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2762 }
2763
2764
2765 /* Return true if val is an immediate that can be loaded into a
2766 register in a single instruction. */
2767 bool
2768 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2769 {
2770 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2771 return 1;
2772 return aarch64_bitmask_imm (val, mode);
2773 }
2774
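/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Return true if X should not
   be spilled to the constant pool: HIGH parts, symbolic constants that
   are not meant to be forced to memory, and anything referencing a TLS
   symbol.  */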
2775 static bool
2776 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2777 {
2778 rtx base, offset;
2779
2780 if (GET_CODE (x) == HIGH)
2781 return true;
2782
2783 split_const (x, &base, &offset);
2784 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2785 {
2786 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2787 != SYMBOL_FORCE_TO_MEM)
2788 return true;
2789 else
2790 /* Avoid generating a 64-bit relocation in ILP32; leave it
2791 to aarch64_expand_mov_immediate to handle properly. */
2792 return mode != ptr_mode;
2793 }
2794
2795 return aarch64_tls_referenced_p (x);
2796 }
2797
2798 /* Return true if register REGNO is a valid index register.
2799 STRICT_P is true if REG_OK_STRICT is in effect. */
2800
2801 bool
2802 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2803 {
2804 if (!HARD_REGISTER_NUM_P (regno))
2805 {
2806 if (!strict_p)
2807 return true;
2808
2809 if (!reg_renumber)
2810 return false;
2811
2812 regno = reg_renumber[regno];
2813 }
2814 return GP_REGNUM_P (regno);
2815 }
2816
2817 /* Return true if register REGNO is a valid base register.
2818 STRICT_P is true if REG_OK_STRICT is in effect. */
2819
2820 bool
2821 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2822 {
2823 if (!HARD_REGISTER_NUM_P (regno))
2824 {
2825 if (!strict_p)
2826 return true;
2827
2828 if (!reg_renumber)
2829 return false;
2830
2831 regno = reg_renumber[regno];
2832 }
2833
2834 /* The fake registers will be eliminated to either the stack or
2835 hard frame pointer, both of which are usually valid base registers.
2836 Reload deals with the cases where the eliminated form isn't valid. */
2837 return (GP_REGNUM_P (regno)
2838 || regno == SP_REGNUM
2839 || regno == FRAME_POINTER_REGNUM
2840 || regno == ARG_POINTER_REGNUM);
2841 }
2842
2843 /* Return true if X is a valid base register.
2844 STRICT_P is true if REG_OK_STRICT is in effect. */
2845
2846 static bool
2847 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2848 {
2849 if (!strict_p && GET_CODE (x) == SUBREG)
2850 x = SUBREG_REG (x);
2851
2852 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2853 }
2854
2855 /* Return true if address offset is a valid index. If it is, fill in INFO
2856 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2857
2858 static bool
2859 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2860 enum machine_mode mode, bool strict_p)
2861 {
2862 enum aarch64_address_type type;
2863 rtx index;
2864 int shift;
2865
2866 /* (reg:P) */
2867 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2868 && GET_MODE (x) == Pmode)
2869 {
2870 type = ADDRESS_REG_REG;
2871 index = x;
2872 shift = 0;
2873 }
2874 /* (sign_extend:DI (reg:SI)) */
2875 else if ((GET_CODE (x) == SIGN_EXTEND
2876 || GET_CODE (x) == ZERO_EXTEND)
2877 && GET_MODE (x) == DImode
2878 && GET_MODE (XEXP (x, 0)) == SImode)
2879 {
2880 type = (GET_CODE (x) == SIGN_EXTEND)
2881 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2882 index = XEXP (x, 0);
2883 shift = 0;
2884 }
2885 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2886 else if (GET_CODE (x) == MULT
2887 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2889 && GET_MODE (XEXP (x, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x, 1)))
2892 {
2893 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2894 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2895 index = XEXP (XEXP (x, 0), 0);
2896 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2897 }
2898 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2899 else if (GET_CODE (x) == ASHIFT
2900 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2901 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2902 && GET_MODE (XEXP (x, 0)) == DImode
2903 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2904 && CONST_INT_P (XEXP (x, 1)))
2905 {
2906 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2907 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2908 index = XEXP (XEXP (x, 0), 0);
2909 shift = INTVAL (XEXP (x, 1));
2910 }
2911 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2912 else if ((GET_CODE (x) == SIGN_EXTRACT
2913 || GET_CODE (x) == ZERO_EXTRACT)
2914 && GET_MODE (x) == DImode
2915 && GET_CODE (XEXP (x, 0)) == MULT
2916 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2917 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2918 {
2919 type = (GET_CODE (x) == SIGN_EXTRACT)
2920 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2921 index = XEXP (XEXP (x, 0), 0);
2922 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2923 if (INTVAL (XEXP (x, 1)) != 32 + shift
2924 || INTVAL (XEXP (x, 2)) != 0)
2925 shift = -1;
2926 }
2927 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2928 (const_int 0xffffffff<<shift)) */
2929 else if (GET_CODE (x) == AND
2930 && GET_MODE (x) == DImode
2931 && GET_CODE (XEXP (x, 0)) == MULT
2932 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2933 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2934 && CONST_INT_P (XEXP (x, 1)))
2935 {
2936 type = ADDRESS_REG_UXTW;
2937 index = XEXP (XEXP (x, 0), 0);
2938 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2939 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2940 shift = -1;
2941 }
2942 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2943 else if ((GET_CODE (x) == SIGN_EXTRACT
2944 || GET_CODE (x) == ZERO_EXTRACT)
2945 && GET_MODE (x) == DImode
2946 && GET_CODE (XEXP (x, 0)) == ASHIFT
2947 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2948 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2949 {
2950 type = (GET_CODE (x) == SIGN_EXTRACT)
2951 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2952 index = XEXP (XEXP (x, 0), 0);
2953 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2954 if (INTVAL (XEXP (x, 1)) != 32 + shift
2955 || INTVAL (XEXP (x, 2)) != 0)
2956 shift = -1;
2957 }
2958 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2959 (const_int 0xffffffff<<shift)) */
2960 else if (GET_CODE (x) == AND
2961 && GET_MODE (x) == DImode
2962 && GET_CODE (XEXP (x, 0)) == ASHIFT
2963 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2964 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2965 && CONST_INT_P (XEXP (x, 1)))
2966 {
2967 type = ADDRESS_REG_UXTW;
2968 index = XEXP (XEXP (x, 0), 0);
2969 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2970 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2971 shift = -1;
2972 }
2973 /* (mult:P (reg:P) (const_int scale)) */
2974 else if (GET_CODE (x) == MULT
2975 && GET_MODE (x) == Pmode
2976 && GET_MODE (XEXP (x, 0)) == Pmode
2977 && CONST_INT_P (XEXP (x, 1)))
2978 {
2979 type = ADDRESS_REG_REG;
2980 index = XEXP (x, 0);
2981 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2982 }
2983 /* (ashift:P (reg:P) (const_int shift)) */
2984 else if (GET_CODE (x) == ASHIFT
2985 && GET_MODE (x) == Pmode
2986 && GET_MODE (XEXP (x, 0)) == Pmode
2987 && CONST_INT_P (XEXP (x, 1)))
2988 {
2989 type = ADDRESS_REG_REG;
2990 index = XEXP (x, 0);
2991 shift = INTVAL (XEXP (x, 1));
2992 }
2993 else
2994 return false;
2995
2996 if (GET_CODE (index) == SUBREG)
2997 index = SUBREG_REG (index);
2998
2999 if ((shift == 0
3000 || (shift > 0 && shift <= 3
3001 && (1 << shift) == GET_MODE_SIZE (mode)))
3002 && REG_P (index)
3003 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3004 {
3005 info->type = type;
3006 info->offset = index;
3007 info->shift = shift;
3008 return true;
3009 }
3010
3011 return false;
3012 }
3013
3014 static inline bool
3015 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3016 {
3017 return (offset >= -64 * GET_MODE_SIZE (mode)
3018 && offset < 64 * GET_MODE_SIZE (mode)
3019 && offset % GET_MODE_SIZE (mode) == 0);
3020 }
3021
3022 static inline bool
3023 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3024 HOST_WIDE_INT offset)
3025 {
3026 return offset >= -256 && offset < 256;
3027 }
3028
3029 static inline bool
3030 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3031 {
3032 return (offset >= 0
3033 && offset < 4096 * GET_MODE_SIZE (mode)
3034 && offset % GET_MODE_SIZE (mode) == 0);
3035 }
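/* For illustration: with an 8-byte (DImode) access the three predicates
   above accept, respectively, multiples of 8 in [-512, 504], any offset
   in [-256, 255], and multiples of 8 in [0, 32760].  */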
3036
3037 /* Return true if X is a valid address for machine mode MODE. If it is,
3038 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3039 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3040
3041 static bool
3042 aarch64_classify_address (struct aarch64_address_info *info,
3043 rtx x, enum machine_mode mode,
3044 RTX_CODE outer_code, bool strict_p)
3045 {
3046 enum rtx_code code = GET_CODE (x);
3047 rtx op0, op1;
3048 bool allow_reg_index_p =
3049 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3050
3051 /* Don't support anything other than POST_INC or REG addressing for
3052 AdvSIMD. */
3053 if (aarch64_vector_mode_p (mode)
3054 && (code != POST_INC && code != REG))
3055 return false;
3056
3057 switch (code)
3058 {
3059 case REG:
3060 case SUBREG:
3061 info->type = ADDRESS_REG_IMM;
3062 info->base = x;
3063 info->offset = const0_rtx;
3064 return aarch64_base_register_rtx_p (x, strict_p);
3065
3066 case PLUS:
3067 op0 = XEXP (x, 0);
3068 op1 = XEXP (x, 1);
3069 if (GET_MODE_SIZE (mode) != 0
3070 && CONST_INT_P (op1)
3071 && aarch64_base_register_rtx_p (op0, strict_p))
3072 {
3073 HOST_WIDE_INT offset = INTVAL (op1);
3074
3075 info->type = ADDRESS_REG_IMM;
3076 info->base = op0;
3077 info->offset = op1;
3078
3079 /* TImode and TFmode values are allowed in both pairs of X
3080 registers and individual Q registers. The available
3081 address modes are:
3082 X,X: 7-bit signed scaled offset
3083 Q: 9-bit signed offset
3084 We conservatively require an offset representable in both
3085 modes. */
3086 if (mode == TImode || mode == TFmode)
3087 return (offset_7bit_signed_scaled_p (mode, offset)
3088 && offset_9bit_signed_unscaled_p (mode, offset));
3089
3090 if (outer_code == PARALLEL)
3091 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3092 && offset_7bit_signed_scaled_p (mode, offset));
3093 else
3094 return (offset_9bit_signed_unscaled_p (mode, offset)
3095 || offset_12bit_unsigned_scaled_p (mode, offset));
3096 }
3097
3098 if (allow_reg_index_p)
3099 {
3100 /* Look for base + (scaled/extended) index register. */
3101 if (aarch64_base_register_rtx_p (op0, strict_p)
3102 && aarch64_classify_index (info, op1, mode, strict_p))
3103 {
3104 info->base = op0;
3105 return true;
3106 }
3107 if (aarch64_base_register_rtx_p (op1, strict_p)
3108 && aarch64_classify_index (info, op0, mode, strict_p))
3109 {
3110 info->base = op1;
3111 return true;
3112 }
3113 }
3114
3115 return false;
3116
3117 case POST_INC:
3118 case POST_DEC:
3119 case PRE_INC:
3120 case PRE_DEC:
3121 info->type = ADDRESS_REG_WB;
3122 info->base = XEXP (x, 0);
3123 info->offset = NULL_RTX;
3124 return aarch64_base_register_rtx_p (info->base, strict_p);
3125
3126 case POST_MODIFY:
3127 case PRE_MODIFY:
3128 info->type = ADDRESS_REG_WB;
3129 info->base = XEXP (x, 0);
3130 if (GET_CODE (XEXP (x, 1)) == PLUS
3131 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3132 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3133 && aarch64_base_register_rtx_p (info->base, strict_p))
3134 {
3135 HOST_WIDE_INT offset;
3136 info->offset = XEXP (XEXP (x, 1), 1);
3137 offset = INTVAL (info->offset);
3138
3139 /* TImode and TFmode values are allowed in both pairs of X
3140 registers and individual Q registers. The available
3141 address modes are:
3142 X,X: 7-bit signed scaled offset
3143 Q: 9-bit signed offset
3144 We conservatively require an offset representable in both
3145 modes. */
3146 if (mode == TImode || mode == TFmode)
3147 return (offset_7bit_signed_scaled_p (mode, offset)
3148 && offset_9bit_signed_unscaled_p (mode, offset));
3149
3150 if (outer_code == PARALLEL)
3151 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3152 && offset_7bit_signed_scaled_p (mode, offset));
3153 else
3154 return offset_9bit_signed_unscaled_p (mode, offset);
3155 }
3156 return false;
3157
3158 case CONST:
3159 case SYMBOL_REF:
3160 case LABEL_REF:
3161 /* Load literal: pc-relative constant pool entry. Only supported
3162 for SI mode or larger. */
3163 info->type = ADDRESS_SYMBOLIC;
3164 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3165 {
3166 rtx sym, addend;
3167
3168 split_const (x, &sym, &addend);
3169 return (GET_CODE (sym) == LABEL_REF
3170 || (GET_CODE (sym) == SYMBOL_REF
3171 && CONSTANT_POOL_ADDRESS_P (sym)));
3172 }
3173 return false;
3174
3175 case LO_SUM:
3176 info->type = ADDRESS_LO_SUM;
3177 info->base = XEXP (x, 0);
3178 info->offset = XEXP (x, 1);
3179 if (allow_reg_index_p
3180 && aarch64_base_register_rtx_p (info->base, strict_p))
3181 {
3182 rtx sym, offs;
3183 split_const (info->offset, &sym, &offs);
3184 if (GET_CODE (sym) == SYMBOL_REF
3185 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3186 == SYMBOL_SMALL_ABSOLUTE))
3187 {
3188 /* The symbol and offset must be aligned to the access size. */
3189 unsigned int align;
3190 unsigned int ref_size;
3191
3192 if (CONSTANT_POOL_ADDRESS_P (sym))
3193 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3194 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3195 {
3196 tree exp = SYMBOL_REF_DECL (sym);
3197 align = TYPE_ALIGN (TREE_TYPE (exp));
3198 align = CONSTANT_ALIGNMENT (exp, align);
3199 }
3200 else if (SYMBOL_REF_DECL (sym))
3201 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3202 else
3203 align = BITS_PER_UNIT;
3204
3205 ref_size = GET_MODE_SIZE (mode);
3206 if (ref_size == 0)
3207 ref_size = GET_MODE_SIZE (DImode);
3208
3209 return ((INTVAL (offs) & (ref_size - 1)) == 0
3210 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3211 }
3212 }
3213 return false;
3214
3215 default:
3216 return false;
3217 }
3218 }
3219
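/* Return true if X is a symbolic (or symbol plus offset) address.  */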
3220 bool
3221 aarch64_symbolic_address_p (rtx x)
3222 {
3223 rtx offset;
3224
3225 split_const (x, &x, &offset);
3226 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3227 }
3228
3229 /* Classify the base of symbolic expression X, given that X appears in
3230 context CONTEXT. */
3231
3232 enum aarch64_symbol_type
3233 aarch64_classify_symbolic_expression (rtx x,
3234 enum aarch64_symbol_context context)
3235 {
3236 rtx offset;
3237
3238 split_const (x, &x, &offset);
3239 return aarch64_classify_symbol (x, context);
3240 }
3241
3242
3243 /* Return TRUE if X is a legitimate address for accessing memory in
3244 mode MODE. */
3245 static bool
3246 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3247 {
3248 struct aarch64_address_info addr;
3249
3250 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3251 }
3252
3253 /* Return TRUE if X is a legitimate address for accessing memory in
3254 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3255 pair operation. */
3256 bool
3257 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3258 RTX_CODE outer_code, bool strict_p)
3259 {
3260 struct aarch64_address_info addr;
3261
3262 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3263 }
3264
3265 /* Return TRUE if rtx X is an immediate constant 0.0. */
3266 bool
3267 aarch64_float_const_zero_rtx_p (rtx x)
3268 {
3269 REAL_VALUE_TYPE r;
3270
3271 if (GET_MODE (x) == VOIDmode)
3272 return false;
3273
3274 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3275 if (REAL_VALUE_MINUS_ZERO (r))
3276 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3277 return REAL_VALUES_EQUAL (r, dconst0);
3278 }
3279
3280 /* Return the fixed registers used for condition codes. */
3281
3282 static bool
3283 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3284 {
3285 *p1 = CC_REGNUM;
3286 *p2 = INVALID_REGNUM;
3287 return true;
3288 }
3289
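/* Return the condition-code mode to use for a comparison of X with Y
   using comparison code CODE.  */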
3290 enum machine_mode
3291 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3292 {
3293 /* All floating-point compares return CCFP if it is an equality or
3294 unordered comparison, and CCFPE otherwise. */
3295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3296 {
3297 switch (code)
3298 {
3299 case EQ:
3300 case NE:
3301 case UNORDERED:
3302 case ORDERED:
3303 case UNLT:
3304 case UNLE:
3305 case UNGT:
3306 case UNGE:
3307 case UNEQ:
3308 case LTGT:
3309 return CCFPmode;
3310
3311 case LT:
3312 case LE:
3313 case GT:
3314 case GE:
3315 return CCFPEmode;
3316
3317 default:
3318 gcc_unreachable ();
3319 }
3320 }
3321
3322 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3323 && y == const0_rtx
3324 && (code == EQ || code == NE || code == LT || code == GE)
3325 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3326 || GET_CODE (x) == NEG))
3327 return CC_NZmode;
3328
3329 /* A compare with a shifted, extended or negated operand. Because of
3330 canonicalization, the comparison will have to be swapped when we
3331 emit the assembly code. */
3332 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3333 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3334 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3335 || GET_CODE (x) == LSHIFTRT
3336 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3337 || GET_CODE (x) == NEG))
3338 return CC_SWPmode;
3339
3340 /* A compare of a mode narrower than SI mode against zero can be done
3341 by extending the value in the comparison. */
3342 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3343 && y == const0_rtx)
3344 /* Only use sign-extension if we really need it. */
3345 return ((code == GT || code == GE || code == LE || code == LT)
3346 ? CC_SESWPmode : CC_ZESWPmode);
3347
3348 /* For everything else, return CCmode. */
3349 return CCmode;
3350 }
3351
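/* Return the AArch64 condition code that corresponds to the rtl
   comparison X, taking the mode of the condition-code register into
   account.  */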
3352 static unsigned
3353 aarch64_get_condition_code (rtx x)
3354 {
3355 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3356 enum rtx_code comp_code = GET_CODE (x);
3357
3358 if (GET_MODE_CLASS (mode) != MODE_CC)
3359 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3360
3361 switch (mode)
3362 {
3363 case CCFPmode:
3364 case CCFPEmode:
3365 switch (comp_code)
3366 {
3367 case GE: return AARCH64_GE;
3368 case GT: return AARCH64_GT;
3369 case LE: return AARCH64_LS;
3370 case LT: return AARCH64_MI;
3371 case NE: return AARCH64_NE;
3372 case EQ: return AARCH64_EQ;
3373 case ORDERED: return AARCH64_VC;
3374 case UNORDERED: return AARCH64_VS;
3375 case UNLT: return AARCH64_LT;
3376 case UNLE: return AARCH64_LE;
3377 case UNGT: return AARCH64_HI;
3378 case UNGE: return AARCH64_PL;
3379 default: gcc_unreachable ();
3380 }
3381 break;
3382
3383 case CCmode:
3384 switch (comp_code)
3385 {
3386 case NE: return AARCH64_NE;
3387 case EQ: return AARCH64_EQ;
3388 case GE: return AARCH64_GE;
3389 case GT: return AARCH64_GT;
3390 case LE: return AARCH64_LE;
3391 case LT: return AARCH64_LT;
3392 case GEU: return AARCH64_CS;
3393 case GTU: return AARCH64_HI;
3394 case LEU: return AARCH64_LS;
3395 case LTU: return AARCH64_CC;
3396 default: gcc_unreachable ();
3397 }
3398 break;
3399
3400 case CC_SWPmode:
3401 case CC_ZESWPmode:
3402 case CC_SESWPmode:
3403 switch (comp_code)
3404 {
3405 case NE: return AARCH64_NE;
3406 case EQ: return AARCH64_EQ;
3407 case GE: return AARCH64_LE;
3408 case GT: return AARCH64_LT;
3409 case LE: return AARCH64_GE;
3410 case LT: return AARCH64_GT;
3411 case GEU: return AARCH64_LS;
3412 case GTU: return AARCH64_CC;
3413 case LEU: return AARCH64_CS;
3414 case LTU: return AARCH64_HI;
3415 default: gcc_unreachable ();
3416 }
3417 break;
3418
3419 case CC_NZmode:
3420 switch (comp_code)
3421 {
3422 case NE: return AARCH64_NE;
3423 case EQ: return AARCH64_EQ;
3424 case GE: return AARCH64_PL;
3425 case LT: return AARCH64_MI;
3426 default: gcc_unreachable ();
3427 }
3428 break;
3429
3430 default:
3431 gcc_unreachable ();
3432 break;
3433 }
3434 }
3435
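/* Return the number of set bits in VALUE.  Each iteration of the loop
   clears the lowest set bit, so the loop runs once per set bit.  */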
3436 static unsigned
3437 bit_count (unsigned HOST_WIDE_INT value)
3438 {
3439 unsigned count = 0;
3440
3441 while (value)
3442 {
3443 count++;
3444 value &= value - 1;
3445 }
3446
3447 return count;
3448 }
3449
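/* Print operand X to file F in a target-specific manner, as selected by
   the modifier letter CODE (see the cases below).  */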
3450 void
3451 aarch64_print_operand (FILE *f, rtx x, char code)
3452 {
3453 switch (code)
3454 {
3455 /* An integer or symbol address without a preceding # sign. */
3456 case 'c':
3457 switch (GET_CODE (x))
3458 {
3459 case CONST_INT:
3460 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3461 break;
3462
3463 case SYMBOL_REF:
3464 output_addr_const (f, x);
3465 break;
3466
3467 case CONST:
3468 if (GET_CODE (XEXP (x, 0)) == PLUS
3469 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3470 {
3471 output_addr_const (f, x);
3472 break;
3473 }
3474 /* Fall through. */
3475
3476 default:
3477 output_operand_lossage ("Unsupported operand for code '%c'", code);
3478 }
3479 break;
3480
3481 case 'e':
3482 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3483 {
3484 int n;
3485
3486 if (GET_CODE (x) != CONST_INT
3487 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3488 {
3489 output_operand_lossage ("invalid operand for '%%%c'", code);
3490 return;
3491 }
3492
3493 switch (n)
3494 {
3495 case 3:
3496 fputc ('b', f);
3497 break;
3498 case 4:
3499 fputc ('h', f);
3500 break;
3501 case 5:
3502 fputc ('w', f);
3503 break;
3504 default:
3505 output_operand_lossage ("invalid operand for '%%%c'", code);
3506 return;
3507 }
3508 }
3509 break;
3510
3511 case 'p':
3512 {
3513 int n;
3514
3515 /* Print N such that 2^N == X. */
3516 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3517 {
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3520 }
3521
3522 asm_fprintf (f, "%d", n);
3523 }
3524 break;
3525
3526 case 'P':
3527 /* Print the number of non-zero bits in X (a const_int). */
3528 if (GET_CODE (x) != CONST_INT)
3529 {
3530 output_operand_lossage ("invalid operand for '%%%c'", code);
3531 return;
3532 }
3533
3534 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3535 break;
3536
3537 case 'H':
3538 /* Print the higher numbered register of a pair (TImode) of regs. */
3539 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3540 {
3541 output_operand_lossage ("invalid operand for '%%%c'", code);
3542 return;
3543 }
3544
3545 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3546 break;
3547
3548 case 'm':
3549 /* Print a condition (eq, ne, etc). */
3550
3551 /* CONST_TRUE_RTX means always -- that's the default. */
3552 if (x == const_true_rtx)
3553 return;
3554
3555 if (!COMPARISON_P (x))
3556 {
3557 output_operand_lossage ("invalid operand for '%%%c'", code);
3558 return;
3559 }
3560
3561 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3562 break;
3563
3564 case 'M':
3565 /* Print the inverse of a condition (eq <-> ne, etc). */
3566
3567 /* CONST_TRUE_RTX means never -- that's the default. */
3568 if (x == const_true_rtx)
3569 {
3570 fputs ("nv", f);
3571 return;
3572 }
3573
3574 if (!COMPARISON_P (x))
3575 {
3576 output_operand_lossage ("invalid operand for '%%%c'", code);
3577 return;
3578 }
3579
3580 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3581 (aarch64_get_condition_code (x))], f);
3582 break;
3583
3584 case 'b':
3585 case 'h':
3586 case 's':
3587 case 'd':
3588 case 'q':
3589 /* Print a scalar FP/SIMD register name. */
3590 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3591 {
3592 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3593 return;
3594 }
3595 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3596 break;
3597
3598 case 'S':
3599 case 'T':
3600 case 'U':
3601 case 'V':
3602 /* Print the first FP/SIMD register name in a list. */
3603 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3604 {
3605 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3606 return;
3607 }
3608 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3609 break;
3610
3611 case 'X':
3612 /* Print bottom 16 bits of integer constant in hex. */
3613 if (GET_CODE (x) != CONST_INT)
3614 {
3615 output_operand_lossage ("invalid operand for '%%%c'", code);
3616 return;
3617 }
3618 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3619 break;
3620
3621 case 'w':
3622 case 'x':
3623 /* Print a general register name or the zero register (32-bit or
3624 64-bit). */
3625 if (x == const0_rtx
3626 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3627 {
3628 asm_fprintf (f, "%czr", code);
3629 break;
3630 }
3631
3632 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3633 {
3634 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3635 break;
3636 }
3637
3638 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3639 {
3640 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3641 break;
3642 }
3643
3644 /* Fall through */
3645
3646 case 0:
3647 /* Print a normal operand; if it's a general register, then we
3648 assume DImode. */
3649 if (x == NULL)
3650 {
3651 output_operand_lossage ("missing operand");
3652 return;
3653 }
3654
3655 switch (GET_CODE (x))
3656 {
3657 case REG:
3658 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3659 break;
3660
3661 case MEM:
3662 aarch64_memory_reference_mode = GET_MODE (x);
3663 output_address (XEXP (x, 0));
3664 break;
3665
3666 case LABEL_REF:
3667 case SYMBOL_REF:
3668 output_addr_const (asm_out_file, x);
3669 break;
3670
3671 case CONST_INT:
3672 asm_fprintf (f, "%wd", INTVAL (x));
3673 break;
3674
3675 case CONST_VECTOR:
3676 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3677 {
3678 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3679 HOST_WIDE_INT_MIN,
3680 HOST_WIDE_INT_MAX));
3681 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3682 }
3683 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3684 {
3685 fputc ('0', f);
3686 }
3687 else
3688 gcc_unreachable ();
3689 break;
3690
3691 case CONST_DOUBLE:
3692 /* CONST_DOUBLE can represent a double-width integer.
3693 In this case, the mode of x is VOIDmode. */
3694 if (GET_MODE (x) == VOIDmode)
3695 ; /* Do Nothing. */
3696 else if (aarch64_float_const_zero_rtx_p (x))
3697 {
3698 fputc ('0', f);
3699 break;
3700 }
3701 else if (aarch64_float_const_representable_p (x))
3702 {
3703 #define buf_size 20
3704 char float_buf[buf_size] = {'\0'};
3705 REAL_VALUE_TYPE r;
3706 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3707 real_to_decimal_for_mode (float_buf, &r,
3708 buf_size, buf_size,
3709 1, GET_MODE (x));
3710 asm_fprintf (asm_out_file, "%s", float_buf);
3711 break;
3712 #undef buf_size
3713 }
3714 output_operand_lossage ("invalid constant");
3715 return;
3716 default:
3717 output_operand_lossage ("invalid operand");
3718 return;
3719 }
3720 break;
3721
3722 case 'A':
3723 if (GET_CODE (x) == HIGH)
3724 x = XEXP (x, 0);
3725
3726 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3727 {
3728 case SYMBOL_SMALL_GOT:
3729 asm_fprintf (asm_out_file, ":got:");
3730 break;
3731
3732 case SYMBOL_SMALL_TLSGD:
3733 asm_fprintf (asm_out_file, ":tlsgd:");
3734 break;
3735
3736 case SYMBOL_SMALL_TLSDESC:
3737 asm_fprintf (asm_out_file, ":tlsdesc:");
3738 break;
3739
3740 case SYMBOL_SMALL_GOTTPREL:
3741 asm_fprintf (asm_out_file, ":gottprel:");
3742 break;
3743
3744 case SYMBOL_SMALL_TPREL:
3745 asm_fprintf (asm_out_file, ":tprel:");
3746 break;
3747
3748 case SYMBOL_TINY_GOT:
3749 gcc_unreachable ();
3750 break;
3751
3752 default:
3753 break;
3754 }
3755 output_addr_const (asm_out_file, x);
3756 break;
3757
3758 case 'L':
3759 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3760 {
3761 case SYMBOL_SMALL_GOT:
3762 asm_fprintf (asm_out_file, ":lo12:");
3763 break;
3764
3765 case SYMBOL_SMALL_TLSGD:
3766 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3767 break;
3768
3769 case SYMBOL_SMALL_TLSDESC:
3770 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3771 break;
3772
3773 case SYMBOL_SMALL_GOTTPREL:
3774 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3775 break;
3776
3777 case SYMBOL_SMALL_TPREL:
3778 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3779 break;
3780
3781 case SYMBOL_TINY_GOT:
3782 asm_fprintf (asm_out_file, ":got:");
3783 break;
3784
3785 default:
3786 break;
3787 }
3788 output_addr_const (asm_out_file, x);
3789 break;
3790
3791 case 'G':
3792
3793 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3794 {
3795 case SYMBOL_SMALL_TPREL:
3796 asm_fprintf (asm_out_file, ":tprel_hi12:");
3797 break;
3798 default:
3799 break;
3800 }
3801 output_addr_const (asm_out_file, x);
3802 break;
3803
3804 default:
3805 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3806 return;
3807 }
3808 }
3809
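/* Print the memory address X to file F in AArch64 assembly syntax, using
   the mode recorded in aarch64_memory_reference_mode to classify it.  */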
3810 void
3811 aarch64_print_operand_address (FILE *f, rtx x)
3812 {
3813 struct aarch64_address_info addr;
3814
3815 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3816 MEM, true))
3817 switch (addr.type)
3818 {
3819 case ADDRESS_REG_IMM:
3820 if (addr.offset == const0_rtx)
3821 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3822 else
3823 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3824 INTVAL (addr.offset));
3825 return;
3826
3827 case ADDRESS_REG_REG:
3828 if (addr.shift == 0)
3829 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3830 reg_names [REGNO (addr.offset)]);
3831 else
3832 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3833 reg_names [REGNO (addr.offset)], addr.shift);
3834 return;
3835
3836 case ADDRESS_REG_UXTW:
3837 if (addr.shift == 0)
3838 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3839 REGNO (addr.offset) - R0_REGNUM);
3840 else
3841 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3842 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3843 return;
3844
3845 case ADDRESS_REG_SXTW:
3846 if (addr.shift == 0)
3847 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3848 REGNO (addr.offset) - R0_REGNUM);
3849 else
3850 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3851 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3852 return;
3853
3854 case ADDRESS_REG_WB:
3855 switch (GET_CODE (x))
3856 {
3857 case PRE_INC:
3858 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3859 GET_MODE_SIZE (aarch64_memory_reference_mode));
3860 return;
3861 case POST_INC:
3862 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3863 GET_MODE_SIZE (aarch64_memory_reference_mode));
3864 return;
3865 case PRE_DEC:
3866 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3867 GET_MODE_SIZE (aarch64_memory_reference_mode));
3868 return;
3869 case POST_DEC:
3870 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3871 GET_MODE_SIZE (aarch64_memory_reference_mode));
3872 return;
3873 case PRE_MODIFY:
3874 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3875 INTVAL (addr.offset));
3876 return;
3877 case POST_MODIFY:
3878 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3879 INTVAL (addr.offset));
3880 return;
3881 default:
3882 break;
3883 }
3884 break;
3885
3886 case ADDRESS_LO_SUM:
3887 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3888 output_addr_const (f, addr.offset);
3889 asm_fprintf (f, "]");
3890 return;
3891
3892 case ADDRESS_SYMBOLIC:
3893 break;
3894 }
3895
3896 output_addr_const (f, x);
3897 }
3898
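/* Return true if X (or any sub-rtx of X) mentions a label, ignoring the
   LABEL_REFs that appear inside UNSPEC_TLS operands.  */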
3899 bool
3900 aarch64_label_mentioned_p (rtx x)
3901 {
3902 const char *fmt;
3903 int i;
3904
3905 if (GET_CODE (x) == LABEL_REF)
3906 return true;
3907
3908 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3909 referencing instruction, but they are constant offsets, not
3910 symbols. */
3911 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3912 return false;
3913
3914 fmt = GET_RTX_FORMAT (GET_CODE (x));
3915 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3916 {
3917 if (fmt[i] == 'E')
3918 {
3919 int j;
3920
3921 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3922 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3923 return 1;
3924 }
3925 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3926 return 1;
3927 }
3928
3929 return 0;
3930 }
3931
3932 /* Implement REGNO_REG_CLASS. */
3933
3934 enum reg_class
3935 aarch64_regno_regclass (unsigned regno)
3936 {
3937 if (GP_REGNUM_P (regno))
3938 return CORE_REGS;
3939
3940 if (regno == SP_REGNUM)
3941 return STACK_REG;
3942
3943 if (regno == FRAME_POINTER_REGNUM
3944 || regno == ARG_POINTER_REGNUM)
3945 return POINTER_REGS;
3946
3947 if (FP_REGNUM_P (regno))
3948 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3949
3950 return NO_REGS;
3951 }
3952
3953 /* Try a machine-dependent way of reloading an illegitimate address
3954 operand. If we find one, push the reload and return the new rtx. */
3955
3956 rtx
3957 aarch64_legitimize_reload_address (rtx *x_p,
3958 enum machine_mode mode,
3959 int opnum, int type,
3960 int ind_levels ATTRIBUTE_UNUSED)
3961 {
3962 rtx x = *x_p;
3963
3964 /* Do not allow mem (plus (reg, const)) if vector mode. */
3965 if (aarch64_vector_mode_p (mode)
3966 && GET_CODE (x) == PLUS
3967 && REG_P (XEXP (x, 0))
3968 && CONST_INT_P (XEXP (x, 1)))
3969 {
3970 rtx orig_rtx = x;
3971 x = copy_rtx (x);
3972 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3973 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3974 opnum, (enum reload_type) type);
3975 return x;
3976 }
3977
3978 /* We must recognize output that we have already generated ourselves. */
3979 if (GET_CODE (x) == PLUS
3980 && GET_CODE (XEXP (x, 0)) == PLUS
3981 && REG_P (XEXP (XEXP (x, 0), 0))
3982 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3983 && CONST_INT_P (XEXP (x, 1)))
3984 {
3985 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3986 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3987 opnum, (enum reload_type) type);
3988 return x;
3989 }
3990
3991 /* We wish to handle large displacements off a base register by splitting
3992 the addend across an add and the mem insn. This can cut the number of
3993 extra insns needed from 3 to 1. It is only useful for load/store of a
3994 single register with 12 bit offset field. */
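     /* Illustrative example (values chosen arbitrarily): for an SImode
	access at base + 0x14004 we get low = 0x4 and high = 0x14000.
	The high part is a valid 12-bit immediate shifted left by 12, so
	the reload becomes roughly

	   add	xscratch, xbase, #0x14, lsl #12
	   ldr	w0, [xscratch, #4]

	rather than materialising the full constant separately.  */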
3995 if (GET_CODE (x) == PLUS
3996 && REG_P (XEXP (x, 0))
3997 && CONST_INT_P (XEXP (x, 1))
3998 && HARD_REGISTER_P (XEXP (x, 0))
3999 && mode != TImode
4000 && mode != TFmode
4001 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4002 {
4003 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4004 HOST_WIDE_INT low = val & 0xfff;
4005 HOST_WIDE_INT high = val - low;
4006 HOST_WIDE_INT offs;
4007 rtx cst;
4008 enum machine_mode xmode = GET_MODE (x);
4009
4010 /* In ILP32, xmode can be either DImode or SImode. */
4011 gcc_assert (xmode == DImode || xmode == SImode);
4012
4013 /* We cannot ascertain the alignment of a BLKmode (zero-size) access,
4014 so do not try to split its offset here; leave it to be reloaded in full. */
4015 if (GET_MODE_SIZE (mode) == 0)
4016 return NULL_RTX;
4017
4018 offs = low % GET_MODE_SIZE (mode);
4019
4020 /* Align misaligned offset by adjusting high part to compensate. */
4021 if (offs != 0)
4022 {
4023 if (aarch64_uimm12_shift (high + offs))
4024 {
4025 /* Align down. */
4026 low = low - offs;
4027 high = high + offs;
4028 }
4029 else
4030 {
4031 /* Align up. */
4032 offs = GET_MODE_SIZE (mode) - offs;
4033 low = low + offs;
4034 high = high + (low & 0x1000) - offs;
4035 low &= 0xfff;
4036 }
4037 }
4038
4039 /* Check for overflow. */
4040 if (high + low != val)
4041 return NULL_RTX;
4042
4043 cst = GEN_INT (high);
4044 if (!aarch64_uimm12_shift (high))
4045 cst = force_const_mem (xmode, cst);
4046
4047 /* Reload high part into base reg, leaving the low part
4048 in the mem instruction.
4049 Note that replacing this gen_rtx_PLUS with plus_constant is
4050 wrong in this case because we rely on the
4051 (plus (plus reg c1) c2) structure being preserved so that
4052 XEXP (*p, 0) in push_reload below uses the correct term. */
4053 x = gen_rtx_PLUS (xmode,
4054 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4055 GEN_INT (low));
4056
4057 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4058 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4059 opnum, (enum reload_type) type);
4060 return x;
4061 }
4062
4063 return NULL_RTX;
4064 }
4065
4066
4067 static reg_class_t
4068 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4069 reg_class_t rclass,
4070 enum machine_mode mode,
4071 secondary_reload_info *sri)
4072 {
4073 /* Without the TARGET_SIMD instructions we cannot move a Q register
4074 to a Q register directly. We need a scratch. */
4075 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4076 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4077 && reg_class_subset_p (rclass, FP_REGS))
4078 {
4079 if (mode == TFmode)
4080 sri->icode = CODE_FOR_aarch64_reload_movtf;
4081 else if (mode == TImode)
4082 sri->icode = CODE_FOR_aarch64_reload_movti;
4083 return NO_REGS;
4084 }
4085
4086 /* A TFmode or TImode memory access should be handled via FP_REGS,
4087 because AArch64 has richer addressing modes for LDR/STR instructions
4088 than for LDP/STP instructions. */
4089 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4090 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4091 return FP_REGS;
4092
4093 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4094 return CORE_REGS;
4095
4096 return NO_REGS;
4097 }
4098
4099 static bool
4100 aarch64_can_eliminate (const int from, const int to)
4101 {
4102 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4103 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4104
4105 if (frame_pointer_needed)
4106 {
4107 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4108 return true;
4109 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4110 return false;
4111 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4112 && !cfun->calls_alloca)
4113 return true;
4114 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4115 return true;
4116 return false;
4117 }
4118 else
4119 {
4120 /* If we decided that we didn't need a leaf frame pointer but then used
4121 LR in the function, then we'll want a frame pointer after all, so
4122 prevent this elimination to ensure a frame pointer is used.
4123
4124 NOTE: the original value of flag_omit_frame_pointer gets trashed
4125 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4126 of faked_omit_frame_pointer here (which is true when we always
4127 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4128 pointers when LR is clobbered). */
4129 if (to == STACK_POINTER_REGNUM
4130 && df_regs_ever_live_p (LR_REGNUM)
4131 && faked_omit_frame_pointer)
4132 return false;
4133 }
4134
4135 return true;
4136 }
4137
4138 HOST_WIDE_INT
4139 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4140 {
4141 HOST_WIDE_INT frame_size;
4142 HOST_WIDE_INT offset;
4143
4144 aarch64_layout_frame ();
4145 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4146 + crtl->outgoing_args_size
4147 + cfun->machine->saved_varargs_size);
4148
4149 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4150 offset = frame_size;
4151
4152 if (to == HARD_FRAME_POINTER_REGNUM)
4153 {
4154 if (from == ARG_POINTER_REGNUM)
4155 return offset - crtl->outgoing_args_size;
4156
4157 if (from == FRAME_POINTER_REGNUM)
4158 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4159 }
4160
4161 if (to == STACK_POINTER_REGNUM)
4162 {
4163 if (from == FRAME_POINTER_REGNUM)
4164 {
4165 HOST_WIDE_INT elim = crtl->outgoing_args_size
4166 + cfun->machine->frame.saved_regs_size
4167 + get_frame_size ()
4168 - cfun->machine->frame.fp_lr_offset;
4169 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4170 return elim;
4171 }
4172 }
4173
4174 return offset;
4175 }
4176
4177
4178 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4179 previous frame. */
4180
4181 rtx
4182 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4183 {
4184 if (count != 0)
4185 return const0_rtx;
4186 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4187 }
4188
4189
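/* The trampoline template below loads the target function address and the
   static chain value from literal slots that follow the code, then branches
   to the target.  Illustratively, for LP64 (with this port's usual register
   assignments, IP1 = x17 and the static chain register = x18) the 32-byte
   trampoline is laid out roughly as:

     0:  ldr   x17, .+16     // load <function address> from offset 16
     4:  ldr   x18, .+20     // load <static chain> from offset 24
     8:  br    x17
     12: .word 0             // padding up to tramp_code_sz
     16: .dword <function address>
     24: .dword <static chain>

   aarch64_trampoline_init below fills in the two trailing slots.  */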
4190 static void
4191 aarch64_asm_trampoline_template (FILE *f)
4192 {
4193 if (TARGET_ILP32)
4194 {
4195 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4196 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4197 }
4198 else
4199 {
4200 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4201 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4202 }
4203 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4204 assemble_aligned_integer (4, const0_rtx);
4205 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4206 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4207 }
4208
4209 static void
4210 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4211 {
4212 rtx fnaddr, mem, a_tramp;
4213 const int tramp_code_sz = 16;
4214
4215 /* Don't need to copy the trailing D-words; we fill those in below. */
4216 emit_block_move (m_tramp, assemble_trampoline_template (),
4217 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4218 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4219 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4220 if (GET_MODE (fnaddr) != ptr_mode)
4221 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4222 emit_move_insn (mem, fnaddr);
4223
4224 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4225 emit_move_insn (mem, chain_value);
4226
4227 /* XXX We should really define a "clear_cache" pattern and use
4228 gen_clear_cache(). */
4229 a_tramp = XEXP (m_tramp, 0);
4230 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4231 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4232 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4233 ptr_mode);
4234 }
4235
4236 static unsigned char
4237 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4238 {
4239 switch (regclass)
4240 {
4241 case CORE_REGS:
4242 case POINTER_REGS:
4243 case GENERAL_REGS:
4244 case ALL_REGS:
4245 case FP_REGS:
4246 case FP_LO_REGS:
4247 return
4248 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4249 (GET_MODE_SIZE (mode) + 7) / 8;
4250 case STACK_REG:
4251 return 1;
4252
4253 case NO_REGS:
4254 return 0;
4255
4256 default:
4257 break;
4258 }
4259 gcc_unreachable ();
4260 }
4261
4262 static reg_class_t
4263 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4264 {
4265 if (regclass == POINTER_REGS)
4266 return GENERAL_REGS;
4267
4268 if (regclass == STACK_REG)
4269 {
4270 if (REG_P(x)
4271 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4272 return regclass;
4273
4274 return NO_REGS;
4275 }
4276
4277 /* If it's an integer immediate that MOVI can't handle, then
4278 FP_REGS is not an option, so we return NO_REGS instead. */
4279 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4280 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4281 return NO_REGS;
4282
4283 /* Register elimination can result in a request for
4284 SP+constant->FP_REGS. We cannot support such operations, which
4285 use SP as the source and an FP_REG as the destination, so reject
4286 them outright. */
4287 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4288 {
4289 rtx lhs = XEXP (x, 0);
4290
4291 /* Look through a possible SUBREG introduced by ILP32. */
4292 if (GET_CODE (lhs) == SUBREG)
4293 lhs = SUBREG_REG (lhs);
4294
4295 gcc_assert (REG_P (lhs));
4296 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4297 POINTER_REGS));
4298 return NO_REGS;
4299 }
4300
4301 return regclass;
4302 }
4303
4304 void
4305 aarch64_asm_output_labelref (FILE* f, const char *name)
4306 {
4307 asm_fprintf (f, "%U%s", name);
4308 }
4309
4310 static void
4311 aarch64_elf_asm_constructor (rtx symbol, int priority)
4312 {
4313 if (priority == DEFAULT_INIT_PRIORITY)
4314 default_ctor_section_asm_out_constructor (symbol, priority);
4315 else
4316 {
4317 section *s;
4318 char buf[18];
4319 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4320 s = get_section (buf, SECTION_WRITE, NULL);
4321 switch_to_section (s);
4322 assemble_align (POINTER_SIZE);
4323 assemble_aligned_integer (POINTER_BYTES, symbol);
4324 }
4325 }
4326
4327 static void
4328 aarch64_elf_asm_destructor (rtx symbol, int priority)
4329 {
4330 if (priority == DEFAULT_INIT_PRIORITY)
4331 default_dtor_section_asm_out_destructor (symbol, priority);
4332 else
4333 {
4334 section *s;
4335 char buf[18];
4336 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4337 s = get_section (buf, SECTION_WRITE, NULL);
4338 switch_to_section (s);
4339 assemble_align (POINTER_SIZE);
4340 assemble_aligned_integer (POINTER_BYTES, symbol);
4341 }
4342 }
4343
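/* Output the dispatch code for a casesi jump table.  Illustratively, with
   the operand bindings used by the patterns in the function below
   (%0 = table base, %w1 = index, %3 and %4 = scratch registers) and a
   HImode table (index 1), the emitted sequence is roughly:

     ldrh  w3, [x0,w1,uxtw #1]
     adr   x4, .Lrtx<N>
     add   x3, x4, w3, sxth #2
     br    x3
   .Lrtx<N>:

   i.e. each table entry is sign-extended, scaled by 4 and added to the
   anchor label emitted immediately after the branch.  */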
4344 const char*
4345 aarch64_output_casesi (rtx *operands)
4346 {
4347 char buf[100];
4348 char label[100];
4349 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4350 int index;
4351 static const char *const patterns[4][2] =
4352 {
4353 {
4354 "ldrb\t%w3, [%0,%w1,uxtw]",
4355 "add\t%3, %4, %w3, sxtb #2"
4356 },
4357 {
4358 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4359 "add\t%3, %4, %w3, sxth #2"
4360 },
4361 {
4362 "ldr\t%w3, [%0,%w1,uxtw #2]",
4363 "add\t%3, %4, %w3, sxtw #2"
4364 },
4365 /* We assume that DImode is only generated when not optimizing and
4366 that we don't really need 64-bit address offsets. That would
4367 imply an object file with 8GB of code in a single function! */
4368 {
4369 "ldr\t%w3, [%0,%w1,uxtw #2]",
4370 "add\t%3, %4, %w3, sxtw #2"
4371 }
4372 };
4373
4374 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4375
4376 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4377
4378 gcc_assert (index >= 0 && index <= 3);
4379
4380 /* Need to implement table size reduction, by changing the code below. */
4381 output_asm_insn (patterns[index][0], operands);
4382 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4383 snprintf (buf, sizeof (buf),
4384 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4385 output_asm_insn (buf, operands);
4386 output_asm_insn (patterns[index][1], operands);
4387 output_asm_insn ("br\t%3", operands);
4388 assemble_label (asm_out_file, label);
4389 return "";
4390 }
4391
4392
4393 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4394 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4395 operator. */
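/* For example (illustrative values): shift == 1 with mask == 0x1fe is
   0xff << 1, so we return 8 (a UXTB-style extend); shift == 2 with
   mask == 0x3fffc is 0xffff << 2, so we return 16 (UXTH).  Anything
   that matches no pattern returns 0.  */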
4396
4397 int
4398 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4399 {
4400 if (shift >= 0 && shift <= 3)
4401 {
4402 int size;
4403 for (size = 8; size <= 32; size *= 2)
4404 {
4405 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4406 if (mask == bits << shift)
4407 return size;
4408 }
4409 }
4410 return 0;
4411 }
4412
4413 static bool
4414 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4415 const_rtx x ATTRIBUTE_UNUSED)
4416 {
4417 /* We can't use blocks for constants when we're using a per-function
4418 constant pool. */
4419 return false;
4420 }
4421
4422 static section *
4423 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4424 rtx x ATTRIBUTE_UNUSED,
4425 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4426 {
4427 /* Force all constant pool entries into the current function section. */
4428 return function_section (current_function_decl);
4429 }
4430
4431
4432 /* Costs. */
4433
4434 /* Helper function for rtx cost calculation. Strip a shift expression
4435 from X. Returns the inner operand if successful, or the original
4436 expression on failure. */
4437 static rtx
4438 aarch64_strip_shift (rtx x)
4439 {
4440 rtx op = x;
4441
4442 if ((GET_CODE (op) == ASHIFT
4443 || GET_CODE (op) == ASHIFTRT
4444 || GET_CODE (op) == LSHIFTRT)
4445 && CONST_INT_P (XEXP (op, 1)))
4446 return XEXP (op, 0);
4447
4448 if (GET_CODE (op) == MULT
4449 && CONST_INT_P (XEXP (op, 1))
4450 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4451 return XEXP (op, 0);
4452
4453 return x;
4454 }
4455
4456 /* Helper function for rtx cost calculation. Strip a shift or extend
4457 expression from X. Returns the inner operand if successful, or the
4458 original expression on failure. We deal with a number of possible
4459 canonicalization variations here. */
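/* Illustrative forms that strip down to the inner register (the constants
   are arbitrary but must satisfy the checks below):

     (and (mult (reg) (const_int 4)) (const_int 0x3fc))  ; zero-extended byte, scaled by 4
     (ashift (sign_extend (reg)) (const_int 2))          ; extended register, lsl 1..4
     (zero_extend (reg))

   together with the extract-based extend forms and the plain shift/scale
   forms handled by aarch64_strip_shift.  */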
4460 static rtx
4461 aarch64_strip_shift_or_extend (rtx x)
4462 {
4463 rtx op = x;
4464
4465 /* Zero and sign extraction of a widened value. */
4466 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4467 && XEXP (op, 2) == const0_rtx
4468 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4469 XEXP (op, 1)))
4470 return XEXP (XEXP (op, 0), 0);
4471
4472 /* It can also be represented (for zero-extend) as an AND with an
4473 immediate. */
4474 if (GET_CODE (op) == AND
4475 && GET_CODE (XEXP (op, 0)) == MULT
4476 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4477 && CONST_INT_P (XEXP (op, 1))
4478 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4479 INTVAL (XEXP (op, 1))) != 0)
4480 return XEXP (XEXP (op, 0), 0);
4481
4482 /* Now handle extended register, as this may also have an optional
4483 left shift by 1..4. */
4484 if (GET_CODE (op) == ASHIFT
4485 && CONST_INT_P (XEXP (op, 1))
4486 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4487 op = XEXP (op, 0);
4488
4489 if (GET_CODE (op) == ZERO_EXTEND
4490 || GET_CODE (op) == SIGN_EXTEND)
4491 op = XEXP (op, 0);
4492
4493 if (op != x)
4494 return op;
4495
4496 return aarch64_strip_shift (x);
4497 }
4498
4499 /* Calculate the cost of calculating X, storing it in *COST. Result
4500 is true if the total cost of the operation has now been calculated. */
4501 static bool
4502 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4503 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4504 {
4505 rtx op0, op1;
4506 const struct cpu_cost_table *extra_cost
4507 = aarch64_tune_params->insn_extra_cost;
4508
4509 switch (code)
4510 {
4511 case SET:
4512 op0 = SET_DEST (x);
4513 op1 = SET_SRC (x);
4514
4515 switch (GET_CODE (op0))
4516 {
4517 case MEM:
4518 if (speed)
4519 *cost += extra_cost->ldst.store;
4520
4521 if (op1 != const0_rtx)
4522 *cost += rtx_cost (op1, SET, 1, speed);
4523 return true;
4524
4525 case SUBREG:
4526 if (! REG_P (SUBREG_REG (op0)))
4527 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4528 /* Fall through. */
4529 case REG:
4530 /* Cost is just the cost of the RHS of the set. */
4531 *cost += rtx_cost (op1, SET, 1, true);
4532 return true;
4533
4534 case ZERO_EXTRACT: /* Bit-field insertion. */
4535 case SIGN_EXTRACT:
4536 /* Strip any redundant widening of the RHS to meet the width of
4537 the target. */
4538 if (GET_CODE (op1) == SUBREG)
4539 op1 = SUBREG_REG (op1);
4540 if ((GET_CODE (op1) == ZERO_EXTEND
4541 || GET_CODE (op1) == SIGN_EXTEND)
4542 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4543 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4544 >= INTVAL (XEXP (op0, 1))))
4545 op1 = XEXP (op1, 0);
4546 *cost += rtx_cost (op1, SET, 1, speed);
4547 return true;
4548
4549 default:
4550 break;
4551 }
4552 return false;
4553
4554 case MEM:
4555 if (speed)
4556 *cost += extra_cost->ldst.load;
4557
4558 return true;
4559
4560 case NEG:
4561 op0 = CONST0_RTX (GET_MODE (x));
4562 op1 = XEXP (x, 0);
4563 goto cost_minus;
4564
4565 case COMPARE:
4566 op0 = XEXP (x, 0);
4567 op1 = XEXP (x, 1);
4568
4569 if (op1 == const0_rtx
4570 && GET_CODE (op0) == AND)
4571 {
4572 x = op0;
4573 goto cost_logic;
4574 }
4575
4576 /* Comparisons can work if the order is swapped.
4577 Canonicalization puts the more complex operation first, but
4578 we want it in op1. */
4579 if (! (REG_P (op0)
4580 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4581 {
4582 op0 = XEXP (x, 1);
4583 op1 = XEXP (x, 0);
4584 }
4585 goto cost_minus;
4586
4587 case MINUS:
4588 op0 = XEXP (x, 0);
4589 op1 = XEXP (x, 1);
4590
4591 cost_minus:
4592 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4593 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4594 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4595 {
4596 if (op0 != const0_rtx)
4597 *cost += rtx_cost (op0, MINUS, 0, speed);
4598
4599 if (CONST_INT_P (op1))
4600 {
4601 if (!aarch64_uimm12_shift (INTVAL (op1)))
4602 *cost += rtx_cost (op1, MINUS, 1, speed);
4603 }
4604 else
4605 {
4606 op1 = aarch64_strip_shift_or_extend (op1);
4607 *cost += rtx_cost (op1, MINUS, 1, speed);
4608 }
4609 return true;
4610 }
4611
4612 return false;
4613
4614 case PLUS:
4615 op0 = XEXP (x, 0);
4616 op1 = XEXP (x, 1);
4617
4618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4619 {
4620 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4621 {
4622 *cost += rtx_cost (op0, PLUS, 0, speed);
4623 }
4624 else
4625 {
4626 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4627
4628 if (new_op0 == op0
4629 && GET_CODE (op0) == MULT)
4630 {
4631 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4632 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4633 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4634 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4635 {
4636 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4637 speed)
4638 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4639 speed)
4640 + rtx_cost (op1, PLUS, 1, speed));
4641 if (speed)
4642 *cost +=
4643 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4644 return true;
4645 }
4646 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4647 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4648 + rtx_cost (op1, PLUS, 1, speed));
4649
4650 if (speed)
4651 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
     return true;
4652 }
4653
4654 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4655 + rtx_cost (op1, PLUS, 1, speed));
4656 }
4657 return true;
4658 }
4659
4660 return false;
4661
4662 case IOR:
4663 case XOR:
4664 case AND:
4665 cost_logic:
4666 op0 = XEXP (x, 0);
4667 op1 = XEXP (x, 1);
4668
4669 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4670 {
4671 if (CONST_INT_P (op1)
4672 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4673 {
4674 *cost += rtx_cost (op0, AND, 0, speed);
4675 }
4676 else
4677 {
4678 if (GET_CODE (op0) == NOT)
4679 op0 = XEXP (op0, 0);
4680 op0 = aarch64_strip_shift (op0);
4681 *cost += (rtx_cost (op0, AND, 0, speed)
4682 + rtx_cost (op1, AND, 1, speed));
4683 }
4684 return true;
4685 }
4686 return false;
4687
4688 case ZERO_EXTEND:
4689 if ((GET_MODE (x) == DImode
4690 && GET_MODE (XEXP (x, 0)) == SImode)
4691 || GET_CODE (XEXP (x, 0)) == MEM)
4692 {
4693 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4694 return true;
4695 }
4696 return false;
4697
4698 case SIGN_EXTEND:
4699 if (GET_CODE (XEXP (x, 0)) == MEM)
4700 {
4701 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4702 return true;
4703 }
4704 return false;
4705
4706 case ROTATE:
4707 if (!CONST_INT_P (XEXP (x, 1)))
4708 *cost += COSTS_N_INSNS (2);
4709 /* Fall through. */
4710 case ROTATERT:
4711 case LSHIFTRT:
4712 case ASHIFT:
4713 case ASHIFTRT:
4714
4715 /* Shifting by a register often takes an extra cycle. */
4716 if (speed && !CONST_INT_P (XEXP (x, 1)))
4717 *cost += extra_cost->alu.arith_shift_reg;
4718
4719 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4720 return true;
4721
4722 case HIGH:
4723 if (!CONSTANT_P (XEXP (x, 0)))
4724 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4725 return true;
4726
4727 case LO_SUM:
4728 if (!CONSTANT_P (XEXP (x, 1)))
4729 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4730 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4731 return true;
4732
4733 case ZERO_EXTRACT:
4734 case SIGN_EXTRACT:
4735 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4736 return true;
4737
4738 case MULT:
4739 op0 = XEXP (x, 0);
4740 op1 = XEXP (x, 1);
4741
4742 *cost = COSTS_N_INSNS (1);
4743 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4744 {
4745 if (CONST_INT_P (op1)
4746 && exact_log2 (INTVAL (op1)) > 0)
4747 {
4748 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4749 return true;
4750 }
4751
4752 if ((GET_CODE (op0) == ZERO_EXTEND
4753 && GET_CODE (op1) == ZERO_EXTEND)
4754 || (GET_CODE (op0) == SIGN_EXTEND
4755 && GET_CODE (op1) == SIGN_EXTEND))
4756 {
4757 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4758 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4759 if (speed)
4760 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4761 return true;
4762 }
4763
4764 if (speed)
4765 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4766 }
4767 else if (speed)
4768 {
4769 if (GET_MODE (x) == DFmode)
4770 *cost += extra_cost->fp[1].mult;
4771 else if (GET_MODE (x) == SFmode)
4772 *cost += extra_cost->fp[0].mult;
4773 }
4774
4775 return false; /* All arguments need to be in registers. */
4776
4777 case MOD:
4778 case UMOD:
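      /* There is no integer modulo instruction; x % y is expanded as
	 x - (x / y) * y, i.e. a divide followed by a multiply-subtract,
	 which is what the idiv + mult add costs below model.  */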
4779 *cost = COSTS_N_INSNS (2);
4780 if (speed)
4781 {
4782 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4783 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4784 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4785 else if (GET_MODE (x) == DFmode)
4786 *cost += (extra_cost->fp[1].mult
4787 + extra_cost->fp[1].div);
4788 else if (GET_MODE (x) == SFmode)
4789 *cost += (extra_cost->fp[0].mult
4790 + extra_cost->fp[0].div);
4791 }
4792 return false; /* All arguments need to be in registers. */
4793
4794 case DIV:
4795 case UDIV:
4796 *cost = COSTS_N_INSNS (1);
4797 if (speed)
4798 {
4799 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4800 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4801 else if (GET_MODE (x) == DFmode)
4802 *cost += extra_cost->fp[1].div;
4803 else if (GET_MODE (x) == SFmode)
4804 *cost += extra_cost->fp[0].div;
4805 }
4806 return false; /* All arguments need to be in registers. */
4807
4808 default:
4809 break;
4810 }
4811 return false;
4812 }
4813
4814 static int
4815 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4816 enum machine_mode mode ATTRIBUTE_UNUSED,
4817 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4818 {
4819 enum rtx_code c = GET_CODE (x);
4820 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4821
4822 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4823 return addr_cost->pre_modify;
4824
4825 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4826 return addr_cost->post_modify;
4827
4828 if (c == PLUS)
4829 {
4830 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4831 return addr_cost->imm_offset;
4832 else if (GET_CODE (XEXP (x, 0)) == MULT
4833 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4834 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4835 return addr_cost->register_extend;
4836
4837 return addr_cost->register_offset;
4838 }
4839 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4840 return addr_cost->imm_offset;
4841
4842 return 0;
4843 }
4844
4845 static int
4846 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4847 reg_class_t from, reg_class_t to)
4848 {
4849 const struct cpu_regmove_cost *regmove_cost
4850 = aarch64_tune_params->regmove_cost;
4851
4852 if (from == GENERAL_REGS && to == GENERAL_REGS)
4853 return regmove_cost->GP2GP;
4854 else if (from == GENERAL_REGS)
4855 return regmove_cost->GP2FP;
4856 else if (to == GENERAL_REGS)
4857 return regmove_cost->FP2GP;
4858
4859 /* When AdvSIMD instructions are disabled it is not possible to move
4860 a 128-bit value directly between Q registers. This is handled in
4861 secondary reload. A general register is used as a scratch to move
4862 the upper DI value and the lower DI value is moved directly,
4863 hence the cost is the sum of three moves. */
4864
4865 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4866 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4867
4868 return regmove_cost->FP2FP;
4869 }
4870
4871 static int
4872 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4873 reg_class_t rclass ATTRIBUTE_UNUSED,
4874 bool in ATTRIBUTE_UNUSED)
4875 {
4876 return aarch64_tune_params->memmov_cost;
4877 }
4878
4879 /* Vectorizer cost model target hooks. */
4880
4881 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4882 static int
4883 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4884 tree vectype,
4885 int misalign ATTRIBUTE_UNUSED)
4886 {
4887 unsigned elements;
4888
4889 switch (type_of_cost)
4890 {
4891 case scalar_stmt:
4892 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4893
4894 case scalar_load:
4895 return aarch64_tune_params->vec_costs->scalar_load_cost;
4896
4897 case scalar_store:
4898 return aarch64_tune_params->vec_costs->scalar_store_cost;
4899
4900 case vector_stmt:
4901 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4902
4903 case vector_load:
4904 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4905
4906 case vector_store:
4907 return aarch64_tune_params->vec_costs->vec_store_cost;
4908
4909 case vec_to_scalar:
4910 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4911
4912 case scalar_to_vec:
4913 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4914
4915 case unaligned_load:
4916 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4917
4918 case unaligned_store:
4919 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4920
4921 case cond_branch_taken:
4922 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4923
4924 case cond_branch_not_taken:
4925 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4926
4927 case vec_perm:
4928 case vec_promote_demote:
4929 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4930
4931 case vec_construct:
4932 elements = TYPE_VECTOR_SUBPARTS (vectype);
4933 return elements / 2 + 1;
4934
4935 default:
4936 gcc_unreachable ();
4937 }
4938 }
4939
4940 /* Implement targetm.vectorize.add_stmt_cost. */
4941 static unsigned
4942 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4943 struct _stmt_vec_info *stmt_info, int misalign,
4944 enum vect_cost_model_location where)
4945 {
4946 unsigned *cost = (unsigned *) data;
4947 unsigned retval = 0;
4948
4949 if (flag_vect_cost_model)
4950 {
4951 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4952 int stmt_cost =
4953 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4954
4955 /* Statements in an inner loop relative to the loop being
4956 vectorized are weighted more heavily. The value here is
4957 a function (linear for now) of the loop nest level. */
4958 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4959 {
4960 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4961 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4962 unsigned nest_level = loop_depth (loop);
4963
4964 count *= nest_level;
4965 }
4966
4967 retval = (unsigned) (count * stmt_cost);
4968 cost[where] += retval;
4969 }
4970
4971 return retval;
4972 }
4973
4974 static void initialize_aarch64_code_model (void);
4975
4976 /* Parse the architecture extension string. */
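/* For example (hypothetical command line), "-march=armv8-a+crc+nocrypto"
   hands "+crc+nocrypto" to this function: the first token turns on the CRC
   extension's flags, and the second, because of its "no" prefix, clears the
   crypto extension's flags.  */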
4977
4978 static void
4979 aarch64_parse_extension (char *str)
4980 {
4981 /* The extension string is parsed left to right. */
4982 const struct aarch64_option_extension *opt = NULL;
4983
4984 /* Flag to say whether we are adding or removing an extension. */
4985 int adding_ext = -1;
4986
4987 while (str != NULL && *str != 0)
4988 {
4989 char *ext;
4990 size_t len;
4991
4992 str++;
4993 ext = strchr (str, '+');
4994
4995 if (ext != NULL)
4996 len = ext - str;
4997 else
4998 len = strlen (str);
4999
5000 if (len >= 2 && strncmp (str, "no", 2) == 0)
5001 {
5002 adding_ext = 0;
5003 len -= 2;
5004 str += 2;
5005 }
5006 else if (len > 0)
5007 adding_ext = 1;
5008
5009 if (len == 0)
5010 {
5011 error ("missing feature modifier after %qs", "+no");
5012 return;
5013 }
5014
5015 /* Scan over the extensions table trying to find an exact match. */
5016 for (opt = all_extensions; opt->name != NULL; opt++)
5017 {
5018 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5019 {
5020 /* Add or remove the extension. */
5021 if (adding_ext)
5022 aarch64_isa_flags |= opt->flags_on;
5023 else
5024 aarch64_isa_flags &= ~(opt->flags_off);
5025 break;
5026 }
5027 }
5028
5029 if (opt->name == NULL)
5030 {
5031 /* Extension not found in list. */
5032 error ("unknown feature modifier %qs", str);
5033 return;
5034 }
5035
5036 str = ext;
5037 };
5038
5039 return;
5040 }
5041
5042 /* Parse the ARCH string. */
5043
5044 static void
5045 aarch64_parse_arch (void)
5046 {
5047 char *ext;
5048 const struct processor *arch;
5049 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5050 size_t len;
5051
5052 strcpy (str, aarch64_arch_string);
5053
5054 ext = strchr (str, '+');
5055
5056 if (ext != NULL)
5057 len = ext - str;
5058 else
5059 len = strlen (str);
5060
5061 if (len == 0)
5062 {
5063 error ("missing arch name in -march=%qs", str);
5064 return;
5065 }
5066
5067 /* Loop through the list of supported ARCHs to find a match. */
5068 for (arch = all_architectures; arch->name != NULL; arch++)
5069 {
5070 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5071 {
5072 selected_arch = arch;
5073 aarch64_isa_flags = selected_arch->flags;
5074 selected_cpu = &all_cores[selected_arch->core];
5075
5076 if (ext != NULL)
5077 {
5078 /* ARCH string contains at least one extension. */
5079 aarch64_parse_extension (ext);
5080 }
5081
5082 return;
5083 }
5084 }
5085
5086 /* ARCH name not found in list. */
5087 error ("unknown value %qs for -march", str);
5088 return;
5089 }
5090
5091 /* Parse the CPU string. */
5092
5093 static void
5094 aarch64_parse_cpu (void)
5095 {
5096 char *ext;
5097 const struct processor *cpu;
5098 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5099 size_t len;
5100
5101 strcpy (str, aarch64_cpu_string);
5102
5103 ext = strchr (str, '+');
5104
5105 if (ext != NULL)
5106 len = ext - str;
5107 else
5108 len = strlen (str);
5109
5110 if (len == 0)
5111 {
5112 error ("missing cpu name in -mcpu=%qs", str);
5113 return;
5114 }
5115
5116 /* Loop through the list of supported CPUs to find a match. */
5117 for (cpu = all_cores; cpu->name != NULL; cpu++)
5118 {
5119 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5120 {
5121 selected_cpu = cpu;
5122 selected_tune = cpu;
5123 aarch64_isa_flags = selected_cpu->flags;
5124
5125 if (ext != NULL)
5126 {
5127 /* CPU string contains at least one extension. */
5128 aarch64_parse_extension (ext);
5129 }
5130
5131 return;
5132 }
5133 }
5134
5135 /* CPU name not found in list. */
5136 error ("unknown value %qs for -mcpu", str);
5137 return;
5138 }
5139
5140 /* Parse the TUNE string. */
5141
5142 static void
5143 aarch64_parse_tune (void)
5144 {
5145 const struct processor *cpu;
5146 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5147 strcpy (str, aarch64_tune_string);
5148
5149 /* Loop through the list of supported CPUs to find a match. */
5150 for (cpu = all_cores; cpu->name != NULL; cpu++)
5151 {
5152 if (strcmp (cpu->name, str) == 0)
5153 {
5154 selected_tune = cpu;
5155 return;
5156 }
5157 }
5158
5159 /* CPU name not found in list. */
5160 error ("unknown value %qs for -mtune", str);
5161 return;
5162 }
5163
5164
5165 /* Implement TARGET_OPTION_OVERRIDE. */
5166
5167 static void
5168 aarch64_override_options (void)
5169 {
5170 /* -march wins over -mcpu: when -march is given, any -mcpu setting is
5171 discarded and the CPU is derived from the architecture; otherwise -march
5172 remains undefined. -mtune can be used with either -march or -mcpu. */
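  /* For example (hypothetical options), with
     "-march=armv8-a+crc -mcpu=cortex-a57 -mtune=cortex-a53" the -mcpu value
     is discarded, the ISA flags come from armv8-a+crc, and tuning follows
     cortex-a53.  */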
5173
5174 if (aarch64_arch_string)
5175 {
5176 aarch64_parse_arch ();
5177 aarch64_cpu_string = NULL;
5178 }
5179
5180 if (aarch64_cpu_string)
5181 {
5182 aarch64_parse_cpu ();
5183 selected_arch = NULL;
5184 }
5185
5186 if (aarch64_tune_string)
5187 {
5188 aarch64_parse_tune ();
5189 }
5190
5191 #ifndef HAVE_AS_MABI_OPTION
5192 /* The compiler may have been configured with 2.23.* binutils, which does
5193 not have support for ILP32. */
5194 if (TARGET_ILP32)
5195 error ("Assembler does not support -mabi=ilp32");
5196 #endif
5197
5198 initialize_aarch64_code_model ();
5199
5200 aarch64_build_bitmask_table ();
5201
5202 /* This target defaults to strict volatile bitfields. */
5203 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5204 flag_strict_volatile_bitfields = 1;
5205
5206 /* If the user did not specify a processor, choose the default
5207 one for them. This will be the CPU set during configuration using
5208 --with-cpu, otherwise it is "cortex-a53". */
5209 if (!selected_cpu)
5210 {
5211 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5212 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5213 }
5214
5215 gcc_assert (selected_cpu);
5216
5217 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5218 if (!selected_tune)
5219 selected_tune = &all_cores[selected_cpu->core];
5220
5221 aarch64_tune_flags = selected_tune->flags;
5222 aarch64_tune = selected_tune->core;
5223 aarch64_tune_params = selected_tune->tune;
5224
5225 aarch64_override_options_after_change ();
5226 }
5227
5228 /* Implement targetm.override_options_after_change. */
5229
5230 static void
5231 aarch64_override_options_after_change (void)
5232 {
5233 faked_omit_frame_pointer = false;
5234
5235 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5236 that aarch64_frame_pointer_required will be called. We need to remember
5237 whether flag_omit_frame_pointer was turned on normally or just faked. */
5238
5239 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5240 {
5241 flag_omit_frame_pointer = true;
5242 faked_omit_frame_pointer = true;
5243 }
5244 }
5245
5246 static struct machine_function *
5247 aarch64_init_machine_status (void)
5248 {
5249 struct machine_function *machine;
5250 machine = ggc_alloc_cleared_machine_function ();
5251 return machine;
5252 }
5253
5254 void
5255 aarch64_init_expanders (void)
5256 {
5257 init_machine_status = aarch64_init_machine_status;
5258 }
5259
5260 /* A checking mechanism for the implementation of the various code models. */
5261 static void
5262 initialize_aarch64_code_model (void)
5263 {
5264 if (flag_pic)
5265 {
5266 switch (aarch64_cmodel_var)
5267 {
5268 case AARCH64_CMODEL_TINY:
5269 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5270 break;
5271 case AARCH64_CMODEL_SMALL:
5272 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5273 break;
5274 case AARCH64_CMODEL_LARGE:
5275 sorry ("code model %qs with -f%s", "large",
5276 flag_pic > 1 ? "PIC" : "pic");
5277 default:
5278 gcc_unreachable ();
5279 }
5280 }
5281 else
5282 aarch64_cmodel = aarch64_cmodel_var;
5283 }
5284
5285 /* Return true if SYMBOL_REF X binds locally. */
5286
5287 static bool
5288 aarch64_symbol_binds_local_p (const_rtx x)
5289 {
5290 return (SYMBOL_REF_DECL (x)
5291 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5292 : SYMBOL_REF_LOCAL_P (x));
5293 }
5294
5295 /* Return true if SYMBOL_REF X is thread local */
5296 static bool
5297 aarch64_tls_symbol_p (rtx x)
5298 {
5299 if (! TARGET_HAVE_TLS)
5300 return false;
5301
5302 if (GET_CODE (x) != SYMBOL_REF)
5303 return false;
5304
5305 return SYMBOL_REF_TLS_MODEL (x) != 0;
5306 }
5307
5308 /* Classify a TLS symbol into one of the TLS kinds. */
5309 enum aarch64_symbol_type
5310 aarch64_classify_tls_symbol (rtx x)
5311 {
5312 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5313
5314 switch (tls_kind)
5315 {
5316 case TLS_MODEL_GLOBAL_DYNAMIC:
5317 case TLS_MODEL_LOCAL_DYNAMIC:
5318 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5319
5320 case TLS_MODEL_INITIAL_EXEC:
5321 return SYMBOL_SMALL_GOTTPREL;
5322
5323 case TLS_MODEL_LOCAL_EXEC:
5324 return SYMBOL_SMALL_TPREL;
5325
5326 case TLS_MODEL_EMULATED:
5327 case TLS_MODEL_NONE:
5328 return SYMBOL_FORCE_TO_MEM;
5329
5330 default:
5331 gcc_unreachable ();
5332 }
5333 }
5334
5335 /* Return the method that should be used to access SYMBOL_REF or
5336 LABEL_REF X in context CONTEXT. */
5337
5338 enum aarch64_symbol_type
5339 aarch64_classify_symbol (rtx x,
5340 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5341 {
5342 if (GET_CODE (x) == LABEL_REF)
5343 {
5344 switch (aarch64_cmodel)
5345 {
5346 case AARCH64_CMODEL_LARGE:
5347 return SYMBOL_FORCE_TO_MEM;
5348
5349 case AARCH64_CMODEL_TINY_PIC:
5350 case AARCH64_CMODEL_TINY:
5351 return SYMBOL_TINY_ABSOLUTE;
5352
5353 case AARCH64_CMODEL_SMALL_PIC:
5354 case AARCH64_CMODEL_SMALL:
5355 return SYMBOL_SMALL_ABSOLUTE;
5356
5357 default:
5358 gcc_unreachable ();
5359 }
5360 }
5361
5362 if (GET_CODE (x) == SYMBOL_REF)
5363 {
5364 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5365 || CONSTANT_POOL_ADDRESS_P (x))
5366 return SYMBOL_FORCE_TO_MEM;
5367
5368 if (aarch64_tls_symbol_p (x))
5369 return aarch64_classify_tls_symbol (x);
5370
5371 switch (aarch64_cmodel)
5372 {
5373 case AARCH64_CMODEL_TINY:
5374 if (SYMBOL_REF_WEAK (x))
5375 return SYMBOL_FORCE_TO_MEM;
5376 return SYMBOL_TINY_ABSOLUTE;
5377
5378 case AARCH64_CMODEL_SMALL:
5379 if (SYMBOL_REF_WEAK (x))
5380 return SYMBOL_FORCE_TO_MEM;
5381 return SYMBOL_SMALL_ABSOLUTE;
5382
5383 case AARCH64_CMODEL_TINY_PIC:
5384 if (!aarch64_symbol_binds_local_p (x))
5385 return SYMBOL_TINY_GOT;
5386 return SYMBOL_TINY_ABSOLUTE;
5387
5388 case AARCH64_CMODEL_SMALL_PIC:
5389 if (!aarch64_symbol_binds_local_p (x))
5390 return SYMBOL_SMALL_GOT;
5391 return SYMBOL_SMALL_ABSOLUTE;
5392
5393 default:
5394 gcc_unreachable ();
5395 }
5396 }
5397
5398 /* By default push everything into the constant pool. */
5399 return SYMBOL_FORCE_TO_MEM;
5400 }
5401
5402 bool
5403 aarch64_constant_address_p (rtx x)
5404 {
5405 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5406 }
5407
5408 bool
5409 aarch64_legitimate_pic_operand_p (rtx x)
5410 {
5411 if (GET_CODE (x) == SYMBOL_REF
5412 || (GET_CODE (x) == CONST
5413 && GET_CODE (XEXP (x, 0)) == PLUS
5414 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5415 return false;
5416
5417 return true;
5418 }
5419
5420 /* Return true if X holds either a quarter-precision or
5421 floating-point +0.0 constant. */
5422 static bool
5423 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5424 {
5425 if (!CONST_DOUBLE_P (x))
5426 return false;
5427
5428 /* TODO: We could handle moving 0.0 to a TFmode register,
5429 but first we would like to refactor the movtf_aarch64
5430 pattern to be more amenable to splitting moves properly and
5431 to gating correctly on TARGET_SIMD. For now, reject all
5432 constants that are not destined for SFmode or DFmode registers. */
5433 if (!(mode == SFmode || mode == DFmode))
5434 return false;
5435
5436 if (aarch64_float_const_zero_rtx_p (x))
5437 return true;
5438 return aarch64_float_const_representable_p (x);
5439 }
5440
5441 static bool
5442 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5443 {
5444 /* Do not allow vector struct mode constants. We could support
5445 0 and -1 easily, but they need support in aarch64-simd.md. */
5446 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5447 return false;
5448
5449 /* This could probably go away because
5450 we now decompose CONST_INTs according to expand_mov_immediate. */
5451 if ((GET_CODE (x) == CONST_VECTOR
5452 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5453 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5454 return !targetm.cannot_force_const_mem (mode, x);
5455
5456 if (GET_CODE (x) == HIGH
5457 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5458 return true;
5459
5460 return aarch64_constant_address_p (x);
5461 }
5462
5463 rtx
5464 aarch64_load_tp (rtx target)
5465 {
5466 if (!target
5467 || GET_MODE (target) != Pmode
5468 || !register_operand (target, Pmode))
5469 target = gen_reg_rtx (Pmode);
5470
5471 /* Can return in any reg. */
5472 emit_insn (gen_aarch64_load_tp_hard (target));
5473 return target;
5474 }
5475
5476 /* On AAPCS systems, this is the "struct __va_list". */
5477 static GTY(()) tree va_list_type;
5478
5479 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5480 Return the type to use as __builtin_va_list.
5481
5482 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5483
5484 struct __va_list
5485 {
5486 void *__stack;
5487 void *__gr_top;
5488 void *__vr_top;
5489 int __gr_offs;
5490 int __vr_offs;
5491 }; */
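/* A note on how these fields are used (see aarch64_expand_builtin_va_start
   and aarch64_gimplify_va_arg_expr below): __gr_top and __vr_top point one
   past the end of the general-register and vector-register save areas;
   __gr_offs and __vr_offs start out negative, at minus the number of bytes
   of the corresponding save area still holding unnamed register arguments,
   and grow towards zero as va_arg consumes them.  A non-negative offset
   means the next argument of that class is taken from __stack instead.  */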
5492
5493 static tree
5494 aarch64_build_builtin_va_list (void)
5495 {
5496 tree va_list_name;
5497 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5498
5499 /* Create the type. */
5500 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5501 /* Give it the required name. */
5502 va_list_name = build_decl (BUILTINS_LOCATION,
5503 TYPE_DECL,
5504 get_identifier ("__va_list"),
5505 va_list_type);
5506 DECL_ARTIFICIAL (va_list_name) = 1;
5507 TYPE_NAME (va_list_type) = va_list_name;
5508 TYPE_STUB_DECL (va_list_type) = va_list_name;
5509
5510 /* Create the fields. */
5511 f_stack = build_decl (BUILTINS_LOCATION,
5512 FIELD_DECL, get_identifier ("__stack"),
5513 ptr_type_node);
5514 f_grtop = build_decl (BUILTINS_LOCATION,
5515 FIELD_DECL, get_identifier ("__gr_top"),
5516 ptr_type_node);
5517 f_vrtop = build_decl (BUILTINS_LOCATION,
5518 FIELD_DECL, get_identifier ("__vr_top"),
5519 ptr_type_node);
5520 f_groff = build_decl (BUILTINS_LOCATION,
5521 FIELD_DECL, get_identifier ("__gr_offs"),
5522 integer_type_node);
5523 f_vroff = build_decl (BUILTINS_LOCATION,
5524 FIELD_DECL, get_identifier ("__vr_offs"),
5525 integer_type_node);
5526
5527 DECL_ARTIFICIAL (f_stack) = 1;
5528 DECL_ARTIFICIAL (f_grtop) = 1;
5529 DECL_ARTIFICIAL (f_vrtop) = 1;
5530 DECL_ARTIFICIAL (f_groff) = 1;
5531 DECL_ARTIFICIAL (f_vroff) = 1;
5532
5533 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5534 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5535 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5536 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5537 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5538
5539 TYPE_FIELDS (va_list_type) = f_stack;
5540 DECL_CHAIN (f_stack) = f_grtop;
5541 DECL_CHAIN (f_grtop) = f_vrtop;
5542 DECL_CHAIN (f_vrtop) = f_groff;
5543 DECL_CHAIN (f_groff) = f_vroff;
5544
5545 /* Compute its layout. */
5546 layout_type (va_list_type);
5547
5548 return va_list_type;
5549 }
5550
5551 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5552 static void
5553 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5554 {
5555 const CUMULATIVE_ARGS *cum;
5556 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5557 tree stack, grtop, vrtop, groff, vroff;
5558 tree t;
5559 int gr_save_area_size;
5560 int vr_save_area_size;
5561 int vr_offset;
5562
5563 cum = &crtl->args.info;
5564 gr_save_area_size
5565 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5566 vr_save_area_size
5567 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5568
5569 if (TARGET_GENERAL_REGS_ONLY)
5570 {
5571 if (cum->aapcs_nvrn > 0)
5572 sorry ("%qs and floating point or vector arguments",
5573 "-mgeneral-regs-only");
5574 vr_save_area_size = 0;
5575 }
5576
5577 f_stack = TYPE_FIELDS (va_list_type_node);
5578 f_grtop = DECL_CHAIN (f_stack);
5579 f_vrtop = DECL_CHAIN (f_grtop);
5580 f_groff = DECL_CHAIN (f_vrtop);
5581 f_vroff = DECL_CHAIN (f_groff);
5582
5583 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5584 NULL_TREE);
5585 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5586 NULL_TREE);
5587 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5588 NULL_TREE);
5589 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5590 NULL_TREE);
5591 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5592 NULL_TREE);
5593
5594 /* Emit code to initialize STACK, which points to the next varargs stack
5595 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5596 by named arguments. STACK is 8-byte aligned. */
5597 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5598 if (cum->aapcs_stack_size > 0)
5599 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5600 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5601 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5602
5603 /* Emit code to initialize GRTOP, the top of the GR save area.
5604 virtual_incoming_args_rtx should have been 16 byte aligned. */
5605 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5606 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5608
5609 /* Emit code to initialize VRTOP, the top of the VR save area.
5610 This address is gr_save_area_bytes below GRTOP, rounded
5611 down to the next 16-byte boundary. */
5612 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5613 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5614 STACK_BOUNDARY / BITS_PER_UNIT);
5615
5616 if (vr_offset)
5617 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5618 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5619 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5620
5621 /* Emit code to initialize GROFF, the offset from GRTOP of the
5622 next GPR argument. */
5623 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5624 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5625 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5626
5627 /* Likewise emit code to initialize VROFF, the offset from FTOP
5628 of the next VR argument. */
5629 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5630 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5631 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5632 }
5633
5634 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5635
5636 static tree
5637 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5638 gimple_seq *post_p ATTRIBUTE_UNUSED)
5639 {
5640 tree addr;
5641 bool indirect_p;
5642 bool is_ha; /* is HFA or HVA. */
5643 bool dw_align; /* double-word align. */
5644 enum machine_mode ag_mode = VOIDmode;
5645 int nregs;
5646 enum machine_mode mode;
5647
5648 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5649 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5650 HOST_WIDE_INT size, rsize, adjust, align;
5651 tree t, u, cond1, cond2;
5652
5653 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5654 if (indirect_p)
5655 type = build_pointer_type (type);
5656
5657 mode = TYPE_MODE (type);
5658
5659 f_stack = TYPE_FIELDS (va_list_type_node);
5660 f_grtop = DECL_CHAIN (f_stack);
5661 f_vrtop = DECL_CHAIN (f_grtop);
5662 f_groff = DECL_CHAIN (f_vrtop);
5663 f_vroff = DECL_CHAIN (f_groff);
5664
5665 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5666 f_stack, NULL_TREE);
5667 size = int_size_in_bytes (type);
5668 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5669
5670 dw_align = false;
5671 adjust = 0;
5672 if (aarch64_vfp_is_call_or_return_candidate (mode,
5673 type,
5674 &ag_mode,
5675 &nregs,
5676 &is_ha))
5677 {
5678 /* TYPE passed in fp/simd registers. */
5679 if (TARGET_GENERAL_REGS_ONLY)
5680 sorry ("%qs and floating point or vector arguments",
5681 "-mgeneral-regs-only");
5682
5683 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5684 unshare_expr (valist), f_vrtop, NULL_TREE);
5685 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5686 unshare_expr (valist), f_vroff, NULL_TREE);
5687
5688 rsize = nregs * UNITS_PER_VREG;
5689
5690 if (is_ha)
5691 {
5692 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5693 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5694 }
5695 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5696 && size < UNITS_PER_VREG)
5697 {
5698 adjust = UNITS_PER_VREG - size;
5699 }
5700 }
5701 else
5702 {
5703 /* TYPE passed in general registers. */
5704 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5705 unshare_expr (valist), f_grtop, NULL_TREE);
5706 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5707 unshare_expr (valist), f_groff, NULL_TREE);
5708 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5709 nregs = rsize / UNITS_PER_WORD;
5710
5711 if (align > 8)
5712 dw_align = true;
5713
5714 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5715 && size < UNITS_PER_WORD)
5716 {
5717 adjust = UNITS_PER_WORD - size;
5718 }
5719 }
5720
5721 /* Get a local temporary for the field value. */
5722 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5723
5724 /* Emit code to branch if off >= 0. */
5725 t = build2 (GE_EXPR, boolean_type_node, off,
5726 build_int_cst (TREE_TYPE (off), 0));
5727 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5728
5729 if (dw_align)
5730 {
5731 /* Emit: offs = (offs + 15) & -16. */
5732 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5733 build_int_cst (TREE_TYPE (off), 15));
5734 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5735 build_int_cst (TREE_TYPE (off), -16));
5736 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5737 }
5738 else
5739 roundup = NULL;
5740
5741 /* Update ap.__[g|v]r_offs */
5742 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5743 build_int_cst (TREE_TYPE (off), rsize));
5744 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5745
5746 /* String up. */
5747 if (roundup)
5748 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5749
5750 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5751 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5752 build_int_cst (TREE_TYPE (f_off), 0));
5753 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5754
5755 /* String up: make sure the assignment happens before the use. */
5756 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5757 COND_EXPR_ELSE (cond1) = t;
5758
5759 /* Prepare the trees handling the argument that is passed on the stack;
5760 the top-level node is stored in ON_STACK. */
5761 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5762 if (align > 8)
5763 {
5764 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5765 t = fold_convert (intDI_type_node, arg);
5766 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5767 build_int_cst (TREE_TYPE (t), 15));
5768 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5769 build_int_cst (TREE_TYPE (t), -16));
5770 t = fold_convert (TREE_TYPE (arg), t);
5771 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5772 }
5773 else
5774 roundup = NULL;
5775 /* Advance ap.__stack */
5776 t = fold_convert (intDI_type_node, arg);
5777 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5778 build_int_cst (TREE_TYPE (t), size + 7));
5779 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5780 build_int_cst (TREE_TYPE (t), -8));
5781 t = fold_convert (TREE_TYPE (arg), t);
5782 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5783 /* String up roundup and advance. */
5784 if (roundup)
5785 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5786 /* String up with arg */
5787 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5788 /* Big-endianness related address adjustment. */
5789 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5790 && size < UNITS_PER_WORD)
5791 {
5792 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5793 size_int (UNITS_PER_WORD - size));
5794 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5795 }
5796
5797 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5798 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5799
5800 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5801 t = off;
5802 if (adjust)
5803 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5804 build_int_cst (TREE_TYPE (off), adjust));
5805
5806 t = fold_convert (sizetype, t);
5807 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5808
5809 if (is_ha)
5810 {
5811 /* type ha; // treat as "struct {ftype field[n];}"
5812 ... [computing offs]
5813 for (i = 0; i <nregs; ++i, offs += 16)
5814 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5815 return ha; */
5816 int i;
5817 tree tmp_ha, field_t, field_ptr_t;
5818
5819 /* Declare a local variable. */
5820 tmp_ha = create_tmp_var_raw (type, "ha");
5821 gimple_add_tmp_var (tmp_ha);
5822
5823 /* Establish the base type. */
5824 switch (ag_mode)
5825 {
5826 case SFmode:
5827 field_t = float_type_node;
5828 field_ptr_t = float_ptr_type_node;
5829 break;
5830 case DFmode:
5831 field_t = double_type_node;
5832 field_ptr_t = double_ptr_type_node;
5833 break;
5834 case TFmode:
5835 field_t = long_double_type_node;
5836 field_ptr_t = long_double_ptr_type_node;
5837 break;
5838 /* The half precision and quad precision are not fully supported yet. Enable
5839 the following code after the support is complete. Need to find the correct
5840 type node for __fp16 *. */
5841 #if 0
5842 case HFmode:
5843 field_t = float_type_node;
5844 field_ptr_t = float_ptr_type_node;
5845 break;
5846 #endif
5847 case V2SImode:
5848 case V4SImode:
5849 {
5850 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5851 field_t = build_vector_type_for_mode (innertype, ag_mode);
5852 field_ptr_t = build_pointer_type (field_t);
5853 }
5854 break;
5855 default:
5856 gcc_assert (0);
5857 }
5858
5859 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
5860 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5861 addr = t;
5862 t = fold_convert (field_ptr_t, addr);
5863 t = build2 (MODIFY_EXPR, field_t,
5864 build1 (INDIRECT_REF, field_t, tmp_ha),
5865 build1 (INDIRECT_REF, field_t, t));
5866
5867 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5868 for (i = 1; i < nregs; ++i)
5869 {
5870 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5871 u = fold_convert (field_ptr_t, addr);
5872 u = build2 (MODIFY_EXPR, field_t,
5873 build2 (MEM_REF, field_t, tmp_ha,
5874 build_int_cst (field_ptr_t,
5875 (i *
5876 int_size_in_bytes (field_t)))),
5877 build1 (INDIRECT_REF, field_t, u));
5878 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5879 }
5880
5881 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5882 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5883 }
5884
5885 COND_EXPR_ELSE (cond2) = t;
5886 addr = fold_convert (build_pointer_type (type), cond1);
5887 addr = build_va_arg_indirect_ref (addr);
5888
5889 if (indirect_p)
5890 addr = build_va_arg_indirect_ref (addr);
5891
5892 return addr;
5893 }
5894
5895 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5896
5897 static void
5898 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5899 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5900 int no_rtl)
5901 {
5902 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5903 CUMULATIVE_ARGS local_cum;
5904 int gr_saved, vr_saved;
5905
5906 /* The caller has advanced CUM up to, but not beyond, the last named
5907 argument. Advance a local copy of CUM past the last "real" named
5908 argument, to find out how many registers are left over. */
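 /* Illustrative note (added; not in the original source): for a function
    such as "int f (int a, ...)", the named argument A occupies only X0,
    so GR_SAVED below becomes 7 and VR_SAVED becomes 8. */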
5909 local_cum = *cum;
 5910 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5911
 5912 /* Find out how many registers we need to save. */
5913 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5914 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5915
5916 if (TARGET_GENERAL_REGS_ONLY)
5917 {
5918 if (local_cum.aapcs_nvrn > 0)
5919 sorry ("%qs and floating point or vector arguments",
5920 "-mgeneral-regs-only");
5921 vr_saved = 0;
5922 }
5923
5924 if (!no_rtl)
5925 {
5926 if (gr_saved > 0)
5927 {
5928 rtx ptr, mem;
5929
5930 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5931 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5932 - gr_saved * UNITS_PER_WORD);
5933 mem = gen_frame_mem (BLKmode, ptr);
5934 set_mem_alias_set (mem, get_varargs_alias_set ());
5935
5936 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5937 mem, gr_saved);
5938 }
5939 if (vr_saved > 0)
5940 {
5941 /* We can't use move_block_from_reg, because it will use
5942 the wrong mode, storing D regs only. */
5943 enum machine_mode mode = TImode;
5944 int off, i;
5945
5946 /* Set OFF to the offset from virtual_incoming_args_rtx of
5947 the first vector register. The VR save area lies below
5948 the GR one, and is aligned to 16 bytes. */
5949 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5950 STACK_BOUNDARY / BITS_PER_UNIT);
5951 off -= vr_saved * UNITS_PER_VREG;
5952
5953 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5954 {
5955 rtx ptr, mem;
5956
5957 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5958 mem = gen_frame_mem (mode, ptr);
5959 set_mem_alias_set (mem, get_varargs_alias_set ());
5960 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5961 off += UNITS_PER_VREG;
5962 }
5963 }
5964 }
5965
5966 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5967 any complication of having crtl->args.pretend_args_size changed. */
5968 cfun->machine->saved_varargs_size
5969 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5970 STACK_BOUNDARY / BITS_PER_UNIT)
5971 + vr_saved * UNITS_PER_VREG);
5972 }
5973
5974 static void
5975 aarch64_conditional_register_usage (void)
5976 {
5977 int i;
5978 if (!TARGET_FLOAT)
5979 {
5980 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5981 {
5982 fixed_regs[i] = 1;
5983 call_used_regs[i] = 1;
5984 }
5985 }
5986 }
5987
5988 /* Walk down the type tree of TYPE counting consecutive base elements.
5989 If *MODEP is VOIDmode, then set it to the first valid floating point
5990 type. If a non-floating point type is found, or if a floating point
5991 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5992 otherwise return the count in the sub-tree. */
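 /* Illustrative note (added; not in the original source): under these rules
    "struct { double x, y; }" yields a count of 2 with *MODEP set to DFmode,
    while "struct { double x; int y; }" yields -1 because the integer field
    is not a valid base element. */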
5993 static int
5994 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5995 {
5996 enum machine_mode mode;
5997 HOST_WIDE_INT size;
5998
5999 switch (TREE_CODE (type))
6000 {
6001 case REAL_TYPE:
6002 mode = TYPE_MODE (type);
6003 if (mode != DFmode && mode != SFmode && mode != TFmode)
6004 return -1;
6005
6006 if (*modep == VOIDmode)
6007 *modep = mode;
6008
6009 if (*modep == mode)
6010 return 1;
6011
6012 break;
6013
6014 case COMPLEX_TYPE:
6015 mode = TYPE_MODE (TREE_TYPE (type));
6016 if (mode != DFmode && mode != SFmode && mode != TFmode)
6017 return -1;
6018
6019 if (*modep == VOIDmode)
6020 *modep = mode;
6021
6022 if (*modep == mode)
6023 return 2;
6024
6025 break;
6026
6027 case VECTOR_TYPE:
6028 /* Use V2SImode and V4SImode as representatives of all 64-bit
6029 and 128-bit vector types. */
6030 size = int_size_in_bytes (type);
6031 switch (size)
6032 {
6033 case 8:
6034 mode = V2SImode;
6035 break;
6036 case 16:
6037 mode = V4SImode;
6038 break;
6039 default:
6040 return -1;
6041 }
6042
6043 if (*modep == VOIDmode)
6044 *modep = mode;
6045
6046 /* Vector modes are considered to be opaque: two vectors are
6047 equivalent for the purposes of being homogeneous aggregates
6048 if they are the same size. */
6049 if (*modep == mode)
6050 return 1;
6051
6052 break;
6053
6054 case ARRAY_TYPE:
6055 {
6056 int count;
6057 tree index = TYPE_DOMAIN (type);
6058
6059 /* Can't handle incomplete types. */
6060 if (!COMPLETE_TYPE_P (type))
6061 return -1;
6062
6063 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6064 if (count == -1
6065 || !index
6066 || !TYPE_MAX_VALUE (index)
6067 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6068 || !TYPE_MIN_VALUE (index)
6069 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6070 || count < 0)
6071 return -1;
6072
6073 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6074 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6075
6076 /* There must be no padding. */
6077 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6078 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6079 != count * GET_MODE_BITSIZE (*modep)))
6080 return -1;
6081
6082 return count;
6083 }
6084
6085 case RECORD_TYPE:
6086 {
6087 int count = 0;
6088 int sub_count;
6089 tree field;
6090
6091 /* Can't handle incomplete types. */
6092 if (!COMPLETE_TYPE_P (type))
6093 return -1;
6094
6095 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6096 {
6097 if (TREE_CODE (field) != FIELD_DECL)
6098 continue;
6099
6100 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6101 if (sub_count < 0)
6102 return -1;
6103 count += sub_count;
6104 }
6105
6106 /* There must be no padding. */
6107 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6108 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6109 != count * GET_MODE_BITSIZE (*modep)))
6110 return -1;
6111
6112 return count;
6113 }
6114
6115 case UNION_TYPE:
6116 case QUAL_UNION_TYPE:
6117 {
6118 /* These aren't very interesting except in a degenerate case. */
6119 int count = 0;
6120 int sub_count;
6121 tree field;
6122
6123 /* Can't handle incomplete types. */
6124 if (!COMPLETE_TYPE_P (type))
6125 return -1;
6126
6127 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6128 {
6129 if (TREE_CODE (field) != FIELD_DECL)
6130 continue;
6131
6132 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6133 if (sub_count < 0)
6134 return -1;
6135 count = count > sub_count ? count : sub_count;
6136 }
6137
6138 /* There must be no padding. */
6139 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6140 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6141 != count * GET_MODE_BITSIZE (*modep)))
6142 return -1;
6143
6144 return count;
6145 }
6146
6147 default:
6148 break;
6149 }
6150
6151 return -1;
6152 }
6153
 6154 /* Return true if we use LRA instead of the reload pass. */
6155 static bool
6156 aarch64_lra_p (void)
6157 {
6158 return aarch64_lra_flag;
6159 }
6160
6161 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6162 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6163 array types. The C99 floating-point complex types are also considered
6164 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6165 types, which are GCC extensions and out of the scope of AAPCS64, are
6166 treated as composite types here as well.
6167
6168 Note that MODE itself is not sufficient in determining whether a type
6169 is such a composite type or not. This is because
6170 stor-layout.c:compute_record_mode may have already changed the MODE
6171 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6172 structure with only one field may have its MODE set to the mode of the
6173 field. Also an integer mode whose size matches the size of the
6174 RECORD_TYPE type may be used to substitute the original mode
6175 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6176 solely relied on. */
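 /* Illustrative note (added; not in the original source): a single-field
    "struct { float s; }" may be given SFmode by compute_record_mode, yet it
    is still a composite type under AAPCS64, which is why TYPE is tested
    before MODE below. */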
6177
6178 static bool
6179 aarch64_composite_type_p (const_tree type,
6180 enum machine_mode mode)
6181 {
6182 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6183 return true;
6184
6185 if (mode == BLKmode
6186 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6187 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6188 return true;
6189
6190 return false;
6191 }
6192
6193 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6194 type as described in AAPCS64 \S 4.1.2.
6195
6196 See the comment above aarch64_composite_type_p for the notes on MODE. */
6197
6198 static bool
6199 aarch64_short_vector_p (const_tree type,
6200 enum machine_mode mode)
6201 {
6202 HOST_WIDE_INT size = -1;
6203
6204 if (type && TREE_CODE (type) == VECTOR_TYPE)
6205 size = int_size_in_bytes (type);
6206 else if (!aarch64_composite_type_p (type, mode)
6207 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6208 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6209 size = GET_MODE_SIZE (mode);
6210
 6211 return size == 8 || size == 16;
6212 }
6213
6214 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6215 shall be passed or returned in simd/fp register(s) (providing these
6216 parameter passing registers are available).
6217
6218 Upon successful return, *COUNT returns the number of needed registers,
 6219 *BASE_MODE returns the mode of the individual register and, when IS_HA
 6220 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6221 floating-point aggregate or a homogeneous short-vector aggregate. */
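 /* Illustrative note (added; not in the original source): for
    "struct { float a, b, c, d; }" this returns true with *BASE_MODE set to
    SFmode, *COUNT set to 4 and *IS_HA set; a five-float struct exceeds the
    AAPCS64 limit of four members and is rejected. */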
6222
6223 static bool
6224 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6225 const_tree type,
6226 enum machine_mode *base_mode,
6227 int *count,
6228 bool *is_ha)
6229 {
6230 enum machine_mode new_mode = VOIDmode;
6231 bool composite_p = aarch64_composite_type_p (type, mode);
6232
6233 if (is_ha != NULL) *is_ha = false;
6234
6235 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6236 || aarch64_short_vector_p (type, mode))
6237 {
6238 *count = 1;
6239 new_mode = mode;
6240 }
6241 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6242 {
6243 if (is_ha != NULL) *is_ha = true;
6244 *count = 2;
6245 new_mode = GET_MODE_INNER (mode);
6246 }
6247 else if (type && composite_p)
6248 {
6249 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6250
6251 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6252 {
6253 if (is_ha != NULL) *is_ha = true;
6254 *count = ag_count;
6255 }
6256 else
6257 return false;
6258 }
6259 else
6260 return false;
6261
6262 *base_mode = new_mode;
6263 return true;
6264 }
6265
6266 /* Implement TARGET_STRUCT_VALUE_RTX. */
6267
6268 static rtx
6269 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6270 int incoming ATTRIBUTE_UNUSED)
6271 {
6272 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6273 }
6274
6275 /* Implements target hook vector_mode_supported_p. */
6276 static bool
6277 aarch64_vector_mode_supported_p (enum machine_mode mode)
6278 {
6279 if (TARGET_SIMD
6280 && (mode == V4SImode || mode == V8HImode
6281 || mode == V16QImode || mode == V2DImode
6282 || mode == V2SImode || mode == V4HImode
6283 || mode == V8QImode || mode == V2SFmode
6284 || mode == V4SFmode || mode == V2DFmode))
6285 return true;
6286
6287 return false;
6288 }
6289
6290 /* Return appropriate SIMD container
6291 for MODE within a vector of WIDTH bits. */
6292 static enum machine_mode
6293 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6294 {
6295 gcc_assert (width == 64 || width == 128);
6296 if (TARGET_SIMD)
6297 {
6298 if (width == 128)
6299 switch (mode)
6300 {
6301 case DFmode:
6302 return V2DFmode;
6303 case SFmode:
6304 return V4SFmode;
6305 case SImode:
6306 return V4SImode;
6307 case HImode:
6308 return V8HImode;
6309 case QImode:
6310 return V16QImode;
6311 case DImode:
6312 return V2DImode;
6313 default:
6314 break;
6315 }
6316 else
6317 switch (mode)
6318 {
6319 case SFmode:
6320 return V2SFmode;
6321 case SImode:
6322 return V2SImode;
6323 case HImode:
6324 return V4HImode;
6325 case QImode:
6326 return V8QImode;
6327 default:
6328 break;
6329 }
6330 }
6331 return word_mode;
6332 }
6333
6334 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6335 static enum machine_mode
6336 aarch64_preferred_simd_mode (enum machine_mode mode)
6337 {
6338 return aarch64_simd_container_mode (mode, 128);
6339 }
6340
6341 /* Return the bitmask of possible vector sizes for the vectorizer
6342 to iterate over. */
6343 static unsigned int
6344 aarch64_autovectorize_vector_sizes (void)
6345 {
6346 return (16 | 8);
6347 }
6348
6349 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6350 vector types in order to conform to the AAPCS64 (see "Procedure
6351 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6352 qualify for emission with the mangled names defined in that document,
6353 a vector type must not only be of the correct mode but also be
6354 composed of AdvSIMD vector element types (e.g.
 6355 __builtin_aarch64_simd_qi); these types are registered by
6356 aarch64_init_simd_builtins (). In other words, vector types defined
6357 in other ways e.g. via vector_size attribute will get default
6358 mangled names. */
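 /* For example (added note, not in the original source), the AdvSIMD type
    int8x8_t -- mode V8QImode with element type __builtin_aarch64_simd_qi --
    is emitted as the length-prefixed Itanium name "10__Int8x8_t" from the
    table below. */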
6359 typedef struct
6360 {
6361 enum machine_mode mode;
6362 const char *element_type_name;
6363 const char *mangled_name;
6364 } aarch64_simd_mangle_map_entry;
6365
6366 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6367 /* 64-bit containerized types. */
6368 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6369 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6370 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6371 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6372 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6373 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6374 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6375 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6376 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6377 /* 128-bit containerized types. */
6378 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6379 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6380 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6381 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6382 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6383 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6384 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6385 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6386 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6387 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6388 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6389 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6390 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6391 { VOIDmode, NULL, NULL }
6392 };
6393
6394 /* Implement TARGET_MANGLE_TYPE. */
6395
6396 static const char *
6397 aarch64_mangle_type (const_tree type)
6398 {
6399 /* The AArch64 ABI documents say that "__va_list" has to be
 6400 mangled as if it is in the "std" namespace. */
6401 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6402 return "St9__va_list";
6403
6404 /* Check the mode of the vector type, and the name of the vector
6405 element type, against the table. */
6406 if (TREE_CODE (type) == VECTOR_TYPE)
6407 {
6408 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6409
6410 while (pos->mode != VOIDmode)
6411 {
6412 tree elt_type = TREE_TYPE (type);
6413
6414 if (pos->mode == TYPE_MODE (type)
6415 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6416 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6417 pos->element_type_name))
6418 return pos->mangled_name;
6419
6420 pos++;
6421 }
6422 }
6423
6424 /* Use the default mangling. */
6425 return NULL;
6426 }
6427
6428 /* Return the equivalent letter for size. */
6429 static char
6430 sizetochar (int size)
6431 {
6432 switch (size)
6433 {
6434 case 64: return 'd';
6435 case 32: return 's';
6436 case 16: return 'h';
6437 case 8 : return 'b';
6438 default: gcc_unreachable ();
6439 }
6440 }
6441
 6442 /* Return true iff X is a uniform vector of floating-point
 6443 constants, and the constant can be represented in
 6444 quarter-precision form. Note, as aarch64_float_const_representable_p
6445 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6446 static bool
6447 aarch64_vect_float_const_representable_p (rtx x)
6448 {
6449 int i = 0;
6450 REAL_VALUE_TYPE r0, ri;
6451 rtx x0, xi;
6452
6453 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6454 return false;
6455
6456 x0 = CONST_VECTOR_ELT (x, 0);
6457 if (!CONST_DOUBLE_P (x0))
6458 return false;
6459
6460 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6461
6462 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6463 {
6464 xi = CONST_VECTOR_ELT (x, i);
6465 if (!CONST_DOUBLE_P (xi))
6466 return false;
6467
6468 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6469 if (!REAL_VALUES_EQUAL (r0, ri))
6470 return false;
6471 }
6472
6473 return aarch64_float_const_representable_p (x0);
6474 }
6475
 6476 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise; if INFO is nonnull, describe how to generate the immediate. */
6477 bool
6478 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6479 struct simd_immediate_info *info)
6480 {
6481 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6482 matches = 1; \
6483 for (i = 0; i < idx; i += (STRIDE)) \
6484 if (!(TEST)) \
6485 matches = 0; \
6486 if (matches) \
6487 { \
6488 immtype = (CLASS); \
6489 elsize = (ELSIZE); \
6490 eshift = (SHIFT); \
6491 emvn = (NEG); \
6492 break; \
6493 }
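 /* Descriptive note (added; not in the original source): CHECK tests one
    candidate MOVI/MVNI encoding. It walks the byte image in steps of
    STRIDE; if every byte satisfies TEST it records CLASS, ELSIZE, SHIFT
    and NEG in immtype, elsize, eshift and emvn, then leaves the enclosing
    do-while loop. */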
6494
6495 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6496 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6497 unsigned char bytes[16];
6498 int immtype = -1, matches;
6499 unsigned int invmask = inverse ? 0xff : 0;
6500 int eshift, emvn;
6501
6502 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6503 {
6504 if (! (aarch64_simd_imm_zero_p (op, mode)
6505 || aarch64_vect_float_const_representable_p (op)))
6506 return false;
6507
6508 if (info)
6509 {
6510 info->value = CONST_VECTOR_ELT (op, 0);
6511 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6512 info->mvn = false;
6513 info->shift = 0;
6514 }
6515
6516 return true;
6517 }
6518
6519 /* Splat vector constant out into a byte vector. */
6520 for (i = 0; i < n_elts; i++)
6521 {
6522 rtx el = CONST_VECTOR_ELT (op, i);
6523 unsigned HOST_WIDE_INT elpart;
6524 unsigned int part, parts;
6525
6526 if (GET_CODE (el) == CONST_INT)
6527 {
6528 elpart = INTVAL (el);
6529 parts = 1;
6530 }
6531 else if (GET_CODE (el) == CONST_DOUBLE)
6532 {
6533 elpart = CONST_DOUBLE_LOW (el);
6534 parts = 2;
6535 }
6536 else
6537 gcc_unreachable ();
6538
6539 for (part = 0; part < parts; part++)
6540 {
6541 unsigned int byte;
6542 for (byte = 0; byte < innersize; byte++)
6543 {
6544 bytes[idx++] = (elpart & 0xff) ^ invmask;
6545 elpart >>= BITS_PER_UNIT;
6546 }
6547 if (GET_CODE (el) == CONST_DOUBLE)
6548 elpart = CONST_DOUBLE_HIGH (el);
6549 }
6550 }
6551
6552 /* Sanity check. */
6553 gcc_assert (idx == GET_MODE_SIZE (mode));
6554
6555 do
6556 {
6557 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6558 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6559
6560 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6561 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6562
6563 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6564 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6565
6566 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6567 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6568
6569 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6570
6571 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6572
6573 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6574 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6575
6576 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6577 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6578
6579 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6580 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6581
6582 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6583 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6584
6585 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6586
6587 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6588
6589 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6590 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6591
6592 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6593 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6594
6595 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6596 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6597
6598 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6599 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6600
6601 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6602
6603 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6604 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6605 }
6606 while (0);
6607
6608 if (immtype == -1)
6609 return false;
6610
6611 if (info)
6612 {
6613 info->element_width = elsize;
6614 info->mvn = emvn != 0;
6615 info->shift = eshift;
6616
6617 unsigned HOST_WIDE_INT imm = 0;
6618
6619 if (immtype >= 12 && immtype <= 15)
6620 info->msl = true;
6621
6622 /* Un-invert bytes of recognized vector, if necessary. */
6623 if (invmask != 0)
6624 for (i = 0; i < idx; i++)
6625 bytes[i] ^= invmask;
6626
6627 if (immtype == 17)
6628 {
6629 /* FIXME: Broken on 32-bit H_W_I hosts. */
6630 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6631
6632 for (i = 0; i < 8; i++)
6633 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6634 << (i * BITS_PER_UNIT);
6635
6636
6637 info->value = GEN_INT (imm);
6638 }
6639 else
6640 {
6641 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6642 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6643
6644 /* Construct 'abcdefgh' because the assembler cannot handle
6645 generic constants. */
6646 if (info->mvn)
6647 imm = ~imm;
6648 imm = (imm >> info->shift) & 0xff;
6649 info->value = GEN_INT (imm);
6650 }
6651 }
6652
6653 return true;
6654 #undef CHECK
6655 }
6656
6657 static bool
6658 aarch64_const_vec_all_same_int_p (rtx x,
6659 HOST_WIDE_INT minval,
6660 HOST_WIDE_INT maxval)
6661 {
6662 HOST_WIDE_INT firstval;
6663 int count, i;
6664
6665 if (GET_CODE (x) != CONST_VECTOR
6666 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6667 return false;
6668
6669 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6670 if (firstval < minval || firstval > maxval)
6671 return false;
6672
6673 count = CONST_VECTOR_NUNITS (x);
6674 for (i = 1; i < count; i++)
6675 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6676 return false;
6677
6678 return true;
6679 }
6680
 6681 /* Check whether immediate shift constants are within range. */
6682 bool
6683 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6684 {
6685 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6686 if (left)
6687 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6688 else
6689 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6690 }
6691
6692 /* Return true if X is a uniform vector where all elements
6693 are either the floating-point constant 0.0 or the
6694 integer constant 0. */
6695 bool
6696 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6697 {
6698 return x == CONST0_RTX (mode);
6699 }
6700
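 /* Descriptive comment (added for clarity): return true if X is a 64-bit
    integer in which every byte is either 0x00 or 0xff, i.e. a mask that
    the 64-bit variant of MOVI can materialize. */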
6701 bool
6702 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6703 {
6704 HOST_WIDE_INT imm = INTVAL (x);
6705 int i;
6706
6707 for (i = 0; i < 8; i++)
6708 {
6709 unsigned int byte = imm & 0xff;
6710 if (byte != 0xff && byte != 0)
6711 return false;
6712 imm >>= 8;
6713 }
6714
6715 return true;
6716 }
6717
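 /* Descriptive comment (added for clarity): return true if X is a valid
    source for a move into a MODE register in symbol context CONTEXT --
    a HIGH of a valid symbolic reference, an immediate accepted by
    aarch64_move_imm, a DImode constant symbolic address, or a symbol
    classified as SYMBOL_TINY_ABSOLUTE. */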
6718 bool
6719 aarch64_mov_operand_p (rtx x,
6720 enum aarch64_symbol_context context,
6721 enum machine_mode mode)
6722 {
6723 if (GET_CODE (x) == HIGH
6724 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6725 return true;
6726
6727 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6728 return true;
6729
6730 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6731 return true;
6732
6733 return aarch64_classify_symbolic_expression (x, context)
6734 == SYMBOL_TINY_ABSOLUTE;
6735 }
6736
6737 /* Return a const_int vector of VAL. */
6738 rtx
6739 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6740 {
6741 int nunits = GET_MODE_NUNITS (mode);
6742 rtvec v = rtvec_alloc (nunits);
6743 int i;
6744
 6745 for (i = 0; i < nunits; i++)
6746 RTVEC_ELT (v, i) = GEN_INT (val);
6747
6748 return gen_rtx_CONST_VECTOR (mode, v);
6749 }
6750
6751 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6752
6753 bool
6754 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6755 {
6756 enum machine_mode vmode;
6757
6758 gcc_assert (!VECTOR_MODE_P (mode));
6759 vmode = aarch64_preferred_simd_mode (mode);
6760 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6761 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6762 }
6763
 6764 /* Construct and return a PARALLEL RTX vector listing the lanes of the low or high half of a MODE vector, as selected by HIGH. */
6765 rtx
6766 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6767 {
6768 int nunits = GET_MODE_NUNITS (mode);
6769 rtvec v = rtvec_alloc (nunits / 2);
6770 int base = high ? nunits / 2 : 0;
6771 rtx t1;
6772 int i;
6773
 6774 for (i = 0; i < nunits / 2; i++)
6775 RTVEC_ELT (v, i) = GEN_INT (base + i);
6776
6777 t1 = gen_rtx_PARALLEL (mode, v);
6778 return t1;
6779 }
6780
6781 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6782 HIGH (exclusive). */
6783 void
6784 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6785 {
6786 HOST_WIDE_INT lane;
6787 gcc_assert (GET_CODE (operand) == CONST_INT);
6788 lane = INTVAL (operand);
6789
6790 if (lane < low || lane >= high)
6791 error ("lane out of range");
6792 }
6793
6794 void
6795 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6796 {
6797 gcc_assert (GET_CODE (operand) == CONST_INT);
6798 HOST_WIDE_INT lane = INTVAL (operand);
6799
6800 if (lane < low || lane >= high)
6801 error ("constant out of range");
6802 }
6803
6804 /* Emit code to reinterpret one AdvSIMD type as another,
6805 without altering bits. */
6806 void
6807 aarch64_simd_reinterpret (rtx dest, rtx src)
6808 {
6809 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6810 }
6811
 6812 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6813 registers). */
6814 void
6815 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6816 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6817 rtx op1)
6818 {
6819 rtx mem = gen_rtx_MEM (mode, destaddr);
6820 rtx tmp1 = gen_reg_rtx (mode);
6821 rtx tmp2 = gen_reg_rtx (mode);
6822
6823 emit_insn (intfn (tmp1, op1, tmp2));
6824
6825 emit_move_insn (mem, tmp1);
6826 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6827 emit_move_insn (mem, tmp2);
6828 }
6829
6830 /* Return TRUE if OP is a valid vector addressing mode. */
6831 bool
6832 aarch64_simd_mem_operand_p (rtx op)
6833 {
6834 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6835 || GET_CODE (XEXP (op, 0)) == REG);
6836 }
6837
6838 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6839 not to early-clobber SRC registers in the process.
6840
6841 We assume that the operands described by SRC and DEST represent a
6842 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6843 number of components into which the copy has been decomposed. */
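 /* Illustrative note (added; not in the original source): copying the pair
    {Q1, Q2} into {Q2, Q3} must be emitted high-to-low (Q3 <- Q2, then
    Q2 <- Q1), whereas copying it into {Q0, Q1} must be emitted low-to-high;
    the REGNO comparison below selects the safe order. */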
6844 void
6845 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6846 rtx *src, unsigned int count)
6847 {
6848 unsigned int i;
6849
6850 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6851 || REGNO (operands[0]) < REGNO (operands[1]))
6852 {
6853 for (i = 0; i < count; i++)
6854 {
6855 operands[2 * i] = dest[i];
6856 operands[2 * i + 1] = src[i];
6857 }
6858 }
6859 else
6860 {
6861 for (i = 0; i < count; i++)
6862 {
6863 operands[2 * i] = dest[count - i - 1];
6864 operands[2 * i + 1] = src[count - i - 1];
6865 }
6866 }
6867 }
6868
6869 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6870 one of VSTRUCT modes: OI, CI or XI. */
6871 int
6872 aarch64_simd_attr_length_move (rtx insn)
6873 {
6874 enum machine_mode mode;
6875
6876 extract_insn_cached (insn);
6877
6878 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6879 {
6880 mode = GET_MODE (recog_data.operand[0]);
6881 switch (mode)
6882 {
6883 case OImode:
6884 return 8;
6885 case CImode:
6886 return 12;
6887 case XImode:
6888 return 16;
6889 default:
6890 gcc_unreachable ();
6891 }
6892 }
6893 return 4;
6894 }
6895
6896 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6897 alignment of a vector to 128 bits. */
6898 static HOST_WIDE_INT
6899 aarch64_simd_vector_alignment (const_tree type)
6900 {
6901 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6902 return MIN (align, 128);
6903 }
6904
6905 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6906 static bool
6907 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6908 {
6909 if (is_packed)
6910 return false;
6911
6912 /* We guarantee alignment for vectors up to 128-bits. */
6913 if (tree_int_cst_compare (TYPE_SIZE (type),
6914 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6915 return false;
6916
6917 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6918 return true;
6919 }
6920
6921 /* If VALS is a vector constant that can be loaded into a register
6922 using DUP, generate instructions to do so and return an RTX to
6923 assign to the register. Otherwise return NULL_RTX. */
6924 static rtx
6925 aarch64_simd_dup_constant (rtx vals)
6926 {
6927 enum machine_mode mode = GET_MODE (vals);
6928 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6929 int n_elts = GET_MODE_NUNITS (mode);
6930 bool all_same = true;
6931 rtx x;
6932 int i;
6933
6934 if (GET_CODE (vals) != CONST_VECTOR)
6935 return NULL_RTX;
6936
6937 for (i = 1; i < n_elts; ++i)
6938 {
6939 x = CONST_VECTOR_ELT (vals, i);
6940 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6941 all_same = false;
6942 }
6943
6944 if (!all_same)
6945 return NULL_RTX;
6946
6947 /* We can load this constant by using DUP and a constant in a
6948 single ARM register. This will be cheaper than a vector
6949 load. */
6950 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6951 return gen_rtx_VEC_DUPLICATE (mode, x);
6952 }
6953
6954
6955 /* Generate code to load VALS, which is a PARALLEL containing only
6956 constants (for vec_init) or CONST_VECTOR, efficiently into a
6957 register. Returns an RTX to copy into the register, or NULL_RTX
6958 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6959 static rtx
6960 aarch64_simd_make_constant (rtx vals)
6961 {
6962 enum machine_mode mode = GET_MODE (vals);
6963 rtx const_dup;
6964 rtx const_vec = NULL_RTX;
6965 int n_elts = GET_MODE_NUNITS (mode);
6966 int n_const = 0;
6967 int i;
6968
6969 if (GET_CODE (vals) == CONST_VECTOR)
6970 const_vec = vals;
6971 else if (GET_CODE (vals) == PARALLEL)
6972 {
6973 /* A CONST_VECTOR must contain only CONST_INTs and
6974 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6975 Only store valid constants in a CONST_VECTOR. */
6976 for (i = 0; i < n_elts; ++i)
6977 {
6978 rtx x = XVECEXP (vals, 0, i);
6979 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6980 n_const++;
6981 }
6982 if (n_const == n_elts)
6983 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6984 }
6985 else
6986 gcc_unreachable ();
6987
6988 if (const_vec != NULL_RTX
6989 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6990 /* Load using MOVI/MVNI. */
6991 return const_vec;
6992 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6993 /* Loaded using DUP. */
6994 return const_dup;
6995 else if (const_vec != NULL_RTX)
6996 /* Load from constant pool. We can not take advantage of single-cycle
6997 LD1 because we need a PC-relative addressing mode. */
6998 return const_vec;
6999 else
7000 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7001 We can not construct an initializer. */
7002 return NULL_RTX;
7003 }
7004
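 /* Descriptive comment (added for clarity): expand the PARALLEL VALS into
    TARGET, using a constant vector or DUP where possible, patching a single
    variable lane in with vec_set, and otherwise building the vector in a
    stack temporary and loading it whole. */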
7005 void
7006 aarch64_expand_vector_init (rtx target, rtx vals)
7007 {
7008 enum machine_mode mode = GET_MODE (target);
7009 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7010 int n_elts = GET_MODE_NUNITS (mode);
7011 int n_var = 0, one_var = -1;
7012 bool all_same = true;
7013 rtx x, mem;
7014 int i;
7015
7016 x = XVECEXP (vals, 0, 0);
7017 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7018 n_var = 1, one_var = 0;
7019
7020 for (i = 1; i < n_elts; ++i)
7021 {
7022 x = XVECEXP (vals, 0, i);
7023 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7024 ++n_var, one_var = i;
7025
7026 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7027 all_same = false;
7028 }
7029
7030 if (n_var == 0)
7031 {
7032 rtx constant = aarch64_simd_make_constant (vals);
7033 if (constant != NULL_RTX)
7034 {
7035 emit_move_insn (target, constant);
7036 return;
7037 }
7038 }
7039
7040 /* Splat a single non-constant element if we can. */
7041 if (all_same)
7042 {
7043 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7044 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7045 return;
7046 }
7047
7048 /* One field is non-constant. Load constant then overwrite varying
7049 field. This is more efficient than using the stack. */
7050 if (n_var == 1)
7051 {
7052 rtx copy = copy_rtx (vals);
7053 rtx index = GEN_INT (one_var);
7054 enum insn_code icode;
7055
7056 /* Load constant part of vector, substitute neighboring value for
7057 varying element. */
7058 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7059 aarch64_expand_vector_init (target, copy);
7060
7061 /* Insert variable. */
7062 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7063 icode = optab_handler (vec_set_optab, mode);
7064 gcc_assert (icode != CODE_FOR_nothing);
7065 emit_insn (GEN_FCN (icode) (target, x, index));
7066 return;
7067 }
7068
7069 /* Construct the vector in memory one field at a time
7070 and load the whole vector. */
7071 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7072 for (i = 0; i < n_elts; i++)
7073 emit_move_insn (adjust_address_nv (mem, inner_mode,
7074 i * GET_MODE_SIZE (inner_mode)),
7075 XVECEXP (vals, 0, i));
7076 emit_move_insn (target, mem);
7077
7078 }
7079
7080 static unsigned HOST_WIDE_INT
7081 aarch64_shift_truncation_mask (enum machine_mode mode)
7082 {
7083 return
7084 (aarch64_vector_mode_supported_p (mode)
7085 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7086 }
7087
7088 #ifndef TLS_SECTION_ASM_FLAG
7089 #define TLS_SECTION_ASM_FLAG 'T'
7090 #endif
7091
7092 void
7093 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7094 tree decl ATTRIBUTE_UNUSED)
7095 {
7096 char flagchars[10], *f = flagchars;
7097
7098 /* If we have already declared this section, we can use an
7099 abbreviated form to switch back to it -- unless this section is
 7100 part of a COMDAT group, in which case GAS requires the full
7101 declaration every time. */
7102 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7103 && (flags & SECTION_DECLARED))
7104 {
7105 fprintf (asm_out_file, "\t.section\t%s\n", name);
7106 return;
7107 }
7108
7109 if (!(flags & SECTION_DEBUG))
7110 *f++ = 'a';
7111 if (flags & SECTION_WRITE)
7112 *f++ = 'w';
7113 if (flags & SECTION_CODE)
7114 *f++ = 'x';
7115 if (flags & SECTION_SMALL)
7116 *f++ = 's';
7117 if (flags & SECTION_MERGE)
7118 *f++ = 'M';
7119 if (flags & SECTION_STRINGS)
7120 *f++ = 'S';
7121 if (flags & SECTION_TLS)
7122 *f++ = TLS_SECTION_ASM_FLAG;
7123 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7124 *f++ = 'G';
7125 *f = '\0';
7126
7127 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7128
7129 if (!(flags & SECTION_NOTYPE))
7130 {
7131 const char *type;
7132 const char *format;
7133
7134 if (flags & SECTION_BSS)
7135 type = "nobits";
7136 else
7137 type = "progbits";
7138
7139 #ifdef TYPE_OPERAND_FMT
7140 format = "," TYPE_OPERAND_FMT;
7141 #else
7142 format = ",@%s";
7143 #endif
7144
7145 fprintf (asm_out_file, format, type);
7146
7147 if (flags & SECTION_ENTSIZE)
7148 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7149 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7150 {
7151 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7152 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7153 else
7154 fprintf (asm_out_file, ",%s,comdat",
7155 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7156 }
7157 }
7158
7159 putc ('\n', asm_out_file);
7160 }
7161
7162 /* Select a format to encode pointers in exception handling data. */
7163 int
7164 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7165 {
7166 int type;
7167 switch (aarch64_cmodel)
7168 {
7169 case AARCH64_CMODEL_TINY:
7170 case AARCH64_CMODEL_TINY_PIC:
7171 case AARCH64_CMODEL_SMALL:
7172 case AARCH64_CMODEL_SMALL_PIC:
7173 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7174 for everything. */
7175 type = DW_EH_PE_sdata4;
7176 break;
7177 default:
7178 /* No assumptions here. 8-byte relocs required. */
7179 type = DW_EH_PE_sdata8;
7180 break;
7181 }
7182 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7183 }
7184
7185 /* Emit load exclusive. */
7186
7187 static void
7188 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7189 rtx mem, rtx model_rtx)
7190 {
7191 rtx (*gen) (rtx, rtx, rtx);
7192
7193 switch (mode)
7194 {
7195 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7196 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7197 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7198 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7199 default:
7200 gcc_unreachable ();
7201 }
7202
7203 emit_insn (gen (rval, mem, model_rtx));
7204 }
7205
7206 /* Emit store exclusive. */
7207
7208 static void
 7209 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
 7210 rtx mem, rtx rval, rtx model_rtx)
7211 {
7212 rtx (*gen) (rtx, rtx, rtx, rtx);
7213
7214 switch (mode)
7215 {
7216 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7217 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7218 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7219 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7220 default:
7221 gcc_unreachable ();
7222 }
7223
 7224 emit_insn (gen (bval, mem, rval, model_rtx));
7225 }
7226
 7227 /* Emit jump instruction INSN and mark it as very unlikely to be taken. */
7228
7229 static void
7230 aarch64_emit_unlikely_jump (rtx insn)
7231 {
7232 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7233
7234 insn = emit_jump_insn (insn);
7235 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7236 }
7237
7238 /* Expand a compare and swap pattern. */
7239
7240 void
7241 aarch64_expand_compare_and_swap (rtx operands[])
7242 {
7243 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7244 enum machine_mode mode, cmp_mode;
7245 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7246
7247 bval = operands[0];
7248 rval = operands[1];
7249 mem = operands[2];
7250 oldval = operands[3];
7251 newval = operands[4];
7252 is_weak = operands[5];
7253 mod_s = operands[6];
7254 mod_f = operands[7];
7255 mode = GET_MODE (mem);
7256 cmp_mode = mode;
7257
7258 /* Normally the succ memory model must be stronger than fail, but in the
7259 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7260 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7261
7262 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7263 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7264 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7265
7266 switch (mode)
7267 {
7268 case QImode:
7269 case HImode:
7270 /* For short modes, we're going to perform the comparison in SImode,
7271 so do the zero-extension now. */
7272 cmp_mode = SImode;
7273 rval = gen_reg_rtx (SImode);
7274 oldval = convert_modes (SImode, mode, oldval, true);
7275 /* Fall through. */
7276
7277 case SImode:
7278 case DImode:
7279 /* Force the value into a register if needed. */
7280 if (!aarch64_plus_operand (oldval, mode))
7281 oldval = force_reg (cmp_mode, oldval);
7282 break;
7283
7284 default:
7285 gcc_unreachable ();
7286 }
7287
7288 switch (mode)
7289 {
7290 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7291 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7292 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7293 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7294 default:
7295 gcc_unreachable ();
7296 }
7297
7298 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7299
7300 if (mode == QImode || mode == HImode)
7301 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7302
7303 x = gen_rtx_REG (CCmode, CC_REGNUM);
7304 x = gen_rtx_EQ (SImode, x, const0_rtx);
7305 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7306 }
7307
7308 /* Split a compare and swap pattern. */
7309
7310 void
7311 aarch64_split_compare_and_swap (rtx operands[])
7312 {
7313 rtx rval, mem, oldval, newval, scratch;
7314 enum machine_mode mode;
7315 bool is_weak;
7316 rtx label1, label2, x, cond;
7317
7318 rval = operands[0];
7319 mem = operands[1];
7320 oldval = operands[2];
7321 newval = operands[3];
7322 is_weak = (operands[4] != const0_rtx);
7323 scratch = operands[7];
7324 mode = GET_MODE (mem);
7325
7326 label1 = NULL_RTX;
7327 if (!is_weak)
7328 {
7329 label1 = gen_label_rtx ();
7330 emit_label (label1);
7331 }
7332 label2 = gen_label_rtx ();
7333
7334 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7335
7336 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7337 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7338 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7339 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7340 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7341
7342 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7343
7344 if (!is_weak)
7345 {
7346 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7347 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7348 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7349 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7350 }
7351 else
7352 {
7353 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7354 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7355 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7356 }
7357
7358 emit_label (label2);
7359 }
7360
7361 /* Split an atomic operation. */
7362
7363 void
7364 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7365 rtx value, rtx model_rtx, rtx cond)
7366 {
7367 enum machine_mode mode = GET_MODE (mem);
7368 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7369 rtx label, x;
7370
7371 label = gen_label_rtx ();
7372 emit_label (label);
7373
7374 if (new_out)
7375 new_out = gen_lowpart (wmode, new_out);
7376 if (old_out)
7377 old_out = gen_lowpart (wmode, old_out);
7378 else
7379 old_out = new_out;
7380 value = simplify_gen_subreg (wmode, value, mode, 0);
7381
7382 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7383
7384 switch (code)
7385 {
7386 case SET:
7387 new_out = value;
7388 break;
7389
7390 case NOT:
7391 x = gen_rtx_AND (wmode, old_out, value);
7392 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7393 x = gen_rtx_NOT (wmode, new_out);
7394 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7395 break;
7396
7397 case MINUS:
7398 if (CONST_INT_P (value))
7399 {
7400 value = GEN_INT (-INTVAL (value));
7401 code = PLUS;
7402 }
7403 /* Fall through. */
7404
7405 default:
7406 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7407 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7408 break;
7409 }
7410
7411 aarch64_emit_store_exclusive (mode, cond, mem,
7412 gen_lowpart (mode, new_out), model_rtx);
7413
7414 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7415 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7416 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7417 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7418 }
7419
7420 static void
7421 aarch64_print_extension (void)
7422 {
7423 const struct aarch64_option_extension *opt = NULL;
7424
7425 for (opt = all_extensions; opt->name != NULL; opt++)
7426 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7427 asm_fprintf (asm_out_file, "+%s", opt->name);
7428
7429 asm_fprintf (asm_out_file, "\n");
7430 }
7431
7432 static void
7433 aarch64_start_file (void)
7434 {
7435 if (selected_arch)
7436 {
7437 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7438 aarch64_print_extension ();
7439 }
7440 else if (selected_cpu)
7441 {
7442 const char *truncated_name
7443 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7444 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7445 aarch64_print_extension ();
7446 }
 7447 default_file_start ();
7448 }
7449
7450 /* Target hook for c_mode_for_suffix. */
7451 static enum machine_mode
7452 aarch64_c_mode_for_suffix (char suffix)
7453 {
7454 if (suffix == 'q')
7455 return TFmode;
7456
7457 return VOIDmode;
7458 }
7459
7460 /* We can only represent floating point constants which will fit in
7461 "quarter-precision" values. These values are characterised by
 7462 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
 7463 by:
7464
7465 (-1)^s * (n/16) * 2^r
7466
7467 Where:
7468 's' is the sign bit.
7469 'n' is an integer in the range 16 <= n <= 31.
7470 'r' is an integer in the range -3 <= r <= 4. */
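 /* Worked example (added; not in the original source): 1.0 is
    (-1)^0 * (16/16) * 2^0 and 0.5 is (-1)^0 * (16/16) * 2^(-1), so both are
    representable, whereas 1.0/3.0 is not, since no n in [16, 31] and
    r in [-3, 4] make n/16 * 2^r equal to 1/3 exactly. */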
7471
 7472 /* Return true iff X can be represented as a quarter-precision
 7473 floating point immediate operand. Note, we cannot represent 0.0. */
7474 bool
7475 aarch64_float_const_representable_p (rtx x)
7476 {
7477 /* This represents our current view of how many bits
7478 make up the mantissa. */
7479 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7480 int exponent;
7481 unsigned HOST_WIDE_INT mantissa, mask;
7482 HOST_WIDE_INT m1, m2;
7483 REAL_VALUE_TYPE r, m;
7484
7485 if (!CONST_DOUBLE_P (x))
7486 return false;
7487
7488 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7489
7490 /* We cannot represent infinities, NaNs or +/-zero. We won't
7491 know if we have +zero until we analyse the mantissa, but we
7492 can reject the other invalid values. */
7493 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7494 || REAL_VALUE_MINUS_ZERO (r))
7495 return false;
7496
7497 /* Extract exponent. */
7498 r = real_value_abs (&r);
7499 exponent = REAL_EXP (&r);
7500
7501 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7502 highest (sign) bit, with a fixed binary point at bit point_pos.
7503 m1 holds the low part of the mantissa, m2 the high part.
7504 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7505 bits for the mantissa, this can fail (low bits will be lost). */
7506 real_ldexp (&m, &r, point_pos - exponent);
7507 REAL_VALUE_TO_INT (&m1, &m2, m);
7508
7509 /* If the low part of the mantissa has bits set we cannot represent
7510 the value. */
7511 if (m1 != 0)
7512 return false;
7513 /* We have rejected the lower HOST_WIDE_INT, so update our
7514 understanding of how many bits lie in the mantissa and
7515 look only at the high HOST_WIDE_INT. */
7516 mantissa = m2;
7517 point_pos -= HOST_BITS_PER_WIDE_INT;
7518
7519 /* We can only represent values with a mantissa of the form 1.xxxx. */
7520 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7521 if ((mantissa & mask) != 0)
7522 return false;
7523
7524 /* Having filtered unrepresentable values, we may now remove all
7525 but the highest 5 bits. */
7526 mantissa >>= point_pos - 5;
7527
7528 /* We cannot represent the value 0.0, so reject it. This is handled
7529 elsewhere. */
7530 if (mantissa == 0)
7531 return false;
7532
7533 /* Then, as bit 4 is always set, we can mask it off, leaving
7534 the mantissa in the range [0, 15]. */
7535 mantissa &= ~(1 << 4);
7536 gcc_assert (mantissa <= 15);
7537
7538 /* GCC internally does not use IEEE754-like encoding (where normalized
 7539 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
7540 Our mantissa values are shifted 4 places to the left relative to
7541 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7542 by 5 places to correct for GCC's representation. */
7543 exponent = 5 - exponent;
7544
7545 return (exponent >= 0 && exponent <= 7);
7546 }
7547
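 /* Descriptive comment (added for clarity): return the assembly template
    that moves the immediate CONST_VECTOR into a WIDTH-bit AdvSIMD register.
    For instance, a V4SImode vector whose lanes all equal 0xab0000 with
    WIDTH of 128 produces roughly "movi\t%0.4s, 0xab, lsl 16" (illustrative
    encoding). */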
7548 char*
7549 aarch64_output_simd_mov_immediate (rtx const_vector,
7550 enum machine_mode mode,
7551 unsigned width)
7552 {
7553 bool is_valid;
7554 static char templ[40];
7555 const char *mnemonic;
7556 const char *shift_op;
7557 unsigned int lane_count = 0;
7558 char element_char;
7559
7560 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7561
 7562 /* This will return true to show const_vector is legal for use as
 7563 an AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate. It will
7564 also update INFO to show how the immediate should be generated. */
7565 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7566 gcc_assert (is_valid);
7567
7568 element_char = sizetochar (info.element_width);
7569 lane_count = width / info.element_width;
7570
7571 mode = GET_MODE_INNER (mode);
7572 if (mode == SFmode || mode == DFmode)
7573 {
7574 gcc_assert (info.shift == 0 && ! info.mvn);
7575 if (aarch64_float_const_zero_rtx_p (info.value))
7576 info.value = GEN_INT (0);
7577 else
7578 {
7579 #define buf_size 20
7580 REAL_VALUE_TYPE r;
7581 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7582 char float_buf[buf_size] = {'\0'};
7583 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7584 #undef buf_size
7585
7586 if (lane_count == 1)
7587 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7588 else
7589 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7590 lane_count, element_char, float_buf);
7591 return templ;
7592 }
7593 }
7594
7595 mnemonic = info.mvn ? "mvni" : "movi";
7596 shift_op = info.msl ? "msl" : "lsl";
7597
7598 if (lane_count == 1)
7599 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7600 mnemonic, UINTVAL (info.value));
7601 else if (info.shift)
7602 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7603 ", %s %d", mnemonic, lane_count, element_char,
7604 UINTVAL (info.value), shift_op, info.shift);
7605 else
7606 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7607 mnemonic, lane_count, element_char, UINTVAL (info.value));
7608 return templ;
7609 }
7610
7611 char*
7612 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7613 enum machine_mode mode)
7614 {
7615 enum machine_mode vmode;
7616
7617 gcc_assert (!VECTOR_MODE_P (mode));
7618 vmode = aarch64_simd_container_mode (mode, 64);
7619 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7620 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7621 }
7622
7623 /* Split operands into moves from op[1] + op[2] into op[0]. */
7624
7625 void
7626 aarch64_split_combinev16qi (rtx operands[3])
7627 {
7628 unsigned int dest = REGNO (operands[0]);
7629 unsigned int src1 = REGNO (operands[1]);
7630 unsigned int src2 = REGNO (operands[2]);
7631 enum machine_mode halfmode = GET_MODE (operands[1]);
7632 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7633 rtx destlo, desthi;
7634
7635 gcc_assert (halfmode == V16QImode);
7636
7637 if (src1 == dest && src2 == dest + halfregs)
7638 {
7639 /* No-op move. Can't split to nothing; emit something. */
7640 emit_note (NOTE_INSN_DELETED);
7641 return;
7642 }
7643
7644 /* Preserve register attributes for variable tracking. */
7645 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7646 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7647 GET_MODE_SIZE (halfmode));
7648
7649 /* Special case of reversed high/low parts. */
7650 if (reg_overlap_mentioned_p (operands[2], destlo)
7651 && reg_overlap_mentioned_p (operands[1], desthi))
7652 {
7653 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7654 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7655 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7656 }
7657 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7658 {
7659 /* Try to avoid unnecessary moves if part of the result
7660 is in the right place already. */
7661 if (src1 != dest)
7662 emit_move_insn (destlo, operands[1]);
7663 if (src2 != dest + halfregs)
7664 emit_move_insn (desthi, operands[2]);
7665 }
7666 else
7667 {
7668 if (src2 != dest + halfregs)
7669 emit_move_insn (desthi, operands[2]);
7670 if (src1 != dest)
7671 emit_move_insn (destlo, operands[1]);
7672 }
7673 }
7674
7675 /* vec_perm support. */
7676
7677 #define MAX_VECT_LEN 16
7678
7679 struct expand_vec_perm_d
7680 {
7681 rtx target, op0, op1;
7682 unsigned char perm[MAX_VECT_LEN];
7683 enum machine_mode vmode;
7684 unsigned char nelt;
7685 bool one_vector_p;
7686 bool testing_p;
7687 };
7688
7689 /* Generate a variable permutation. */
7690
7691 static void
7692 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7693 {
7694 enum machine_mode vmode = GET_MODE (target);
7695 bool one_vector_p = rtx_equal_p (op0, op1);
7696
7697 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7698 gcc_checking_assert (GET_MODE (op0) == vmode);
7699 gcc_checking_assert (GET_MODE (op1) == vmode);
7700 gcc_checking_assert (GET_MODE (sel) == vmode);
7701 gcc_checking_assert (TARGET_SIMD);
7702
7703 if (one_vector_p)
7704 {
7705 if (vmode == V8QImode)
7706 {
7707 /* Expand the argument to a V16QI mode by duplicating it. */
7708 rtx pair = gen_reg_rtx (V16QImode);
7709 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7710 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7711 }
7712 else
7713 {
7714 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7715 }
7716 }
7717 else
7718 {
7719 rtx pair;
7720
7721 if (vmode == V8QImode)
7722 {
7723 pair = gen_reg_rtx (V16QImode);
7724 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7725 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7726 }
7727 else
7728 {
7729 pair = gen_reg_rtx (OImode);
7730 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7731 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7732 }
7733 }
7734 }
7735
7736 void
7737 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7738 {
7739 enum machine_mode vmode = GET_MODE (target);
7740 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7741 bool one_vector_p = rtx_equal_p (op0, op1);
7742 rtx rmask[MAX_VECT_LEN], mask;
7743
7744 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7745
7746 /* The TBL instruction does not use a modulo index, so we must take care
7747 of that ourselves. */
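 /* Illustrative note (added; not in the original source): with a single
    V8QImode input, MASK below is 7, so a selector index of 9 behaves as
    9 & 7 == 1, giving the modulo semantics expected of vec_perm. */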
7748 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7749 for (i = 0; i < nelt; ++i)
7750 rmask[i] = mask;
7751 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7752 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7753
7754 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7755 }
7756
7757 /* Recognize patterns suitable for the TRN instructions. */
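 /* Illustrative note (added; not in the original source): for V4SImode,
    TRN1 selects lanes {0, 4, 2, 6} and TRN2 selects lanes {1, 5, 3, 7}
    from the concatenation of the two inputs. */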
7758 static bool
7759 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7760 {
7761 unsigned int i, odd, mask, nelt = d->nelt;
7762 rtx out, in0, in1, x;
7763 rtx (*gen) (rtx, rtx, rtx);
7764 enum machine_mode vmode = d->vmode;
7765
7766 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7767 return false;
7768
7769 /* Note that these are little-endian tests.
7770 We correct for big-endian later. */
7771 if (d->perm[0] == 0)
7772 odd = 0;
7773 else if (d->perm[0] == 1)
7774 odd = 1;
7775 else
7776 return false;
7777 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7778
7779 for (i = 0; i < nelt; i += 2)
7780 {
7781 if (d->perm[i] != i + odd)
7782 return false;
7783 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7784 return false;
7785 }
7786
7787 /* Success! */
7788 if (d->testing_p)
7789 return true;
7790
7791 in0 = d->op0;
7792 in1 = d->op1;
7793 if (BYTES_BIG_ENDIAN)
7794 {
7795 x = in0, in0 = in1, in1 = x;
7796 odd = !odd;
7797 }
7798 out = d->target;
7799
7800 if (odd)
7801 {
7802 switch (vmode)
7803 {
7804 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7805 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7806 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7807 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7808 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7809 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7810 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7811 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7812 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7813 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7814 default:
7815 return false;
7816 }
7817 }
7818 else
7819 {
7820 switch (vmode)
7821 {
7822 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7823 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7824 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7825 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7826 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7827 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7828 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7829 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7830 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7831 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7832 default:
7833 return false;
7834 }
7835 }
7836
7837 emit_insn (gen (out, in0, in1));
7838 return true;
7839 }
7840
7841 /* Recognize patterns suitable for the UZP instructions. */
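/* For example, with V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   UZP1 produces the even-indexed elements {a0,a2,b0,b2} (permute
   indices 0,2,4,6) and UZP2 the odd-indexed elements {a1,a3,b1,b3}
   (indices 1,3,5,7).  */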
7842 static bool
7843 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7844 {
7845 unsigned int i, odd, mask, nelt = d->nelt;
7846 rtx out, in0, in1, x;
7847 rtx (*gen) (rtx, rtx, rtx);
7848 enum machine_mode vmode = d->vmode;
7849
7850 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7851 return false;
7852
7853 /* Note that these are little-endian tests.
7854 We correct for big-endian later. */
7855 if (d->perm[0] == 0)
7856 odd = 0;
7857 else if (d->perm[0] == 1)
7858 odd = 1;
7859 else
7860 return false;
7861 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7862
7863 for (i = 0; i < nelt; i++)
7864 {
7865 unsigned elt = (i * 2 + odd) & mask;
7866 if (d->perm[i] != elt)
7867 return false;
7868 }
7869
7870 /* Success! */
7871 if (d->testing_p)
7872 return true;
7873
7874 in0 = d->op0;
7875 in1 = d->op1;
7876 if (BYTES_BIG_ENDIAN)
7877 {
7878 x = in0, in0 = in1, in1 = x;
7879 odd = !odd;
7880 }
7881 out = d->target;
7882
7883 if (odd)
7884 {
7885 switch (vmode)
7886 {
7887 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7888 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7889 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7890 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7891 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7892 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7893 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7894 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7895 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7896 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7897 default:
7898 return false;
7899 }
7900 }
7901 else
7902 {
7903 switch (vmode)
7904 {
7905 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7906 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7907 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7908 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7909 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7910 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7911 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7912 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7913 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7914 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7915 default:
7916 return false;
7917 }
7918 }
7919
7920 emit_insn (gen (out, in0, in1));
7921 return true;
7922 }
7923
7924 /* Recognize patterns suitable for the ZIP instructions. */
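/* For example, with V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3},
   ZIP1 interleaves the low halves to give {a0,b0,a1,b1} (permute
   indices 0,4,1,5) and ZIP2 the high halves to give {a2,b2,a3,b3}
   (indices 2,6,3,7).  */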
7925 static bool
7926 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7927 {
7928 unsigned int i, high, mask, nelt = d->nelt;
7929 rtx out, in0, in1, x;
7930 rtx (*gen) (rtx, rtx, rtx);
7931 enum machine_mode vmode = d->vmode;
7932
7933 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7934 return false;
7935
7936 /* Note that these are little-endian tests.
7937 We correct for big-endian later. */
7938 high = nelt / 2;
7939 if (d->perm[0] == high)
7940 /* Do nothing.  */
7941 ;
7942 else if (d->perm[0] == 0)
7943 high = 0;
7944 else
7945 return false;
7946 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7947
7948 for (i = 0; i < nelt / 2; i++)
7949 {
7950 unsigned elt = (i + high) & mask;
7951 if (d->perm[i * 2] != elt)
7952 return false;
7953 elt = (elt + nelt) & mask;
7954 if (d->perm[i * 2 + 1] != elt)
7955 return false;
7956 }
7957
7958 /* Success! */
7959 if (d->testing_p)
7960 return true;
7961
7962 in0 = d->op0;
7963 in1 = d->op1;
7964 if (BYTES_BIG_ENDIAN)
7965 {
7966 x = in0, in0 = in1, in1 = x;
7967 high = !high;
7968 }
7969 out = d->target;
7970
7971 if (high)
7972 {
7973 switch (vmode)
7974 {
7975 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7976 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7977 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7978 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7979 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7980 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7981 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7982 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7983 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7984 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7985 default:
7986 return false;
7987 }
7988 }
7989 else
7990 {
7991 switch (vmode)
7992 {
7993 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7994 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7995 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7996 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7997 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7998 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7999 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8000 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8001 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8002 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8003 default:
8004 return false;
8005 }
8006 }
8007
8008 emit_insn (gen (out, in0, in1));
8009 return true;
8010 }
8011
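/* Recognize permutations that broadcast a single element, e.g. indices
   {2,2,2,2} for V4SI; these map onto the DUP (by element) instruction.  */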
8012 static bool
8013 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8014 {
8015 rtx (*gen) (rtx, rtx, rtx);
8016 rtx out = d->target;
8017 rtx in0;
8018 enum machine_mode vmode = d->vmode;
8019 unsigned int i, elt, nelt = d->nelt;
8020 rtx lane;
8021
8022 /* TODO: This may not be big-endian safe. */
8023 if (BYTES_BIG_ENDIAN)
8024 return false;
8025
8026 elt = d->perm[0];
8027 for (i = 1; i < nelt; i++)
8028 {
8029 if (elt != d->perm[i])
8030 return false;
8031 }
8032
8033 /* The generic preparation in aarch64_expand_vec_perm_const_1
8034 swaps the operand order and the permute indices if it finds
8035 d->perm[0] to be in the second operand. Thus, we can always
8036 use d->op0 and need not do any extra arithmetic to get the
8037 correct lane number. */
8038 in0 = d->op0;
8039 lane = GEN_INT (elt);
8040
8041 switch (vmode)
8042 {
8043 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8044 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8045 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8046 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8047 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8048 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8049 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8050 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8051 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8052 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8053 default:
8054 return false;
8055 }
8056
8057 emit_insn (gen (out, in0, lane));
8058 return true;
8059 }
8060
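/* Catch-all: expand the permutation as a TBL with a constant selector
   vector, reusing the variable-permute path above.  */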
8061 static bool
8062 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8063 {
8064 rtx rperm[MAX_VECT_LEN], sel;
8065 enum machine_mode vmode = d->vmode;
8066 unsigned int i, nelt = d->nelt;
8067
8068 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8069 numbering of elements for big-endian, we must reverse the order. */
8070 if (BYTES_BIG_ENDIAN)
8071 return false;
8072
8073 if (d->testing_p)
8074 return true;
8075
8076 /* Generic code will try constant permutation twice. Once with the
8077 original mode and again with the elements lowered to QImode.
8078 So wait and don't do the selector expansion ourselves. */
8079 if (vmode != V8QImode && vmode != V16QImode)
8080 return false;
8081
8082 for (i = 0; i < nelt; ++i)
8083 rperm[i] = GEN_INT (d->perm[i]);
8084 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8085 sel = force_reg (vmode, sel);
8086
8087 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8088 return true;
8089 }
8090
8091 static bool
8092 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8093 {
8094 /* The pattern matching functions above are written to look for a small
8095 number to begin the sequence (0, 1, N/2). If we begin with an index
8096 from the second operand, we can swap the operands. */
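/* For example, with V4SI the permutation {5,1,6,2} on (op0, op1)
   becomes {1,5,2,6} on (op1, op0).  */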
8097 if (d->perm[0] >= d->nelt)
8098 {
8099 unsigned i, nelt = d->nelt;
8100 rtx x;
8101
8102 for (i = 0; i < nelt; ++i)
8103 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8104
8105 x = d->op0;
8106 d->op0 = d->op1;
8107 d->op1 = x;
8108 }
8109
8110 if (TARGET_SIMD)
8111 {
8112 if (aarch64_evpc_zip (d))
8113 return true;
8114 else if (aarch64_evpc_uzp (d))
8115 return true;
8116 else if (aarch64_evpc_trn (d))
8117 return true;
8118 else if (aarch64_evpc_dup (d))
8119 return true;
8120 return aarch64_evpc_tbl (d);
8121 }
8122 return false;
8123 }
8124
8125 /* Expand a vec_perm_const pattern. */
8126
8127 bool
8128 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8129 {
8130 struct expand_vec_perm_d d;
8131 int i, nelt, which;
8132
8133 d.target = target;
8134 d.op0 = op0;
8135 d.op1 = op1;
8136
8137 d.vmode = GET_MODE (target);
8138 gcc_assert (VECTOR_MODE_P (d.vmode));
8139 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8140 d.testing_p = false;
8141
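/* Reduce each selector element modulo 2*nelt and record in WHICH which
   of the two input vectors is actually referenced: bit 0 for elements
   taken from op0, bit 1 for elements taken from op1.  */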
8142 for (i = which = 0; i < nelt; ++i)
8143 {
8144 rtx e = XVECEXP (sel, 0, i);
8145 int ei = INTVAL (e) & (2 * nelt - 1);
8146 which |= (ei < nelt ? 1 : 2);
8147 d.perm[i] = ei;
8148 }
8149
8150 switch (which)
8151 {
8152 default:
8153 gcc_unreachable ();
8154
8155 case 3:
8156 d.one_vector_p = false;
8157 if (!rtx_equal_p (op0, op1))
8158 break;
8159
8160 /* The elements of PERM do not suggest that only the first operand
8161 is used, but both operands are identical. Allow easier matching
8162 of the permutation by folding the permutation into the single
8163 input vector. */
8164 /* Fall through.  */
8165 case 2:
8166 for (i = 0; i < nelt; ++i)
8167 d.perm[i] &= nelt - 1;
8168 d.op0 = op1;
8169 d.one_vector_p = true;
8170 break;
8171
8172 case 1:
8173 d.op1 = op0;
8174 d.one_vector_p = true;
8175 break;
8176 }
8177
8178 return aarch64_expand_vec_perm_const_1 (&d);
8179 }
8180
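/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK: report whether the
   constant permutation SEL is supported for VMODE, without emitting
   any code.  */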
8181 static bool
8182 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8183 const unsigned char *sel)
8184 {
8185 struct expand_vec_perm_d d;
8186 unsigned int i, nelt, which;
8187 bool ret;
8188
8189 d.vmode = vmode;
8190 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8191 d.testing_p = true;
8192 memcpy (d.perm, sel, nelt);
8193
8194 /* Calculate whether all elements are in one vector. */
8195 for (i = which = 0; i < nelt; ++i)
8196 {
8197 unsigned char e = d.perm[i];
8198 gcc_assert (e < 2 * nelt);
8199 which |= (e < nelt ? 1 : 2);
8200 }
8201
8202 /* If all elements are from the second vector, reindex as if from the
8203 first vector. */
8204 if (which == 2)
8205 for (i = 0; i < nelt; ++i)
8206 d.perm[i] -= nelt;
8207
8208 /* Check whether the mask can be applied to a single vector. */
8209 d.one_vector_p = (which != 3);
8210
8211 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8212 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8213 if (!d.one_vector_p)
8214 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8215
8216 start_sequence ();
8217 ret = aarch64_expand_vec_perm_const_1 (&d);
8218 end_sequence ();
8219
8220 return ret;
8221 }
8222
8223 #undef TARGET_ADDRESS_COST
8224 #define TARGET_ADDRESS_COST aarch64_address_cost
8225
8226 /* This hook determines whether unnamed bitfields affect the alignment
8227 of the containing structure. The hook returns true if the structure
8228 should inherit the alignment requirements of an unnamed bitfield's
8229 type. */
8230 #undef TARGET_ALIGN_ANON_BITFIELD
8231 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8232
8233 #undef TARGET_ASM_ALIGNED_DI_OP
8234 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8235
8236 #undef TARGET_ASM_ALIGNED_HI_OP
8237 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8238
8239 #undef TARGET_ASM_ALIGNED_SI_OP
8240 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8241
8242 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8243 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8244 hook_bool_const_tree_hwi_hwi_const_tree_true
8245
8246 #undef TARGET_ASM_FILE_START
8247 #define TARGET_ASM_FILE_START aarch64_start_file
8248
8249 #undef TARGET_ASM_OUTPUT_MI_THUNK
8250 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8251
8252 #undef TARGET_ASM_SELECT_RTX_SECTION
8253 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8254
8255 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8256 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8257
8258 #undef TARGET_BUILD_BUILTIN_VA_LIST
8259 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8260
8261 #undef TARGET_CALLEE_COPIES
8262 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8263
8264 #undef TARGET_CAN_ELIMINATE
8265 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8266
8267 #undef TARGET_CANNOT_FORCE_CONST_MEM
8268 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8269
8270 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8271 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8272
8273 /* Only the least significant bit is used for initialization guard
8274 variables. */
8275 #undef TARGET_CXX_GUARD_MASK_BIT
8276 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8277
8278 #undef TARGET_C_MODE_FOR_SUFFIX
8279 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8280
8281 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8282 #undef TARGET_DEFAULT_TARGET_FLAGS
8283 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8284 #endif
8285
8286 #undef TARGET_CLASS_MAX_NREGS
8287 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8288
8289 #undef TARGET_BUILTIN_DECL
8290 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8291
8292 #undef TARGET_EXPAND_BUILTIN
8293 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8294
8295 #undef TARGET_EXPAND_BUILTIN_VA_START
8296 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8297
8298 #undef TARGET_FOLD_BUILTIN
8299 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8300
8301 #undef TARGET_FUNCTION_ARG
8302 #define TARGET_FUNCTION_ARG aarch64_function_arg
8303
8304 #undef TARGET_FUNCTION_ARG_ADVANCE
8305 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8306
8307 #undef TARGET_FUNCTION_ARG_BOUNDARY
8308 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8309
8310 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8311 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8312
8313 #undef TARGET_FUNCTION_VALUE
8314 #define TARGET_FUNCTION_VALUE aarch64_function_value
8315
8316 #undef TARGET_FUNCTION_VALUE_REGNO_P
8317 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8318
8319 #undef TARGET_FRAME_POINTER_REQUIRED
8320 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8321
8322 #undef TARGET_GIMPLE_FOLD_BUILTIN
8323 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8324
8325 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8326 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8327
8328 #undef TARGET_INIT_BUILTINS
8329 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8330
8331 #undef TARGET_LEGITIMATE_ADDRESS_P
8332 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8333
8334 #undef TARGET_LEGITIMATE_CONSTANT_P
8335 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8336
8337 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8338 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8339
8340 #undef TARGET_LRA_P
8341 #define TARGET_LRA_P aarch64_lra_p
8342
8343 #undef TARGET_MANGLE_TYPE
8344 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8345
8346 #undef TARGET_MEMORY_MOVE_COST
8347 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8348
8349 #undef TARGET_MUST_PASS_IN_STACK
8350 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8351
8352 /* This target hook should return true if accesses to volatile bitfields
8353 should use the narrowest mode possible. It should return false if these
8354 accesses should use the bitfield container type. */
8355 #undef TARGET_NARROW_VOLATILE_BITFIELD
8356 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8357
8358 #undef TARGET_OPTION_OVERRIDE
8359 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8360
8361 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8362 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8363 aarch64_override_options_after_change
8364
8365 #undef TARGET_PASS_BY_REFERENCE
8366 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8367
8368 #undef TARGET_PREFERRED_RELOAD_CLASS
8369 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8370
8371 #undef TARGET_SECONDARY_RELOAD
8372 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8373
8374 #undef TARGET_SHIFT_TRUNCATION_MASK
8375 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8376
8377 #undef TARGET_SETUP_INCOMING_VARARGS
8378 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8379
8380 #undef TARGET_STRUCT_VALUE_RTX
8381 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8382
8383 #undef TARGET_REGISTER_MOVE_COST
8384 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8385
8386 #undef TARGET_RETURN_IN_MEMORY
8387 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8388
8389 #undef TARGET_RETURN_IN_MSB
8390 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8391
8392 #undef TARGET_RTX_COSTS
8393 #define TARGET_RTX_COSTS aarch64_rtx_costs
8394
8395 #undef TARGET_TRAMPOLINE_INIT
8396 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8397
8398 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8399 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8400
8401 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8402 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8403
8404 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8405 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8406
8407 #undef TARGET_VECTORIZE_ADD_STMT_COST
8408 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8409
8410 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8411 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8412 aarch64_builtin_vectorization_cost
8413
8414 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8415 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8416
8417 #undef TARGET_VECTORIZE_BUILTINS
8418 #define TARGET_VECTORIZE_BUILTINS
8419
8420 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8421 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8422 aarch64_builtin_vectorized_function
8423
8424 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8425 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8426 aarch64_autovectorize_vector_sizes
8427
8428 /* Section anchor support. */
8429
8430 #undef TARGET_MIN_ANCHOR_OFFSET
8431 #define TARGET_MIN_ANCHOR_OFFSET -256
8432
8433 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8434 byte offset; we can do much more for larger data types, but have no way
8435 to determine the size of the access. We assume accesses are aligned. */
8436 #undef TARGET_MAX_ANCHOR_OFFSET
8437 #define TARGET_MAX_ANCHOR_OFFSET 4095
8438
8439 #undef TARGET_VECTOR_ALIGNMENT
8440 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8441
8442 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8443 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8444 aarch64_simd_vector_alignment_reachable
8445
8446 /* vec_perm support. */
8447
8448 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8449 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8450 aarch64_vectorize_vec_perm_const_ok
8451
8452
8453 #undef TARGET_FIXED_CONDITION_CODE_REGS
8454 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8455
8456 struct gcc_target targetm = TARGET_INITIALIZER;
8457
8458 #include "gt-aarch64.h"