1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
50
51 /* Defined for convenience. */
52 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
53
54 /* Classifies an address.
55
56 ADDRESS_REG_IMM
57 A simple base register plus immediate offset.
58
59 ADDRESS_REG_WB
60 A base register indexed by immediate offset with writeback.
61
62 ADDRESS_REG_REG
63 A base register indexed by (optionally scaled) register.
64
65 ADDRESS_REG_UXTW
66 A base register indexed by (optionally scaled) zero-extended register.
67
68 ADDRESS_REG_SXTW
69 A base register indexed by (optionally scaled) sign-extended register.
70
71 ADDRESS_LO_SUM
72 A LO_SUM rtx with a base register and "LO12" symbol relocation.
73
74 ADDRESS_SYMBOLIC
75 A constant symbolic address, in pc-relative literal pool. */
76
77 enum aarch64_address_type {
78 ADDRESS_REG_IMM,
79 ADDRESS_REG_WB,
80 ADDRESS_REG_REG,
81 ADDRESS_REG_UXTW,
82 ADDRESS_REG_SXTW,
83 ADDRESS_LO_SUM,
84 ADDRESS_SYMBOLIC
85 };
86
87 struct aarch64_address_info {
88 enum aarch64_address_type type;
89 rtx base;
90 rtx offset;
91 int shift;
92 enum aarch64_symbol_type symbol_type;
93 };
94
95 struct simd_immediate_info
96 {
97 rtx value;
98 int shift;
99 int element_width;
100 bool mvn;
101 bool msl;
102 };
103
104 /* The current code model. */
105 enum aarch64_code_model aarch64_cmodel;
106
107 #ifdef HAVE_AS_TLS
108 #undef TARGET_HAVE_TLS
109 #define TARGET_HAVE_TLS 1
110 #endif
111
112 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
113 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
114 const_tree,
115 enum machine_mode *, int *,
116 bool *);
117 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
118 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
119 static void aarch64_override_options_after_change (void);
120 static bool aarch64_vector_mode_supported_p (enum machine_mode);
121 static unsigned bit_count (unsigned HOST_WIDE_INT);
122 static bool aarch64_const_vec_all_same_int_p (rtx,
123 HOST_WIDE_INT, HOST_WIDE_INT);
124
125 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
126 const unsigned char *sel);
127
128 /* The processor for which instructions should be scheduled. */
129 enum aarch64_processor aarch64_tune = generic;
130
131 /* The current tuning set. */
132 const struct tune_params *aarch64_tune_params;
133
134 /* Mask to specify which instructions we are allowed to generate. */
135 unsigned long aarch64_isa_flags = 0;
136
137 /* Mask to specify which instruction scheduling options should be used. */
138 unsigned long aarch64_tune_flags = 0;
139
140 /* Tuning parameters. */
141
142 #if HAVE_DESIGNATED_INITIALIZERS
143 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
144 #else
145 #define NAMED_PARAM(NAME, VAL) (VAL)
146 #endif
147
148 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
149 __extension__
150 #endif
151 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
152 {
153 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
154 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
155 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
156 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
157 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
158 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
159 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
160 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
161 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
162 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
163 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
164 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
165 };
166
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_addrcost_table generic_addrcost_table =
171 {
172 NAMED_PARAM (pre_modify, 0),
173 NAMED_PARAM (post_modify, 0),
174 NAMED_PARAM (register_offset, 0),
175 NAMED_PARAM (register_extend, 0),
176 NAMED_PARAM (imm_offset, 0)
177 };
178
179 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
180 __extension__
181 #endif
182 static const struct cpu_regmove_cost generic_regmove_cost =
183 {
184 NAMED_PARAM (GP2GP, 1),
185 NAMED_PARAM (GP2FP, 2),
186 NAMED_PARAM (FP2GP, 2),
187 /* We currently do not provide direct support for TFmode Q->Q move.
188 Therefore we need to raise the cost above 2 in order to have
189 reload handle the situation. */
190 NAMED_PARAM (FP2FP, 4)
191 };
192
193 /* Generic costs for vector insn classes. */
194 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 __extension__
196 #endif
197 static const struct cpu_vector_cost generic_vector_cost =
198 {
199 NAMED_PARAM (scalar_stmt_cost, 1),
200 NAMED_PARAM (scalar_load_cost, 1),
201 NAMED_PARAM (scalar_store_cost, 1),
202 NAMED_PARAM (vec_stmt_cost, 1),
203 NAMED_PARAM (vec_to_scalar_cost, 1),
204 NAMED_PARAM (scalar_to_vec_cost, 1),
205 NAMED_PARAM (vec_align_load_cost, 1),
206 NAMED_PARAM (vec_unalign_load_cost, 1),
207 NAMED_PARAM (vec_unalign_store_cost, 1),
208 NAMED_PARAM (vec_store_cost, 1),
209 NAMED_PARAM (cond_taken_branch_cost, 3),
210 NAMED_PARAM (cond_not_taken_branch_cost, 1)
211 };
212
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct tune_params generic_tunings =
217 {
218 &generic_rtx_cost_table,
219 &generic_addrcost_table,
220 &generic_regmove_cost,
221 &generic_vector_cost,
222 NAMED_PARAM (memmov_cost, 4)
223 };
224
225 /* A processor implementing AArch64. */
226 struct processor
227 {
228 const char *const name;
229 enum aarch64_processor core;
230 const char *arch;
231 const unsigned long flags;
232 const struct tune_params *const tune;
233 };
234
235 /* Processor cores implementing AArch64. */
236 static const struct processor all_cores[] =
237 {
238 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
239 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
240 #include "aarch64-cores.def"
241 #undef AARCH64_CORE
242 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
243 {NULL, aarch64_none, NULL, 0, NULL}
244 };
245
246 /* Architectures implementing AArch64. */
247 static const struct processor all_architectures[] =
248 {
249 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
250 {NAME, CORE, #ARCH, FLAGS, NULL},
251 #include "aarch64-arches.def"
252 #undef AARCH64_ARCH
253 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
254 {NULL, aarch64_none, NULL, 0, NULL}
255 };
256
257 /* Target specification. These are populated as command-line arguments
258 are processed, or NULL if not specified. */
259 static const struct processor *selected_arch;
260 static const struct processor *selected_cpu;
261 static const struct processor *selected_tune;
262
263 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
264
265 /* An ISA extension in the co-processor and main instruction set space. */
266 struct aarch64_option_extension
267 {
268 const char *const name;
269 const unsigned long flags_on;
270 const unsigned long flags_off;
271 };
272
273 /* ISA extensions in AArch64. */
274 static const struct aarch64_option_extension all_extensions[] =
275 {
276 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
277 {NAME, FLAGS_ON, FLAGS_OFF},
278 #include "aarch64-option-extensions.def"
279 #undef AARCH64_OPT_EXTENSION
280 {NULL, 0, 0}
281 };
282
283 /* Used to track the size of an address when generating a pre/post
284 increment address. */
285 static enum machine_mode aarch64_memory_reference_mode;
286
287 /* Used to force GTY into this file. */
288 static GTY(()) int gty_dummy;
289
290 /* A table of valid AArch64 "bitmask immediate" values for
291 logical instructions. */
292
293 #define AARCH64_NUM_BITMASKS 5334
294 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
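
/* As an illustration (values chosen here, not taken from the table):
   0x00ff00ff00ff00ff, a run of eight ones replicated in every 16-bit
   element, is a valid bitmask immediate and can feed AND/ORR/EOR
   directly, whereas 0x0000000000001234 is not and has to be synthesised
   some other way.  */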
295
296 /* Did we set flag_omit_frame_pointer just so
297 aarch64_frame_pointer_required would be called? */
298 static bool faked_omit_frame_pointer;
299
300 typedef enum aarch64_cond_code
301 {
302 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
303 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
304 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
305 }
306 aarch64_cc;
307
308 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
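
/* For example, AARCH64_EQ (0) inverts to AARCH64_NE (1) and AARCH64_GE
   (10) inverts to AARCH64_LT (11); the encoding keeps each condition
   next to its inverse, so flipping bit 0 is enough.  */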
309
310 /* The condition codes of the processor, and the inverse function. */
311 static const char * const aarch64_condition_codes[] =
312 {
313 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
314 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
315 };
316
317 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
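/* For example, x5 maps to DWARF register 5, the stack pointer to 31 and
   v3 to 67; the AAPCS64 DWARF numbering starts the vector registers at
   64.  Anything else is reported as having no DWARF equivalent.  */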
318 unsigned
319 aarch64_dbx_register_number (unsigned regno)
320 {
321 if (GP_REGNUM_P (regno))
322 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
323 else if (regno == SP_REGNUM)
324 return AARCH64_DWARF_SP;
325 else if (FP_REGNUM_P (regno))
326 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
327
328 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
329 equivalent DWARF register. */
330 return DWARF_FRAME_REGISTERS;
331 }
332
333 /* Return TRUE if MODE is any of the large INT modes. */
334 static bool
335 aarch64_vect_struct_mode_p (enum machine_mode mode)
336 {
337 return mode == OImode || mode == CImode || mode == XImode;
338 }
339
340 /* Return TRUE if MODE is any of the vector modes. */
341 static bool
342 aarch64_vector_mode_p (enum machine_mode mode)
343 {
344 return aarch64_vector_mode_supported_p (mode)
345 || aarch64_vect_struct_mode_p (mode);
346 }
347
348 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
349 static bool
350 aarch64_array_mode_supported_p (enum machine_mode mode,
351 unsigned HOST_WIDE_INT nelems)
352 {
353 if (TARGET_SIMD
354 && AARCH64_VALID_SIMD_QREG_MODE (mode)
355 && (nelems >= 2 && nelems <= 4))
356 return true;
357
358 return false;
359 }
360
361 /* Implement HARD_REGNO_NREGS. */
362
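/* For example (sizes given for illustration): a 32-byte OImode value
   occupies 32 / UNITS_PER_VREG == 2 registers in the FP/SIMD register
   file, but 32 / UNITS_PER_WORD == 4 registers in the general register
   file.  */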
363 int
364 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
365 {
366 switch (aarch64_regno_regclass (regno))
367 {
368 case FP_REGS:
369 case FP_LO_REGS:
370 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
371 default:
372 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
373 }
374 gcc_unreachable ();
375 }
376
377 /* Implement HARD_REGNO_MODE_OK. */
378
379 int
380 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
381 {
382 if (GET_MODE_CLASS (mode) == MODE_CC)
383 return regno == CC_REGNUM;
384
385 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
386 || regno == ARG_POINTER_REGNUM)
387 return mode == Pmode;
388
389 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
390 return 1;
391
392 if (FP_REGNUM_P (regno))
393 {
394 if (aarch64_vect_struct_mode_p (mode))
395 return
396 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
397 else
398 return 1;
399 }
400
401 return 0;
402 }
403
404 /* Return true if calls to DECL should be treated as
405 long-calls (i.e. called via a register). */
406 static bool
407 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
408 {
409 return false;
410 }
411
412 /* Return true if calls to symbol-ref SYM should be treated as
413 long-calls (i.e. called via a register). */
414 bool
415 aarch64_is_long_call_p (rtx sym)
416 {
417 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
418 }
419
420 /* Return true if the offsets to a zero/sign-extract operation
421 represent an expression that matches an extend operation. The
422 operands represent the parameters from
423
424 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
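/* As a worked example (an illustration, not derived from any particular
   pattern): with MODE == DImode, EXTRACT_IMM == 34 and MULT_IMM == 4 the
   checks below succeed: 34 & ~7 == 32 is a power of two, 34 & 7 == 2 is
   a shift of at most 4, and MULT_IMM == 1 << 2.  That shape corresponds
   to an extend-and-shift operand such as the "sxtw #2" in

     add x0, x1, w2, sxtw #2  */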
425 bool
426 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
427 rtx extract_imm)
428 {
429 HOST_WIDE_INT mult_val, extract_val;
430
431 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
432 return false;
433
434 mult_val = INTVAL (mult_imm);
435 extract_val = INTVAL (extract_imm);
436
437 if (extract_val > 8
438 && extract_val < GET_MODE_BITSIZE (mode)
439 && exact_log2 (extract_val & ~7) > 0
440 && (extract_val & 7) <= 4
441 && mult_val == (1 << (extract_val & 7)))
442 return true;
443
444 return false;
445 }
446
447 /* Emit an insn that's a simple single-set. Both the operands must be
448 known to be valid. */
449 inline static rtx
450 emit_set_insn (rtx x, rtx y)
451 {
452 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
453 }
454
455 /* X and Y are two things to compare using CODE. Emit the compare insn and
456 return the rtx for the CC register in the appropriate mode. */
457 rtx
458 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
459 {
460 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
461 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
462
463 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
464 return cc_reg;
465 }
466
467 /* Build the SYMBOL_REF for __tls_get_addr. */
468
469 static GTY(()) rtx tls_get_addr_libfunc;
470
471 rtx
472 aarch64_tls_get_addr (void)
473 {
474 if (!tls_get_addr_libfunc)
475 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
476 return tls_get_addr_libfunc;
477 }
478
479 /* Return the TLS model to use for ADDR. */
480
481 static enum tls_model
482 tls_symbolic_operand_type (rtx addr)
483 {
484 enum tls_model tls_kind = TLS_MODEL_NONE;
485 rtx sym, addend;
486
487 if (GET_CODE (addr) == CONST)
488 {
489 split_const (addr, &sym, &addend);
490 if (GET_CODE (sym) == SYMBOL_REF)
491 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
492 }
493 else if (GET_CODE (addr) == SYMBOL_REF)
494 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
495
496 return tls_kind;
497 }
498
499 /* We allow lo_sum's in our legitimate addresses so that combine
500 can take care of combining addresses where necessary, but for
501 generation purposes we generate the address as follows:
502
503 RTL Absolute
504 tmp = hi (symbol_ref); adrp x1, foo
505 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
506 nop
507
508 PIC TLS
509 adrp x1, :got:foo adrp tmp, :tlsgd:foo
510 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
511 bl __tls_get_addr
512 nop
513
514 Load TLS symbol, depending on TLS mechanism and TLS access model.
515
516 Global Dynamic - Traditional TLS:
517 adrp tmp, :tlsgd:imm
518 add dest, tmp, #:tlsgd_lo12:imm
519 bl __tls_get_addr
520
521 Global Dynamic - TLS Descriptors:
522 adrp dest, :tlsdesc:imm
523 ldr tmp, [dest, #:tlsdesc_lo12:imm]
524 add dest, dest, #:tlsdesc_lo12:imm
525 blr tmp
526 mrs tp, tpidr_el0
527 add dest, dest, tp
528
529 Initial Exec:
530 mrs tp, tpidr_el0
531 adrp tmp, :gottprel:imm
532 ldr dest, [tmp, #:gottprel_lo12:imm]
533 add dest, dest, tp
534
535 Local Exec:
536 mrs tp, tpidr_el0
537 add t0, tp, #:tprel_hi12:imm
538 add t0, #:tprel_lo12_nc:imm
539 */
540
541 static void
542 aarch64_load_symref_appropriately (rtx dest, rtx imm,
543 enum aarch64_symbol_type type)
544 {
545 switch (type)
546 {
547 case SYMBOL_SMALL_ABSOLUTE:
548 {
549 /* In ILP32, the mode of dest can be either SImode or DImode. */
550 rtx tmp_reg = dest;
551 enum machine_mode mode = GET_MODE (dest);
552
553 gcc_assert (mode == Pmode || mode == ptr_mode);
554
555 if (can_create_pseudo_p ())
556 tmp_reg = gen_reg_rtx (mode);
557
558 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
559 emit_insn (gen_add_losym (dest, tmp_reg, imm));
560 return;
561 }
562
563 case SYMBOL_TINY_ABSOLUTE:
564 emit_insn (gen_rtx_SET (Pmode, dest, imm));
565 return;
566
567 case SYMBOL_SMALL_GOT:
568 {
569 /* In ILP32, the mode of dest can be either SImode or DImode,
570 while the got entry is always of SImode size. The mode of
571 dest depends on how dest is used: if dest is assigned to a
572 pointer (e.g. in the memory), it has SImode; it may have
573 DImode if dest is dereferenced to access the memory.
574 This is why we have to handle three different ldr_got_small
575 patterns here (two patterns for ILP32). */
576 rtx tmp_reg = dest;
577 enum machine_mode mode = GET_MODE (dest);
578
579 if (can_create_pseudo_p ())
580 tmp_reg = gen_reg_rtx (mode);
581
582 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
583 if (mode == ptr_mode)
584 {
585 if (mode == DImode)
586 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
587 else
588 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
589 }
590 else
591 {
592 gcc_assert (mode == Pmode);
593 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
594 }
595
596 return;
597 }
598
599 case SYMBOL_SMALL_TLSGD:
600 {
601 rtx insns;
602 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
603
604 start_sequence ();
605 emit_call_insn (gen_tlsgd_small (result, imm));
606 insns = get_insns ();
607 end_sequence ();
608
609 RTL_CONST_CALL_P (insns) = 1;
610 emit_libcall_block (insns, dest, result, imm);
611 return;
612 }
613
614 case SYMBOL_SMALL_TLSDESC:
615 {
616 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
617 rtx tp;
618
619 emit_insn (gen_tlsdesc_small (imm));
620 tp = aarch64_load_tp (NULL);
621 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
622 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
623 return;
624 }
625
626 case SYMBOL_SMALL_GOTTPREL:
627 {
628 rtx tmp_reg = gen_reg_rtx (Pmode);
629 rtx tp = aarch64_load_tp (NULL);
630 emit_insn (gen_tlsie_small (tmp_reg, imm));
631 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
632 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
633 return;
634 }
635
636 case SYMBOL_SMALL_TPREL:
637 {
638 rtx tp = aarch64_load_tp (NULL);
639 emit_insn (gen_tlsle_small (dest, tp, imm));
640 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
641 return;
642 }
643
644 case SYMBOL_TINY_GOT:
645 emit_insn (gen_ldr_got_tiny (dest, imm));
646 return;
647
648 default:
649 gcc_unreachable ();
650 }
651 }
652
653 /* Emit a move from SRC to DEST. Assume that the move expanders can
654 handle all moves if !can_create_pseudo_p (). The distinction is
655 important because, unlike emit_move_insn, the move expanders know
656 how to force Pmode objects into the constant pool even when the
657 constant pool address is not itself legitimate. */
658 static rtx
659 aarch64_emit_move (rtx dest, rtx src)
660 {
661 return (can_create_pseudo_p ()
662 ? emit_move_insn (dest, src)
663 : emit_move_insn_1 (dest, src));
664 }
665
666 void
667 aarch64_split_128bit_move (rtx dst, rtx src)
668 {
669 rtx low_dst;
670
671 enum machine_mode src_mode = GET_MODE (src);
672 enum machine_mode dst_mode = GET_MODE (dst);
673 int src_regno = REGNO (src);
674 int dst_regno = REGNO (dst);
675
676 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
677
678 if (REG_P (dst) && REG_P (src))
679 {
680 gcc_assert (src_mode == TImode || src_mode == TFmode);
681
682 /* Handle r -> w, w -> r. */
683 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
684 {
685 switch (src_mode) {
686 case TImode:
687 emit_insn
688 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
689 emit_insn
690 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
691 return;
692 case TFmode:
693 emit_insn
694 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
695 emit_insn
696 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
697 return;
698 default:
699 gcc_unreachable ();
700 }
701 }
702 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
703 {
704 switch (src_mode) {
705 case TImode:
706 emit_insn
707 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
708 emit_insn
709 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
710 return;
711 case TFmode:
712 emit_insn
713 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
714 emit_insn
715 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
716 return;
717 default:
718 gcc_unreachable ();
719 }
720 }
721 /* Fall through to r -> r cases. */
722 }
723
724 switch (dst_mode) {
725 case TImode:
726 low_dst = gen_lowpart (word_mode, dst);
727 if (REG_P (low_dst)
728 && reg_overlap_mentioned_p (low_dst, src))
729 {
730 aarch64_emit_move (gen_highpart (word_mode, dst),
731 gen_highpart_mode (word_mode, TImode, src));
732 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
733 }
734 else
735 {
736 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
737 aarch64_emit_move (gen_highpart (word_mode, dst),
738 gen_highpart_mode (word_mode, TImode, src));
739 }
740 return;
741 case TFmode:
742 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
743 gen_rtx_REG (DFmode, src_regno));
744 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
745 gen_rtx_REG (DFmode, src_regno + 1));
746 return;
747 default:
748 gcc_unreachable ();
749 }
750 }
751
752 bool
753 aarch64_split_128bit_move_p (rtx dst, rtx src)
754 {
755 return (! REG_P (src)
756 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
757 }
758
759 /* Split a complex SIMD combine. */
760
761 void
762 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
763 {
764 enum machine_mode src_mode = GET_MODE (src1);
765 enum machine_mode dst_mode = GET_MODE (dst);
766
767 gcc_assert (VECTOR_MODE_P (dst_mode));
768
769 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
770 {
771 rtx (*gen) (rtx, rtx, rtx);
772
773 switch (src_mode)
774 {
775 case V8QImode:
776 gen = gen_aarch64_simd_combinev8qi;
777 break;
778 case V4HImode:
779 gen = gen_aarch64_simd_combinev4hi;
780 break;
781 case V2SImode:
782 gen = gen_aarch64_simd_combinev2si;
783 break;
784 case V2SFmode:
785 gen = gen_aarch64_simd_combinev2sf;
786 break;
787 case DImode:
788 gen = gen_aarch64_simd_combinedi;
789 break;
790 case DFmode:
791 gen = gen_aarch64_simd_combinedf;
792 break;
793 default:
794 gcc_unreachable ();
795 }
796
797 emit_insn (gen (dst, src1, src2));
798 return;
799 }
800 }
801
802 /* Split a complex SIMD move. */
803
804 void
805 aarch64_split_simd_move (rtx dst, rtx src)
806 {
807 enum machine_mode src_mode = GET_MODE (src);
808 enum machine_mode dst_mode = GET_MODE (dst);
809
810 gcc_assert (VECTOR_MODE_P (dst_mode));
811
812 if (REG_P (dst) && REG_P (src))
813 {
814 rtx (*gen) (rtx, rtx);
815
816 gcc_assert (VECTOR_MODE_P (src_mode));
817
818 switch (src_mode)
819 {
820 case V16QImode:
821 gen = gen_aarch64_split_simd_movv16qi;
822 break;
823 case V8HImode:
824 gen = gen_aarch64_split_simd_movv8hi;
825 break;
826 case V4SImode:
827 gen = gen_aarch64_split_simd_movv4si;
828 break;
829 case V2DImode:
830 gen = gen_aarch64_split_simd_movv2di;
831 break;
832 case V4SFmode:
833 gen = gen_aarch64_split_simd_movv4sf;
834 break;
835 case V2DFmode:
836 gen = gen_aarch64_split_simd_movv2df;
837 break;
838 default:
839 gcc_unreachable ();
840 }
841
842 emit_insn (gen (dst, src));
843 return;
844 }
845 }
846
847 static rtx
848 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
849 {
850 if (can_create_pseudo_p ())
851 return force_reg (mode, value);
852 else
853 {
854 x = aarch64_emit_move (x, value);
855 return x;
856 }
857 }
858
859
860 static rtx
861 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
862 {
863 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
864 {
865 rtx high;
866 /* Load the full offset into a register. This
867 might be improvable in the future. */
868 high = GEN_INT (offset);
869 offset = 0;
870 high = aarch64_force_temporary (mode, temp, high);
871 reg = aarch64_force_temporary (mode, temp,
872 gen_rtx_PLUS (mode, high, reg));
873 }
874 return plus_constant (mode, reg, offset);
875 }
876
877 void
878 aarch64_expand_mov_immediate (rtx dest, rtx imm)
879 {
880 enum machine_mode mode = GET_MODE (dest);
881 unsigned HOST_WIDE_INT mask;
882 int i;
883 bool first;
884 unsigned HOST_WIDE_INT val;
885 bool subtargets;
886 rtx subtarget;
887 int one_match, zero_match;
888
889 gcc_assert (mode == SImode || mode == DImode);
890
891 /* Check what type of symbol it is. */
892 if (GET_CODE (imm) == SYMBOL_REF
893 || GET_CODE (imm) == LABEL_REF
894 || GET_CODE (imm) == CONST)
895 {
896 rtx mem, base, offset;
897 enum aarch64_symbol_type sty;
898
899 /* If we have (const (plus symbol offset)), separate out the offset
900 before we start classifying the symbol. */
901 split_const (imm, &base, &offset);
902
903 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
904 switch (sty)
905 {
906 case SYMBOL_FORCE_TO_MEM:
907 if (offset != const0_rtx
908 && targetm.cannot_force_const_mem (mode, imm))
909 {
910 gcc_assert(can_create_pseudo_p ());
911 base = aarch64_force_temporary (mode, dest, base);
912 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
913 aarch64_emit_move (dest, base);
914 return;
915 }
916 mem = force_const_mem (ptr_mode, imm);
917 gcc_assert (mem);
918 if (mode != ptr_mode)
919 mem = gen_rtx_ZERO_EXTEND (mode, mem);
920 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
921 return;
922
923 case SYMBOL_SMALL_TLSGD:
924 case SYMBOL_SMALL_TLSDESC:
925 case SYMBOL_SMALL_GOTTPREL:
926 case SYMBOL_SMALL_GOT:
927 case SYMBOL_TINY_GOT:
928 if (offset != const0_rtx)
929 {
930 gcc_assert(can_create_pseudo_p ());
931 base = aarch64_force_temporary (mode, dest, base);
932 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
933 aarch64_emit_move (dest, base);
934 return;
935 }
936 /* FALLTHRU */
937
938 case SYMBOL_SMALL_TPREL:
939 case SYMBOL_SMALL_ABSOLUTE:
940 case SYMBOL_TINY_ABSOLUTE:
941 aarch64_load_symref_appropriately (dest, imm, sty);
942 return;
943
944 default:
945 gcc_unreachable ();
946 }
947 }
948
949 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
950 {
951 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
952 return;
953 }
954
955 if (!CONST_INT_P (imm))
956 {
957 if (GET_CODE (imm) == HIGH)
958 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
959 else
960 {
961 rtx mem = force_const_mem (mode, imm);
962 gcc_assert (mem);
963 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
964 }
965
966 return;
967 }
968
969 if (mode == SImode)
970 {
971 /* We know we can't do this in 1 insn, and we must be able to do it
972 in two; so don't mess around looking for sequences that don't buy
973 us anything. */
974 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
975 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
976 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
977 return;
978 }
979
980 /* Remaining cases are all for DImode. */
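/* Sketch of the strategy below (the constants are illustrative only):
   the value is examined one 16-bit halfword at a time.  If two halfwords
   are all-ones, it is cheaper to start from an all-ones (MOVN-class)
   constant and patch the remaining halfwords with MOVK; if two halfwords
   are zero, a MOVZ/MOVK sequence is used, e.g. for 0x0000001200340000:

     mov  x0, #0x340000
     movk x0, #0x12, lsl #32

   Otherwise the code tries to reach the value from a single add/sub
   immediate or from one or two bitmask immediates before falling back to
   the full MOVZ plus up-to-three-MOVK sequence.  */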
981
982 val = INTVAL (imm);
983 subtargets = optimize && can_create_pseudo_p ();
984
985 one_match = 0;
986 zero_match = 0;
987 mask = 0xffff;
988
989 for (i = 0; i < 64; i += 16, mask <<= 16)
990 {
991 if ((val & mask) == 0)
992 zero_match++;
993 else if ((val & mask) == mask)
994 one_match++;
995 }
996
997 if (one_match == 2)
998 {
999 mask = 0xffff;
1000 for (i = 0; i < 64; i += 16, mask <<= 16)
1001 {
1002 if ((val & mask) != mask)
1003 {
1004 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1005 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1006 GEN_INT ((val >> i) & 0xffff)));
1007 return;
1008 }
1009 }
1010 gcc_unreachable ();
1011 }
1012
1013 if (zero_match == 2)
1014 goto simple_sequence;
1015
1016 mask = 0x0ffff0000UL;
1017 for (i = 16; i < 64; i += 16, mask <<= 16)
1018 {
1019 HOST_WIDE_INT comp = mask & ~(mask - 1);
1020
1021 if (aarch64_uimm12_shift (val - (val & mask)))
1022 {
1023 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1024
1025 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1026 emit_insn (gen_adddi3 (dest, subtarget,
1027 GEN_INT (val - (val & mask))));
1028 return;
1029 }
1030 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1031 {
1032 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1033
1034 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1035 GEN_INT ((val + comp) & mask)));
1036 emit_insn (gen_adddi3 (dest, subtarget,
1037 GEN_INT (val - ((val + comp) & mask))));
1038 return;
1039 }
1040 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1041 {
1042 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1043
1044 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1045 GEN_INT ((val - comp) | ~mask)));
1046 emit_insn (gen_adddi3 (dest, subtarget,
1047 GEN_INT (val - ((val - comp) | ~mask))));
1048 return;
1049 }
1050 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1051 {
1052 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1053
1054 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1055 GEN_INT (val | ~mask)));
1056 emit_insn (gen_adddi3 (dest, subtarget,
1057 GEN_INT (val - (val | ~mask))));
1058 return;
1059 }
1060 }
1061
1062 /* See if we can do it by arithmetically combining two
1063 immediates. */
1064 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1065 {
1066 int j;
1067 mask = 0xffff;
1068
1069 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1070 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1071 {
1072 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1073 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1074 GEN_INT (aarch64_bitmasks[i])));
1075 emit_insn (gen_adddi3 (dest, subtarget,
1076 GEN_INT (val - aarch64_bitmasks[i])));
1077 return;
1078 }
1079
1080 for (j = 0; j < 64; j += 16, mask <<= 16)
1081 {
1082 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1083 {
1084 emit_insn (gen_rtx_SET (VOIDmode, dest,
1085 GEN_INT (aarch64_bitmasks[i])));
1086 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1087 GEN_INT ((val >> j) & 0xffff)));
1088 return;
1089 }
1090 }
1091 }
1092
1093 /* See if we can do it by logically combining two immediates. */
1094 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1095 {
1096 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1097 {
1098 int j;
1099
1100 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1101 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1102 {
1103 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1104 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1105 GEN_INT (aarch64_bitmasks[i])));
1106 emit_insn (gen_iordi3 (dest, subtarget,
1107 GEN_INT (aarch64_bitmasks[j])));
1108 return;
1109 }
1110 }
1111 else if ((val & aarch64_bitmasks[i]) == val)
1112 {
1113 int j;
1114
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1117 {
1118
1119 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1120 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1121 GEN_INT (aarch64_bitmasks[j])));
1122 emit_insn (gen_anddi3 (dest, subtarget,
1123 GEN_INT (aarch64_bitmasks[i])));
1124 return;
1125 }
1126 }
1127 }
1128
1129 simple_sequence:
1130 first = true;
1131 mask = 0xffff;
1132 for (i = 0; i < 64; i += 16, mask <<= 16)
1133 {
1134 if ((val & mask) != 0)
1135 {
1136 if (first)
1137 {
1138 emit_insn (gen_rtx_SET (VOIDmode, dest,
1139 GEN_INT (val & mask)));
1140 first = false;
1141 }
1142 else
1143 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1144 GEN_INT ((val >> i) & 0xffff)));
1145 }
1146 }
1147 }
1148
1149 static bool
1150 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1151 {
1152 /* Indirect calls are not currently supported. */
1153 if (decl == NULL)
1154 return false;
1155
1156 /* Cannot tail-call to long-calls, since these are outside of the
1157 range of a branch instruction (we could handle this if we added
1158 support for indirect tail-calls). */
1159 if (aarch64_decl_is_long_call_p (decl))
1160 return false;
1161
1162 return true;
1163 }
1164
1165 /* Implement TARGET_PASS_BY_REFERENCE. */
1166
1167 static bool
1168 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1169 enum machine_mode mode,
1170 const_tree type,
1171 bool named ATTRIBUTE_UNUSED)
1172 {
1173 HOST_WIDE_INT size;
1174 enum machine_mode dummymode;
1175 int nregs;
1176
1177 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1178 size = (mode == BLKmode && type)
1179 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1180
1181 if (type)
1182 {
1183 /* Arrays are always passed by reference. */
1184 if (TREE_CODE (type) == ARRAY_TYPE)
1185 return true;
1186 /* For other aggregates, the decision is based on their size. */
1187 if (AGGREGATE_TYPE_P (type))
1188 size = int_size_in_bytes (type);
1189 }
1190
1191 /* Variable-sized arguments are always passed by reference. */
1192 if (size < 0)
1193 return true;
1194
1195 /* Can this be a candidate to be passed in fp/simd register(s)? */
1196 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1197 &dummymode, &nregs,
1198 NULL))
1199 return false;
1200
1201 /* Arguments which are variable sized or larger than 2 registers are
1202 passed by reference unless they are a homogeneous floating-point
1203 aggregate. */
1204 return size > 2 * UNITS_PER_WORD;
1205 }
1206
1207 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1208 static bool
1209 aarch64_return_in_msb (const_tree valtype)
1210 {
1211 enum machine_mode dummy_mode;
1212 int dummy_int;
1213
1214 /* Never happens in little-endian mode. */
1215 if (!BYTES_BIG_ENDIAN)
1216 return false;
1217
1218 /* Only composite types smaller than or equal to 16 bytes can
1219 be potentially returned in registers. */
1220 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1221 || int_size_in_bytes (valtype) <= 0
1222 || int_size_in_bytes (valtype) > 16)
1223 return false;
1224
1225 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1226 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1227 is always passed/returned in the least significant bits of fp/simd
1228 register(s). */
1229 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1230 &dummy_mode, &dummy_int, NULL))
1231 return false;
1232
1233 return true;
1234 }
1235
1236 /* Implement TARGET_FUNCTION_VALUE.
1237 Define how to find the value returned by a function. */
1238
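/* As an illustrative example (types chosen here, not from the sources):
   a struct of four doubles is an HFA and is returned as a PARALLEL
   spanning v0-v3, a __int128 comes back in x0/x1, and a plain int comes
   back in w0.  */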
1239 static rtx
1240 aarch64_function_value (const_tree type, const_tree func,
1241 bool outgoing ATTRIBUTE_UNUSED)
1242 {
1243 enum machine_mode mode;
1244 int unsignedp;
1245 int count;
1246 enum machine_mode ag_mode;
1247
1248 mode = TYPE_MODE (type);
1249 if (INTEGRAL_TYPE_P (type))
1250 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1251
1252 if (aarch64_return_in_msb (type))
1253 {
1254 HOST_WIDE_INT size = int_size_in_bytes (type);
1255
1256 if (size % UNITS_PER_WORD != 0)
1257 {
1258 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1259 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1260 }
1261 }
1262
1263 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1264 &ag_mode, &count, NULL))
1265 {
1266 if (!aarch64_composite_type_p (type, mode))
1267 {
1268 gcc_assert (count == 1 && mode == ag_mode);
1269 return gen_rtx_REG (mode, V0_REGNUM);
1270 }
1271 else
1272 {
1273 int i;
1274 rtx par;
1275
1276 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1277 for (i = 0; i < count; i++)
1278 {
1279 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1280 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1281 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1282 XVECEXP (par, 0, i) = tmp;
1283 }
1284 return par;
1285 }
1286 }
1287 else
1288 return gen_rtx_REG (mode, R0_REGNUM);
1289 }
1290
1291 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1292 Return true if REGNO is the number of a hard register in which the values
1293 of a called function may come back. */
1294
1295 static bool
1296 aarch64_function_value_regno_p (const unsigned int regno)
1297 {
1298 /* Maximum of 16 bytes can be returned in the general registers. Examples
1299 of 16-byte return values are: 128-bit integers and 16-byte small
1300 structures (excluding homogeneous floating-point aggregates). */
1301 if (regno == R0_REGNUM || regno == R1_REGNUM)
1302 return true;
1303
1304 /* Up to four fp/simd registers can return a function value, e.g. a
1305 homogeneous floating-point aggregate having four members. */
1306 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1307 return !TARGET_GENERAL_REGS_ONLY;
1308
1309 return false;
1310 }
1311
1312 /* Implement TARGET_RETURN_IN_MEMORY.
1313
1314 If the type T of the result of a function is such that
1315 void func (T arg)
1316 would require that arg be passed as a value in a register (or set of
1317 registers) according to the parameter passing rules, then the result
1318 is returned in the same registers as would be used for such an
1319 argument. */
1320
1321 static bool
1322 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1323 {
1324 HOST_WIDE_INT size;
1325 enum machine_mode ag_mode;
1326 int count;
1327
1328 if (!AGGREGATE_TYPE_P (type)
1329 && TREE_CODE (type) != COMPLEX_TYPE
1330 && TREE_CODE (type) != VECTOR_TYPE)
1331 /* Simple scalar types always returned in registers. */
1332 return false;
1333
1334 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1335 type,
1336 &ag_mode,
1337 &count,
1338 NULL))
1339 return false;
1340
1341 /* Types larger than 2 registers are returned in memory. */
1342 size = int_size_in_bytes (type);
1343 return (size < 0 || size > 2 * UNITS_PER_WORD);
1344 }
1345
1346 static bool
1347 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1348 const_tree type, int *nregs)
1349 {
1350 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1351 return aarch64_vfp_is_call_or_return_candidate (mode,
1352 type,
1353 &pcum->aapcs_vfp_rmode,
1354 nregs,
1355 NULL);
1356 }
1357
1358 /* Given MODE and TYPE of a function argument, return the alignment in
1359 bits. The idea is to suppress any stronger alignment requested by
1360 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1361 This is a helper function for local use only. */
1362
1363 static unsigned int
1364 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1365 {
1366 unsigned int alignment;
1367
1368 if (type)
1369 {
1370 if (!integer_zerop (TYPE_SIZE (type)))
1371 {
1372 if (TYPE_MODE (type) == mode)
1373 alignment = TYPE_ALIGN (type);
1374 else
1375 alignment = GET_MODE_ALIGNMENT (mode);
1376 }
1377 else
1378 alignment = 0;
1379 }
1380 else
1381 alignment = GET_MODE_ALIGNMENT (mode);
1382
1383 return alignment;
1384 }
1385
1386 /* Layout a function argument according to the AAPCS64 rules. The rule
1387 numbers refer to the rule numbers in the AAPCS64. */
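/* As a worked example (hypothetical signature, for illustration only),
   for

     void f (int a, double b, struct { float x, y; } c, __int128 d);

   the allocation performed below is roughly: a -> w0, b -> d0, c -> s1
   and s2 (an HFA of two floats, one SIMD/FP register per member), and
   d -> the x2/x3 pair (its 16-byte alignment first rounds the NGRN up to
   an even number, as in C.8 below).  Anything that no longer fits in
   registers goes on the stack.  */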
1388
1389 static void
1390 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1391 const_tree type,
1392 bool named ATTRIBUTE_UNUSED)
1393 {
1394 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1395 int ncrn, nvrn, nregs;
1396 bool allocate_ncrn, allocate_nvrn;
1397
1398 /* We need to do this once per argument. */
1399 if (pcum->aapcs_arg_processed)
1400 return;
1401
1402 pcum->aapcs_arg_processed = true;
1403
1404 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1405 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1406 mode,
1407 type,
1408 &nregs);
1409
1410 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1411 The following code thus handles passing by SIMD/FP registers first. */
1412
1413 nvrn = pcum->aapcs_nvrn;
1414
1415 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1416 and homogeneous short-vector aggregates (HVA). */
1417 if (allocate_nvrn)
1418 {
1419 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1420 {
1421 pcum->aapcs_nextnvrn = nvrn + nregs;
1422 if (!aarch64_composite_type_p (type, mode))
1423 {
1424 gcc_assert (nregs == 1);
1425 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1426 }
1427 else
1428 {
1429 rtx par;
1430 int i;
1431 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1432 for (i = 0; i < nregs; i++)
1433 {
1434 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1435 V0_REGNUM + nvrn + i);
1436 tmp = gen_rtx_EXPR_LIST
1437 (VOIDmode, tmp,
1438 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1439 XVECEXP (par, 0, i) = tmp;
1440 }
1441 pcum->aapcs_reg = par;
1442 }
1443 return;
1444 }
1445 else
1446 {
1447 /* C.3 NSRN is set to 8. */
1448 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1449 goto on_stack;
1450 }
1451 }
1452
1453 ncrn = pcum->aapcs_ncrn;
1454 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1455 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1456
1457
1458 /* C.6 - C.9, though the sign and zero extension semantics are
1459 handled elsewhere. This is the case where the argument fits
1460 entirely in general registers. */
1461 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1462 {
1463 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1464
1465 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1466
1467 /* C.8 if the argument has an alignment of 16 then the NGRN is
1468 rounded up to the next even number. */
1469 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1470 {
1471 ++ncrn;
1472 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1473 }
1474 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1475 A reg is still generated for it, but the caller should be smart
1476 enough not to use it. */
1477 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1478 {
1479 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1480 }
1481 else
1482 {
1483 rtx par;
1484 int i;
1485
1486 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1487 for (i = 0; i < nregs; i++)
1488 {
1489 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1490 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1491 GEN_INT (i * UNITS_PER_WORD));
1492 XVECEXP (par, 0, i) = tmp;
1493 }
1494 pcum->aapcs_reg = par;
1495 }
1496
1497 pcum->aapcs_nextncrn = ncrn + nregs;
1498 return;
1499 }
1500
1501 /* C.11 */
1502 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1503
1504 /* The argument is passed on stack; record the needed number of words for
1505 this argument (we can re-use NREGS) and align the total size if
1506 necessary. */
1507 on_stack:
1508 pcum->aapcs_stack_words = nregs;
1509 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1510 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1511 16 / UNITS_PER_WORD) + 1;
1512 return;
1513 }
1514
1515 /* Implement TARGET_FUNCTION_ARG. */
1516
1517 static rtx
1518 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1519 const_tree type, bool named)
1520 {
1521 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1522 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1523
1524 if (mode == VOIDmode)
1525 return NULL_RTX;
1526
1527 aarch64_layout_arg (pcum_v, mode, type, named);
1528 return pcum->aapcs_reg;
1529 }
1530
1531 void
1532 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1533 const_tree fntype ATTRIBUTE_UNUSED,
1534 rtx libname ATTRIBUTE_UNUSED,
1535 const_tree fndecl ATTRIBUTE_UNUSED,
1536 unsigned n_named ATTRIBUTE_UNUSED)
1537 {
1538 pcum->aapcs_ncrn = 0;
1539 pcum->aapcs_nvrn = 0;
1540 pcum->aapcs_nextncrn = 0;
1541 pcum->aapcs_nextnvrn = 0;
1542 pcum->pcs_variant = ARM_PCS_AAPCS64;
1543 pcum->aapcs_reg = NULL_RTX;
1544 pcum->aapcs_arg_processed = false;
1545 pcum->aapcs_stack_words = 0;
1546 pcum->aapcs_stack_size = 0;
1547
1548 return;
1549 }
1550
1551 static void
1552 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1553 enum machine_mode mode,
1554 const_tree type,
1555 bool named)
1556 {
1557 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1558 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1559 {
1560 aarch64_layout_arg (pcum_v, mode, type, named);
1561 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1562 != (pcum->aapcs_stack_words != 0));
1563 pcum->aapcs_arg_processed = false;
1564 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1565 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1566 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1567 pcum->aapcs_stack_words = 0;
1568 pcum->aapcs_reg = NULL_RTX;
1569 }
1570 }
1571
1572 bool
1573 aarch64_function_arg_regno_p (unsigned regno)
1574 {
1575 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1576 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1577 }
1578
1579 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1580 PARM_BOUNDARY bits of alignment, but will be given anything up
1581 to STACK_BOUNDARY bits if the type requires it. This makes sure
1582 that both before and after the layout of each argument, the Next
1583 Stacked Argument Address (NSAA) will have a minimum alignment of
1584 8 bytes. */
1585
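/* For example (illustrative): a plain char argument is raised to
   PARM_BOUNDARY (64 bits), while a 16-byte aligned type such as
   __int128 is reported with STACK_BOUNDARY (128 bits); nothing is ever
   reported above STACK_BOUNDARY.  */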
1586 static unsigned int
1587 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1588 {
1589 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1590
1591 if (alignment < PARM_BOUNDARY)
1592 alignment = PARM_BOUNDARY;
1593 if (alignment > STACK_BOUNDARY)
1594 alignment = STACK_BOUNDARY;
1595 return alignment;
1596 }
1597
1598 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1599
1600 Return true if an argument passed on the stack should be padded upwards,
1601 i.e. if the least-significant byte of the stack slot has useful data.
1602
1603 Small aggregate types are placed in the lowest memory address.
1604
1605 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1606
1607 bool
1608 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1609 {
1610 /* On little-endian targets, the least significant byte of every stack
1611 argument is passed at the lowest byte address of the stack slot. */
1612 if (!BYTES_BIG_ENDIAN)
1613 return true;
1614
1615 /* Otherwise, integral, floating-point and pointer types are padded downward:
1616 the least significant byte of a stack argument is passed at the highest
1617 byte address of the stack slot. */
1618 if (type
1619 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1620 || POINTER_TYPE_P (type))
1621 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1622 return false;
1623
1624 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1625 return true;
1626 }
1627
1628 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1629
1630 It specifies padding for the last (may also be the only)
1631 element of a block move between registers and memory.
1632 Assuming the block is in memory, padding upward means that
1633 the last element is padded after its most significant byte,
1634 while with downward padding the last element is padded at
1635 its least significant byte side.
1636
1637 Small aggregates and small complex types are always padded
1638 upwards.
1639
1640 We don't need to worry about homogeneous floating-point or
1641 short-vector aggregates; their move is not affected by the
1642 padding direction determined here. Regardless of endianness,
1643 each element of such an aggregate is put in the least
1644 significant bits of a fp/simd register.
1645
1646 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1647 register has useful data, and return the opposite if the most
1648 significant byte does. */
1649
1650 bool
1651 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1652 bool first ATTRIBUTE_UNUSED)
1653 {
1654
1655 /* Small composite types are always padded upward. */
1656 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1657 {
1658 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1659 : GET_MODE_SIZE (mode));
1660 if (size < 2 * UNITS_PER_WORD)
1661 return true;
1662 }
1663
1664 /* Otherwise, use the default padding. */
1665 return !BYTES_BIG_ENDIAN;
1666 }
1667
1668 static enum machine_mode
1669 aarch64_libgcc_cmp_return_mode (void)
1670 {
1671 return SImode;
1672 }
1673
1674 static bool
1675 aarch64_frame_pointer_required (void)
1676 {
1677 /* If the function contains dynamic stack allocations, we need to
1678 use the frame pointer to access the static parts of the frame. */
1679 if (cfun->calls_alloca)
1680 return true;
1681
1682 /* We may have turned flag_omit_frame_pointer on in order to have this
1683 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1684 and we'll check it here.
1685 If we really did set flag_omit_frame_pointer normally, then we return false
1686 (no frame pointer required) in all cases. */
1687
1688 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1689 return false;
1690 else if (flag_omit_leaf_frame_pointer)
1691 return !crtl->is_leaf;
1692 return true;
1693 }
1694
1695 /* Mark the registers that need to be saved by the callee and calculate
1696 the size of the callee-saved registers area and frame record (both FP
1697 and LR may be omitted). */
1698 static void
1699 aarch64_layout_frame (void)
1700 {
1701 HOST_WIDE_INT offset = 0;
1702 int regno;
1703
1704 if (reload_completed && cfun->machine->frame.laid_out)
1705 return;
1706
1707 cfun->machine->frame.fp_lr_offset = 0;
1708
1709 /* First mark all the registers that really need to be saved... */
1710 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1711 cfun->machine->frame.reg_offset[regno] = -1;
1712
1713 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1714 cfun->machine->frame.reg_offset[regno] = -1;
1715
1716 /* ... that includes the eh data registers (if needed)... */
1717 if (crtl->calls_eh_return)
1718 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1719 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1720
1721 /* ... and any callee saved register that dataflow says is live. */
1722 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1723 if (df_regs_ever_live_p (regno)
1724 && !call_used_regs[regno])
1725 cfun->machine->frame.reg_offset[regno] = 0;
1726
1727 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1728 if (df_regs_ever_live_p (regno)
1729 && !call_used_regs[regno])
1730 cfun->machine->frame.reg_offset[regno] = 0;
1731
1732 if (frame_pointer_needed)
1733 {
1734 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1735 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1736 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1737 }
1738
1739 /* Now assign stack slots for them. */
1740 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1741 if (cfun->machine->frame.reg_offset[regno] != -1)
1742 {
1743 cfun->machine->frame.reg_offset[regno] = offset;
1744 offset += UNITS_PER_WORD;
1745 }
1746
1747 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1748 if (cfun->machine->frame.reg_offset[regno] != -1)
1749 {
1750 cfun->machine->frame.reg_offset[regno] = offset;
1751 offset += UNITS_PER_WORD;
1752 }
1753
1754 if (frame_pointer_needed)
1755 {
1756 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1757 offset += UNITS_PER_WORD;
1758 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1759 }
1760
1761 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1762 {
1763 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1764 offset += UNITS_PER_WORD;
1765 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1766 }
1767
1768 cfun->machine->frame.padding0 =
1769 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1770 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1771
1772 cfun->machine->frame.saved_regs_size = offset;
1773 cfun->machine->frame.laid_out = true;
1774 }
1775
1776 /* Make the last instruction frame-related and note that it performs
1777 the operation described by FRAME_PATTERN. */
1778
1779 static void
1780 aarch64_set_frame_expr (rtx frame_pattern)
1781 {
1782 rtx insn;
1783
1784 insn = get_last_insn ();
1785 RTX_FRAME_RELATED_P (insn) = 1;
1786 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1787 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1788 frame_pattern,
1789 REG_NOTES (insn));
1790 }
1791
1792 static bool
1793 aarch64_register_saved_on_entry (int regno)
1794 {
1795 return cfun->machine->frame.reg_offset[regno] != -1;
1796 }
1797
1798
1799 static void
1800 aarch64_save_or_restore_fprs (int start_offset, int increment,
1801 bool restore, rtx base_rtx)
1802
1803 {
1804 unsigned regno;
1805 unsigned regno2;
1806 rtx insn;
1807 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1808
1809
1810 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1811 {
1812 if (aarch64_register_saved_on_entry (regno))
1813 {
1814 rtx mem;
1815 mem = gen_mem_ref (DFmode,
1816 plus_constant (Pmode,
1817 base_rtx,
1818 start_offset));
1819
1820 for (regno2 = regno + 1;
1821 regno2 <= V31_REGNUM
1822 && !aarch64_register_saved_on_entry (regno2);
1823 regno2++)
1824 {
1825 /* Empty loop. */
1826 }
1827 if (regno2 <= V31_REGNUM &&
1828 aarch64_register_saved_on_entry (regno2))
1829 {
1830 rtx mem2;
1831 /* Next highest register to be saved. */
1832 mem2 = gen_mem_ref (DFmode,
1833 plus_constant
1834 (Pmode,
1835 base_rtx,
1836 start_offset + increment));
1837 if (restore == false)
1838 {
1839 insn = emit_insn
1840 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1841 mem2, gen_rtx_REG (DFmode, regno2)));
1842
1843 }
1844 else
1845 {
1846 insn = emit_insn
1847 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1848 gen_rtx_REG (DFmode, regno2), mem2));
1849
1850 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1851 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1852 }
1853
1854 /* The first part of a frame-related parallel insn
1855 is always assumed to be relevant to the frame
1856 calculations; subsequent parts are only
1857 frame-related if explicitly marked. */
1858 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1859 1)) = 1;
1860 regno = regno2;
1861 start_offset += increment * 2;
1862 }
1863 else
1864 {
1865 if (restore == false)
1866 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1867 else
1868 {
1869 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1870 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1871 }
1872 start_offset += increment;
1873 }
1874 RTX_FRAME_RELATED_P (insn) = 1;
1875 }
1876 }
1877
1878 }
1879
1880
1881 /* Offset from the stack pointer at which the saves and
1882 restores have to happen. */
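/* A sketch of the pairing done below (registers chosen for
   illustration): if x19, x20 and x23 are the live callee-saved
   registers, x19 and x20 are saved or restored together with a single

     stp x19, x20, [base, #offset]

   (or the matching ldp), while the unpaired x23 falls back to a plain
   str/ldr.  The same scheme is applied to the FP/SIMD registers by
   aarch64_save_or_restore_fprs.  */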
1883 static void
1884 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1885 bool restore)
1886 {
1887 rtx insn;
1888 rtx base_rtx = stack_pointer_rtx;
1889 HOST_WIDE_INT start_offset = offset;
1890 HOST_WIDE_INT increment = UNITS_PER_WORD;
1891 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1892 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1893 unsigned regno;
1894 unsigned regno2;
1895
1896 for (regno = R0_REGNUM; regno <= limit; regno++)
1897 {
1898 if (aarch64_register_saved_on_entry (regno))
1899 {
1900 rtx mem;
1901 mem = gen_mem_ref (Pmode,
1902 plus_constant (Pmode,
1903 base_rtx,
1904 start_offset));
1905
1906 for (regno2 = regno + 1;
1907 regno2 <= limit
1908 && !aarch64_register_saved_on_entry (regno2);
1909 regno2++)
1910 {
1911 /* Empty loop. */
1912 }
1913 if (regno2 <= limit &&
1914 aarch64_register_saved_on_entry (regno2))
1915 {
1916 rtx mem2;
1917 /* Next highest register to be saved. */
1918 mem2 = gen_mem_ref (Pmode,
1919 plus_constant
1920 (Pmode,
1921 base_rtx,
1922 start_offset + increment));
1923 if (restore == false)
1924 {
1925 insn = emit_insn
1926 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1927 mem2, gen_rtx_REG (DImode, regno2)));
1928
1929 }
1930 else
1931 {
1932 insn = emit_insn
1933 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1934 gen_rtx_REG (DImode, regno2), mem2));
1935
1936 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1937 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1938 }
1939
1940 /* The first part of a frame-related parallel insn
1941 is always assumed to be relevant to the frame
1942 calculations; subsequent parts are only
1943 frame-related if explicitly marked. */
1944 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1945 1)) = 1;
1946 regno = regno2;
1947 start_offset += increment * 2;
1948 }
1949 else
1950 {
1951 if (restore == false)
1952 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1953 else
1954 {
1955 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1956 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1957 }
1958 start_offset += increment;
1959 }
1960 RTX_FRAME_RELATED_P (insn) = 1;
1961 }
1962 }
1963
1964 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1965
1966 }
1967
1968 /* AArch64 stack frames generated by this compiler look like:
1969
1970 +-------------------------------+
1971 | |
1972 | incoming stack arguments |
1973 | |
1974 +-------------------------------+ <-- arg_pointer_rtx
1975 | |
1976 | callee-allocated save area |
1977 | for register varargs |
1978 | |
1979 +-------------------------------+
1980 | |
1981 | local variables |
1982 | |
1983 +-------------------------------+ <-- frame_pointer_rtx
1984 | |
1985 | callee-saved registers |
1986 | |
1987 +-------------------------------+
1988 | LR' |
1989 +-------------------------------+
1990 | FP' |
1991 P +-------------------------------+ <-- hard_frame_pointer_rtx
1992 | dynamic allocation |
1993 +-------------------------------+
1994 | |
1995 | outgoing stack arguments |
1996 | |
1997 +-------------------------------+ <-- stack_pointer_rtx
1998
1999 Dynamic stack allocations such as alloca insert data at point P.
2000 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2001 hard_frame_pointer_rtx unchanged. */
2002
2003 /* Generate the prologue instructions for entry into a function.
2004 Establish the stack frame by decreasing the stack pointer with a
2005 properly calculated size and, if necessary, create a frame record
2006 filled with the values of LR and previous frame pointer. The
2007 current FP is also set up if it is in use. */
2008
2009 void
2010 aarch64_expand_prologue (void)
2011 {
2012 /* sub sp, sp, #<frame_size>
2013 stp {fp, lr}, [sp, #<frame_size> - 16]
2014 add fp, sp, #<frame_size> - hardfp_offset
2015 stp {cs_reg}, [fp, #-16] etc.
2016
2017 sub sp, sp, <final_adjustment_if_any>
2018 */
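/* As an illustrative example only (the exact registers and offsets depend
   on the frame layout computed by aarch64_layout_frame), a small 48-byte
   frame with the frame pointer in use and a 16-byte frame record might
   expand along the lines of:

       sub  sp, sp, #48
       stp  x29, x30, [sp, #32]
       add  x29, sp, #32
       stp  x19, x20, [x29, #-16]

   with a final "sub sp, sp, #<outgoing_args_size>" only when outgoing
   argument space is required.  */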
2019 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2020 HOST_WIDE_INT frame_size, offset;
2021 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2022 rtx insn;
2023
2024 aarch64_layout_frame ();
2025 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2026 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2027 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2028 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2029 + crtl->outgoing_args_size);
2030 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2031 STACK_BOUNDARY / BITS_PER_UNIT);
2032
2033 if (flag_stack_usage_info)
2034 current_function_static_stack_size = frame_size;
2035
2036 fp_offset = (offset
2037 - original_frame_size
2038 - cfun->machine->frame.saved_regs_size);
2039
2040 /* Store pairs and load pairs only have an offset range of -512 to 504. */
2041 if (offset >= 512)
2042 {
2043 /* When the frame has a large size, an initial decrease is done on
2044 the stack pointer to jump over the callee-allocated save area for
2045 register varargs, the local variable area and/or the callee-saved
2046 register area. This will allow the pre-index write-back
2047 store pair instructions to be used for setting up the stack frame
2048 efficiently. */
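/* For instance (illustrative values), a residual frame_size of 0x12345
   bytes is dropped with two subtractions below, "sub sp, sp, #0x12000"
   followed by "sub sp, sp, #0x345", since a SUB immediate only encodes
   12 bits, optionally shifted left by 12; a residue of 0x1000000 bytes or
   more goes through the IP0 scratch register instead.  */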
2049 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2050 if (offset >= 512)
2051 offset = cfun->machine->frame.saved_regs_size;
2052
2053 frame_size -= (offset + crtl->outgoing_args_size);
2054 fp_offset = 0;
2055
2056 if (frame_size >= 0x1000000)
2057 {
2058 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2059 emit_move_insn (op0, GEN_INT (-frame_size));
2060 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2061 aarch64_set_frame_expr (gen_rtx_SET
2062 (Pmode, stack_pointer_rtx,
2063 gen_rtx_PLUS (Pmode,
2064 stack_pointer_rtx,
2065 GEN_INT (-frame_size))));
2066 }
2067 else if (frame_size > 0)
2068 {
2069 if ((frame_size & 0xfff) != frame_size)
2070 {
2071 insn = emit_insn (gen_add2_insn
2072 (stack_pointer_rtx,
2073 GEN_INT (-(frame_size
2074 & ~(HOST_WIDE_INT)0xfff))));
2075 RTX_FRAME_RELATED_P (insn) = 1;
2076 }
2077 if ((frame_size & 0xfff) != 0)
2078 {
2079 insn = emit_insn (gen_add2_insn
2080 (stack_pointer_rtx,
2081 GEN_INT (-(frame_size
2082 & (HOST_WIDE_INT)0xfff))));
2083 RTX_FRAME_RELATED_P (insn) = 1;
2084 }
2085 }
2086 }
2087 else
2088 frame_size = -1;
2089
2090 if (offset > 0)
2091 {
2092 /* Save the frame pointer and lr if the frame pointer is needed
2093 first. Make the frame pointer point to the location of the
2094 old frame pointer on the stack. */
2095 if (frame_pointer_needed)
2096 {
2097 rtx mem_fp, mem_lr;
2098
2099 if (fp_offset)
2100 {
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (-offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2104 aarch64_set_frame_expr (gen_rtx_SET
2105 (Pmode, stack_pointer_rtx,
2106 gen_rtx_MINUS (Pmode,
2107 stack_pointer_rtx,
2108 GEN_INT (offset))));
2109 mem_fp = gen_frame_mem (DImode,
2110 plus_constant (Pmode,
2111 stack_pointer_rtx,
2112 fp_offset));
2113 mem_lr = gen_frame_mem (DImode,
2114 plus_constant (Pmode,
2115 stack_pointer_rtx,
2116 fp_offset
2117 + UNITS_PER_WORD));
2118 insn = emit_insn (gen_store_pairdi (mem_fp,
2119 hard_frame_pointer_rtx,
2120 mem_lr,
2121 gen_rtx_REG (DImode,
2122 LR_REGNUM)));
2123 }
2124 else
2125 {
2126 insn = emit_insn (gen_storewb_pairdi_di
2127 (stack_pointer_rtx, stack_pointer_rtx,
2128 hard_frame_pointer_rtx,
2129 gen_rtx_REG (DImode, LR_REGNUM),
2130 GEN_INT (-offset),
2131 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2132 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2133 }
2134
2135 /* The first part of a frame-related parallel insn is always
2136 assumed to be relevant to the frame calculations;
2137 subsequent parts are only frame-related if explicitly
2138 marked. */
2139 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2140 RTX_FRAME_RELATED_P (insn) = 1;
2141
2142 /* Set up frame pointer to point to the location of the
2143 previous frame pointer on the stack. */
2144 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2145 stack_pointer_rtx,
2146 GEN_INT (fp_offset)));
2147 aarch64_set_frame_expr (gen_rtx_SET
2148 (Pmode, hard_frame_pointer_rtx,
2149 gen_rtx_PLUS (Pmode,
2150 stack_pointer_rtx,
2151 GEN_INT (fp_offset))));
2152 RTX_FRAME_RELATED_P (insn) = 1;
2153 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2154 hard_frame_pointer_rtx));
2155 }
2156 else
2157 {
2158 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2159 GEN_INT (-offset)));
2160 RTX_FRAME_RELATED_P (insn) = 1;
2161 }
2162
2163 aarch64_save_or_restore_callee_save_registers
2164 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2165 }
2166
2167 /* When offset >= 512, the outgoing argument area still needs allocating:
2168 sub sp, sp, #<outgoing_args_size> */
2169 if (frame_size > -1)
2170 {
2171 if (crtl->outgoing_args_size > 0)
2172 {
2173 insn = emit_insn (gen_add2_insn
2174 (stack_pointer_rtx,
2175 GEN_INT (- crtl->outgoing_args_size)));
2176 RTX_FRAME_RELATED_P (insn) = 1;
2177 }
2178 }
2179 }
2180
2181 /* Generate the epilogue instructions for returning from a function. */
2182 void
2183 aarch64_expand_epilogue (bool for_sibcall)
2184 {
2185 HOST_WIDE_INT original_frame_size, frame_size, offset;
2186 HOST_WIDE_INT fp_offset;
2187 rtx insn;
2188 rtx cfa_reg;
2189
2190 aarch64_layout_frame ();
2191 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2192 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2193 + crtl->outgoing_args_size);
2194 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2195 STACK_BOUNDARY / BITS_PER_UNIT);
2196
2197 fp_offset = (offset
2198 - original_frame_size
2199 - cfun->machine->frame.saved_regs_size);
2200
2201 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2202
2203 /* Store pairs and load pairs only have an offset range of -512 to 504. */
2204 if (offset >= 512)
2205 {
2206 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2207 if (offset >= 512)
2208 offset = cfun->machine->frame.saved_regs_size;
2209
2210 frame_size -= (offset + crtl->outgoing_args_size);
2211 fp_offset = 0;
2212 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2213 {
2214 insn = emit_insn (gen_add2_insn
2215 (stack_pointer_rtx,
2216 GEN_INT (crtl->outgoing_args_size)));
2217 RTX_FRAME_RELATED_P (insn) = 1;
2218 }
2219 }
2220 else
2221 frame_size = -1;
2222
2223 /* If there were outgoing arguments or we've done dynamic stack
2224 allocation, then restore the stack pointer from the frame
2225 pointer. This is at most one insn and more efficient than using
2226 GCC's internal mechanism. */
2227 if (frame_pointer_needed
2228 && (crtl->outgoing_args_size || cfun->calls_alloca))
2229 {
2230 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2231 hard_frame_pointer_rtx,
2232 GEN_INT (- fp_offset)));
2233 RTX_FRAME_RELATED_P (insn) = 1;
2234 /* As SP is set to (FP - fp_offset), according to the rules in
2235 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2236 from the value of SP from now on. */
2237 cfa_reg = stack_pointer_rtx;
2238 }
2239
2240 aarch64_save_or_restore_callee_save_registers
2241 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2242
2243 /* Restore the frame pointer and lr if the frame pointer is needed. */
2244 if (offset > 0)
2245 {
2246 if (frame_pointer_needed)
2247 {
2248 rtx mem_fp, mem_lr;
2249
2250 if (fp_offset)
2251 {
2252 mem_fp = gen_frame_mem (DImode,
2253 plus_constant (Pmode,
2254 stack_pointer_rtx,
2255 fp_offset));
2256 mem_lr = gen_frame_mem (DImode,
2257 plus_constant (Pmode,
2258 stack_pointer_rtx,
2259 fp_offset
2260 + UNITS_PER_WORD));
2261 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2262 mem_fp,
2263 gen_rtx_REG (DImode,
2264 LR_REGNUM),
2265 mem_lr));
2266 }
2267 else
2268 {
2269 insn = emit_insn (gen_loadwb_pairdi_di
2270 (stack_pointer_rtx,
2271 stack_pointer_rtx,
2272 hard_frame_pointer_rtx,
2273 gen_rtx_REG (DImode, LR_REGNUM),
2274 GEN_INT (offset),
2275 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2276 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2277 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2278 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2279 plus_constant (Pmode, cfa_reg,
2280 offset))));
2281 }
2282
2283 /* The first part of a frame-related parallel insn
2284 is always assumed to be relevant to the frame
2285 calculations; subsequent parts are only
2286 frame-related if explicitly marked. */
2287 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2288 RTX_FRAME_RELATED_P (insn) = 1;
2289 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2290 add_reg_note (insn, REG_CFA_RESTORE,
2291 gen_rtx_REG (DImode, LR_REGNUM));
2292
2293 if (fp_offset)
2294 {
2295 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2296 GEN_INT (offset)));
2297 RTX_FRAME_RELATED_P (insn) = 1;
2298 }
2299 }
2300 else
2301 {
2302 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2303 GEN_INT (offset)));
2304 RTX_FRAME_RELATED_P (insn) = 1;
2305 }
2306 }
2307
2308 /* Stack adjustment for exception handler. */
2309 if (crtl->calls_eh_return)
2310 {
2311 /* We need to unwind the stack by the offset computed by
2312 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2313 based on SP. Ideally we would update the SP and define the
2314 CFA along the lines of:
2315
2316 SP = SP + EH_RETURN_STACKADJ_RTX
2317 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2318
2319 However the dwarf emitter only understands a constant
2320 register offset.
2321
2322 The solution chosen here is to use the otherwise unused IP0
2323 as a temporary register to hold the current SP value. The
2324 CFA is described using IP0 then SP is modified. */
2325
2326 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2327
2328 insn = emit_move_insn (ip0, stack_pointer_rtx);
2329 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2330 RTX_FRAME_RELATED_P (insn) = 1;
2331
2332 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2333
2334 /* Ensure the assignment to IP0 does not get optimized away. */
2335 emit_use (ip0);
2336 }
2337
2338 if (frame_size > -1)
2339 {
2340 if (frame_size >= 0x1000000)
2341 {
2342 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2343 emit_move_insn (op0, GEN_INT (frame_size));
2344 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2345 aarch64_set_frame_expr (gen_rtx_SET
2346 (Pmode, stack_pointer_rtx,
2347 gen_rtx_PLUS (Pmode,
2348 stack_pointer_rtx,
2349 GEN_INT (frame_size))));
2350 }
2351 else if (frame_size > 0)
2352 {
2353 if ((frame_size & 0xfff) != 0)
2354 {
2355 insn = emit_insn (gen_add2_insn
2356 (stack_pointer_rtx,
2357 GEN_INT ((frame_size
2358 & (HOST_WIDE_INT) 0xfff))));
2359 RTX_FRAME_RELATED_P (insn) = 1;
2360 }
2361 if ((frame_size & 0xfff) != frame_size)
2362 {
2363 insn = emit_insn (gen_add2_insn
2364 (stack_pointer_rtx,
2365 GEN_INT ((frame_size
2366 & ~ (HOST_WIDE_INT) 0xfff))));
2367 RTX_FRAME_RELATED_P (insn) = 1;
2368 }
2369 }
2370
2371 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2372 gen_rtx_PLUS (Pmode,
2373 stack_pointer_rtx,
2374 GEN_INT (offset))));
2375 }
2376
2377 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2378 if (!for_sibcall)
2379 emit_jump_insn (ret_rtx);
2380 }
2381
2382 /* Return the place to copy the exception unwinding return address to.
2383 This will probably be a stack slot, but could (in theory) be the
2384 return register. */
2385 rtx
2386 aarch64_final_eh_return_addr (void)
2387 {
2388 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2389 aarch64_layout_frame ();
2390 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2391 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2392 + crtl->outgoing_args_size);
2393 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2394 STACK_BOUNDARY / BITS_PER_UNIT);
2395 fp_offset = offset
2396 - original_frame_size
2397 - cfun->machine->frame.saved_regs_size;
2398
2399 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2400 return gen_rtx_REG (DImode, LR_REGNUM);
2401
2402 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2403 result in a store to save LR introduced by builtin_eh_return () being
2404 incorrectly deleted because the alias is not detected.
2405 So in the calculation of the address to copy the exception unwinding
2406 return address to, there are two cases to consider.
2407 If FP is needed and fp_offset is 0, it means that SP = FP and hence
2408 we return an SP-relative location, since all the addresses are SP-relative
2409 in this case. This prevents the store from being optimized away.
2410 If fp_offset is not 0, then the addresses will be FP-relative and
2411 therefore we return an FP-relative location. */
2412
2413 if (frame_pointer_needed)
2414 {
2415 if (fp_offset)
2416 return gen_frame_mem (DImode,
2417 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2418 else
2419 return gen_frame_mem (DImode,
2420 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2421 }
2422
2423 /* If FP is not needed, we calculate the location of LR, which would be
2424 at the top of the saved registers block. */
2425
2426 return gen_frame_mem (DImode,
2427 plus_constant (Pmode,
2428 stack_pointer_rtx,
2429 fp_offset
2430 + cfun->machine->frame.saved_regs_size
2431 - 2 * UNITS_PER_WORD));
2432 }
2433
2434 /* Output code to build up a constant in a register. */
2435 static void
2436 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2437 {
2438 if (aarch64_bitmask_imm (val, DImode))
2439 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2440 else
2441 {
2442 int i;
2443 int ncount = 0;
2444 int zcount = 0;
2445 HOST_WIDE_INT valp = val >> 16;
2446 HOST_WIDE_INT valm;
2447 HOST_WIDE_INT tval;
2448
2449 for (i = 16; i < 64; i += 16)
2450 {
2451 valm = (valp & 0xffff);
2452
2453 if (valm != 0)
2454 ++ zcount;
2455
2456 if (valm != 0xffff)
2457 ++ ncount;
2458
2459 valp >>= 16;
2460 }
2461
2462 /* zcount contains the number of additional MOVK instructions
2463 required if the constant is built up with an initial MOVZ instruction,
2464 while ncount is the number of MOVK instructions required if starting
2465 with a MOVN instruction. Choose the sequence that needs the fewer
2466 instructions, preferring MOVZ instructions when the two counts
2467 are equal. */
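/* Illustrative examples (the actual instructions come from the move and
   insv patterns): for 0x1234000000005678 only one 16-bit chunk above
   bit 15 is non-zero, so the MOVZ route wins and roughly

       movz  xN, #0x5678
       movk  xN, #0x1234, lsl #48

   is produced, whereas 0xffffffffffff1234 has every upper chunk equal to
   0xffff and is best started with a MOVN, needing no MOVK at all.  */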
2468 if (ncount < zcount)
2469 {
2470 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2471 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2472 tval = 0xffff;
2473 }
2474 else
2475 {
2476 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2477 GEN_INT (val & 0xffff));
2478 tval = 0;
2479 }
2480
2481 val >>= 16;
2482
2483 for (i = 16; i < 64; i += 16)
2484 {
2485 if ((val & 0xffff) != tval)
2486 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2487 GEN_INT (i), GEN_INT (val & 0xffff)));
2488 val >>= 16;
2489 }
2490 }
2491 }
2492
2493 static void
2494 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2495 {
2496 HOST_WIDE_INT mdelta = delta;
2497 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2498 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2499
2500 if (mdelta < 0)
2501 mdelta = -mdelta;
2502
2503 if (mdelta >= 4096 * 4096)
2504 {
2505 aarch64_build_constant (scratchreg, delta);
2506 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2507 }
2508 else if (mdelta > 0)
2509 {
2510 if (mdelta >= 4096)
2511 {
2512 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2513 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2514 if (delta < 0)
2515 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2516 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2517 else
2518 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2519 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2520 }
2521 if (mdelta % 4096 != 0)
2522 {
2523 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2524 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2525 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2526 }
2527 }
2528 }
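/* Illustrative example for the function above: DELTA == 5000 decomposes
   as 1 * 4096 + 904, so SCRATCHREG is loaded with 1, SCRATCHREG shifted
   left by 12 is added to the destination register, and the remaining 904
   is then added as a plain immediate.  */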
2529
2530 /* Output code to add DELTA to the first argument, and then jump
2531 to FUNCTION. Used for C++ multiple inheritance. */
2532 static void
2533 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2534 HOST_WIDE_INT delta,
2535 HOST_WIDE_INT vcall_offset,
2536 tree function)
2537 {
2538 /* The this pointer is always in x0. Note that this differs from
2539 ARM, where the this pointer may be bumped to r1 if r0 is required
2540 to return a pointer to an aggregate. On AArch64 a result value
2541 pointer will be in x8. */
2542 int this_regno = R0_REGNUM;
2543 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2544
2545 reload_completed = 1;
2546 emit_note (NOTE_INSN_PROLOGUE_END);
2547
2548 if (vcall_offset == 0)
2549 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2550 else
2551 {
2552 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2553
2554 this_rtx = gen_rtx_REG (Pmode, this_regno);
2555 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2556 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2557
2558 addr = this_rtx;
2559 if (delta != 0)
2560 {
2561 if (delta >= -256 && delta < 256)
2562 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2563 plus_constant (Pmode, this_rtx, delta));
2564 else
2565 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2566 }
2567
2568 if (Pmode == ptr_mode)
2569 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2570 else
2571 aarch64_emit_move (temp0,
2572 gen_rtx_ZERO_EXTEND (Pmode,
2573 gen_rtx_MEM (ptr_mode, addr)));
2574
2575 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2576 addr = plus_constant (Pmode, temp0, vcall_offset);
2577 else
2578 {
2579 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2580 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2581 }
2582
2583 if (Pmode == ptr_mode)
2584 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2585 else
2586 aarch64_emit_move (temp1,
2587 gen_rtx_SIGN_EXTEND (Pmode,
2588 gen_rtx_MEM (ptr_mode, addr)));
2589
2590 emit_insn (gen_add2_insn (this_rtx, temp1));
2591 }
2592
2593 /* Generate a tail call to the target function. */
2594 if (!TREE_USED (function))
2595 {
2596 assemble_external (function);
2597 TREE_USED (function) = 1;
2598 }
2599 funexp = XEXP (DECL_RTL (function), 0);
2600 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2601 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2602 SIBLING_CALL_P (insn) = 1;
2603
2604 insn = get_insns ();
2605 shorten_branches (insn);
2606 final_start_function (insn, file, 1);
2607 final (insn, file, 1);
2608 final_end_function ();
2609
2610 /* Stop pretending to be a post-reload pass. */
2611 reload_completed = 0;
2612 }
2613
2614 static int
2615 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2616 {
2617 if (GET_CODE (*x) == SYMBOL_REF)
2618 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2619
2620 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2621 TLS offsets, not real symbol references. */
2622 if (GET_CODE (*x) == UNSPEC
2623 && XINT (*x, 1) == UNSPEC_TLS)
2624 return -1;
2625
2626 return 0;
2627 }
2628
2629 static bool
2630 aarch64_tls_referenced_p (rtx x)
2631 {
2632 if (!TARGET_HAVE_TLS)
2633 return false;
2634
2635 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2636 }
2637
2638
2639 static int
2640 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2641 {
2642 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2643 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2644
2645 if (*imm1 < *imm2)
2646 return -1;
2647 if (*imm1 > *imm2)
2648 return +1;
2649 return 0;
2650 }
2651
2652
2653 static void
2654 aarch64_build_bitmask_table (void)
2655 {
2656 unsigned HOST_WIDE_INT mask, imm;
2657 unsigned int log_e, e, s, r;
2658 unsigned int nimms = 0;
2659
2660 for (log_e = 1; log_e <= 6; log_e++)
2661 {
2662 e = 1 << log_e;
2663 if (e == 64)
2664 mask = ~(HOST_WIDE_INT) 0;
2665 else
2666 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2667 for (s = 1; s < e; s++)
2668 {
2669 for (r = 0; r < e; r++)
2670 {
2671 /* Set S consecutive bits to 1 (S < 64). */
2672 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2673 /* Rotate right by R. */
2674 if (r != 0)
2675 imm = ((imm >> r) | (imm << (e - r))) & mask;
2676 /* Replicate the element to fill 64 bits, depending on the element size; each case deliberately falls through to the next. */
2677 switch (log_e) {
2678 case 1: imm |= (imm << 2);
2679 case 2: imm |= (imm << 4);
2680 case 3: imm |= (imm << 8);
2681 case 4: imm |= (imm << 16);
2682 case 5: imm |= (imm << 32);
2683 case 6:
2684 break;
2685 default:
2686 gcc_unreachable ();
2687 }
2688 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2689 aarch64_bitmasks[nimms++] = imm;
2690 }
2691 }
2692 }
2693
2694 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2695 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2696 aarch64_bitmasks_cmp);
2697 }
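/* Worked example for the table above (illustrative): with e = 16, s = 8
   and r = 4 the element is 0xf00f (eight consecutive ones rotated right
   by four), which replicates to 0xf00ff00ff00ff00f; aarch64_bitmask_imm
   below then finds such values with a binary search of the sorted
   table.  */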
2698
2699
2700 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2701 a left shift of 0 or 12 bits. */
2702 bool
2703 aarch64_uimm12_shift (HOST_WIDE_INT val)
2704 {
2705 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2706 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2707 );
2708 }
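/* For example, 0xfff and 0xfff000 satisfy the test above, while 0x1001
   does not, because its set bits straddle both 12-bit fields.  */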
2709
2710
2711 /* Return true if val is an immediate that can be loaded into a
2712 register by a MOVZ instruction. */
2713 static bool
2714 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2715 {
2716 if (GET_MODE_SIZE (mode) > 4)
2717 {
2718 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2719 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2720 return 1;
2721 }
2722 else
2723 {
2724 /* Ignore sign extension. */
2725 val &= (HOST_WIDE_INT) 0xffffffff;
2726 }
2727 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2728 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2729 }
2730
2731
2732 /* Return true if val is a valid bitmask immediate. */
2733 bool
2734 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2735 {
2736 if (GET_MODE_SIZE (mode) < 8)
2737 {
2738 /* Replicate bit pattern. */
2739 val &= (HOST_WIDE_INT) 0xffffffff;
2740 val |= val << 32;
2741 }
2742 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2743 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2744 }
2745
2746
2747 /* Return true if val is an immediate that can be loaded into a
2748 register in a single instruction. */
2749 bool
2750 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2751 {
2752 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2753 return 1;
2754 return aarch64_bitmask_imm (val, mode);
2755 }
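/* Illustrative examples: 0x0000ffff00000000 is a single MOVZ,
   0xffffffff0000ffff is a single MOVN (its complement is MOVZ-encodable),
   and 0x5555555555555555 is a bitmask immediate, so all three satisfy
   aarch64_move_imm.  */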
2756
2757 static bool
2758 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2759 {
2760 rtx base, offset;
2761
2762 if (GET_CODE (x) == HIGH)
2763 return true;
2764
2765 split_const (x, &base, &offset);
2766 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2767 {
2768 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2769 != SYMBOL_FORCE_TO_MEM)
2770 return true;
2771 else
2772 /* Avoid generating a 64-bit relocation in ILP32; leave
2773 to aarch64_expand_mov_immediate to handle it properly. */
2774 return mode != ptr_mode;
2775 }
2776
2777 return aarch64_tls_referenced_p (x);
2778 }
2779
2780 /* Return true if register REGNO is a valid index register.
2781 STRICT_P is true if REG_OK_STRICT is in effect. */
2782
2783 bool
2784 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2785 {
2786 if (!HARD_REGISTER_NUM_P (regno))
2787 {
2788 if (!strict_p)
2789 return true;
2790
2791 if (!reg_renumber)
2792 return false;
2793
2794 regno = reg_renumber[regno];
2795 }
2796 return GP_REGNUM_P (regno);
2797 }
2798
2799 /* Return true if register REGNO is a valid base register.
2800 STRICT_P is true if REG_OK_STRICT is in effect. */
2801
2802 bool
2803 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2804 {
2805 if (!HARD_REGISTER_NUM_P (regno))
2806 {
2807 if (!strict_p)
2808 return true;
2809
2810 if (!reg_renumber)
2811 return false;
2812
2813 regno = reg_renumber[regno];
2814 }
2815
2816 /* The fake registers will be eliminated to either the stack or
2817 hard frame pointer, both of which are usually valid base registers.
2818 Reload deals with the cases where the eliminated form isn't valid. */
2819 return (GP_REGNUM_P (regno)
2820 || regno == SP_REGNUM
2821 || regno == FRAME_POINTER_REGNUM
2822 || regno == ARG_POINTER_REGNUM);
2823 }
2824
2825 /* Return true if X is a valid base register.
2826 STRICT_P is true if REG_OK_STRICT is in effect. */
2827
2828 static bool
2829 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2830 {
2831 if (!strict_p && GET_CODE (x) == SUBREG)
2832 x = SUBREG_REG (x);
2833
2834 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2835 }
2836
2837 /* Return true if address offset is a valid index. If it is, fill in INFO
2838 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2839
2840 static bool
2841 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2842 enum machine_mode mode, bool strict_p)
2843 {
2844 enum aarch64_address_type type;
2845 rtx index;
2846 int shift;
2847
2848 /* (reg:P) */
2849 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2850 && GET_MODE (x) == Pmode)
2851 {
2852 type = ADDRESS_REG_REG;
2853 index = x;
2854 shift = 0;
2855 }
2856 /* (sign_extend:DI (reg:SI)) */
2857 else if ((GET_CODE (x) == SIGN_EXTEND
2858 || GET_CODE (x) == ZERO_EXTEND)
2859 && GET_MODE (x) == DImode
2860 && GET_MODE (XEXP (x, 0)) == SImode)
2861 {
2862 type = (GET_CODE (x) == SIGN_EXTEND)
2863 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2864 index = XEXP (x, 0);
2865 shift = 0;
2866 }
2867 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2868 else if (GET_CODE (x) == MULT
2869 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2870 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2871 && GET_MODE (XEXP (x, 0)) == DImode
2872 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2873 && CONST_INT_P (XEXP (x, 1)))
2874 {
2875 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2876 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2877 index = XEXP (XEXP (x, 0), 0);
2878 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2879 }
2880 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2881 else if (GET_CODE (x) == ASHIFT
2882 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2883 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2884 && GET_MODE (XEXP (x, 0)) == DImode
2885 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2886 && CONST_INT_P (XEXP (x, 1)))
2887 {
2888 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2889 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2890 index = XEXP (XEXP (x, 0), 0);
2891 shift = INTVAL (XEXP (x, 1));
2892 }
2893 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2894 else if ((GET_CODE (x) == SIGN_EXTRACT
2895 || GET_CODE (x) == ZERO_EXTRACT)
2896 && GET_MODE (x) == DImode
2897 && GET_CODE (XEXP (x, 0)) == MULT
2898 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2899 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2900 {
2901 type = (GET_CODE (x) == SIGN_EXTRACT)
2902 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2903 index = XEXP (XEXP (x, 0), 0);
2904 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2905 if (INTVAL (XEXP (x, 1)) != 32 + shift
2906 || INTVAL (XEXP (x, 2)) != 0)
2907 shift = -1;
2908 }
2909 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2910 (const_int 0xffffffff<<shift)) */
2911 else if (GET_CODE (x) == AND
2912 && GET_MODE (x) == DImode
2913 && GET_CODE (XEXP (x, 0)) == MULT
2914 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2915 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2916 && CONST_INT_P (XEXP (x, 1)))
2917 {
2918 type = ADDRESS_REG_UXTW;
2919 index = XEXP (XEXP (x, 0), 0);
2920 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2921 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2922 shift = -1;
2923 }
2924 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2925 else if ((GET_CODE (x) == SIGN_EXTRACT
2926 || GET_CODE (x) == ZERO_EXTRACT)
2927 && GET_MODE (x) == DImode
2928 && GET_CODE (XEXP (x, 0)) == ASHIFT
2929 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2930 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2931 {
2932 type = (GET_CODE (x) == SIGN_EXTRACT)
2933 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2934 index = XEXP (XEXP (x, 0), 0);
2935 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2936 if (INTVAL (XEXP (x, 1)) != 32 + shift
2937 || INTVAL (XEXP (x, 2)) != 0)
2938 shift = -1;
2939 }
2940 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2941 (const_int 0xffffffff<<shift)) */
2942 else if (GET_CODE (x) == AND
2943 && GET_MODE (x) == DImode
2944 && GET_CODE (XEXP (x, 0)) == ASHIFT
2945 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2946 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2947 && CONST_INT_P (XEXP (x, 1)))
2948 {
2949 type = ADDRESS_REG_UXTW;
2950 index = XEXP (XEXP (x, 0), 0);
2951 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2952 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2953 shift = -1;
2954 }
2955 /* (mult:P (reg:P) (const_int scale)) */
2956 else if (GET_CODE (x) == MULT
2957 && GET_MODE (x) == Pmode
2958 && GET_MODE (XEXP (x, 0)) == Pmode
2959 && CONST_INT_P (XEXP (x, 1)))
2960 {
2961 type = ADDRESS_REG_REG;
2962 index = XEXP (x, 0);
2963 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2964 }
2965 /* (ashift:P (reg:P) (const_int shift)) */
2966 else if (GET_CODE (x) == ASHIFT
2967 && GET_MODE (x) == Pmode
2968 && GET_MODE (XEXP (x, 0)) == Pmode
2969 && CONST_INT_P (XEXP (x, 1)))
2970 {
2971 type = ADDRESS_REG_REG;
2972 index = XEXP (x, 0);
2973 shift = INTVAL (XEXP (x, 1));
2974 }
2975 else
2976 return false;
2977
2978 if (GET_CODE (index) == SUBREG)
2979 index = SUBREG_REG (index);
2980
2981 if ((shift == 0
2982 || (shift > 0 && shift <= 3
2983 && (1 << shift) == GET_MODE_SIZE (mode)))
2984 && REG_P (index)
2985 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2986 {
2987 info->type = type;
2988 info->offset = index;
2989 info->shift = shift;
2990 return true;
2991 }
2992
2993 return false;
2994 }
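/* For example (illustrative, assuming the LP64 Pmode of DImode), an index
   of the form (mult:DI (reg:DI x2) (const_int 8)) used in a DImode access
   classifies as ADDRESS_REG_REG with shift 3 and is later printed as
   "[x0, x2, lsl 3]" when the base register is x0.  */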
2995
2996 static inline bool
2997 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2998 {
2999 return (offset >= -64 * GET_MODE_SIZE (mode)
3000 && offset < 64 * GET_MODE_SIZE (mode)
3001 && offset % GET_MODE_SIZE (mode) == 0);
3002 }
3003
3004 static inline bool
3005 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3006 HOST_WIDE_INT offset)
3007 {
3008 return offset >= -256 && offset < 256;
3009 }
3010
3011 static inline bool
3012 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3013 {
3014 return (offset >= 0
3015 && offset < 4096 * GET_MODE_SIZE (mode)
3016 && offset % GET_MODE_SIZE (mode) == 0);
3017 }
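/* Taking DImode as a concrete example, the three helpers above accept
   offsets in [-512, 504] in steps of 8, [-256, 255] unscaled, and
   [0, 32760] in steps of 8, respectively.  */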
3018
3019 /* Return true if X is a valid address for machine mode MODE. If it is,
3020 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3021 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3022
3023 static bool
3024 aarch64_classify_address (struct aarch64_address_info *info,
3025 rtx x, enum machine_mode mode,
3026 RTX_CODE outer_code, bool strict_p)
3027 {
3028 enum rtx_code code = GET_CODE (x);
3029 rtx op0, op1;
3030 bool allow_reg_index_p =
3031 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3032
3033 /* Don't support anything other than POST_INC or REG addressing for
3034 AdvSIMD. */
3035 if (aarch64_vector_mode_p (mode)
3036 && (code != POST_INC && code != REG))
3037 return false;
3038
3039 switch (code)
3040 {
3041 case REG:
3042 case SUBREG:
3043 info->type = ADDRESS_REG_IMM;
3044 info->base = x;
3045 info->offset = const0_rtx;
3046 return aarch64_base_register_rtx_p (x, strict_p);
3047
3048 case PLUS:
3049 op0 = XEXP (x, 0);
3050 op1 = XEXP (x, 1);
3051 if (GET_MODE_SIZE (mode) != 0
3052 && CONST_INT_P (op1)
3053 && aarch64_base_register_rtx_p (op0, strict_p))
3054 {
3055 HOST_WIDE_INT offset = INTVAL (op1);
3056
3057 info->type = ADDRESS_REG_IMM;
3058 info->base = op0;
3059 info->offset = op1;
3060
3061 /* TImode and TFmode values are allowed in both pairs of X
3062 registers and individual Q registers. The available
3063 address modes are:
3064 X,X: 7-bit signed scaled offset
3065 Q: 9-bit signed offset
3066 We conservatively require an offset representable in either mode.
3067 */
3068 if (mode == TImode || mode == TFmode)
3069 return (offset_7bit_signed_scaled_p (mode, offset)
3070 && offset_9bit_signed_unscaled_p (mode, offset));
3071
3072 if (outer_code == PARALLEL)
3073 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3074 && offset_7bit_signed_scaled_p (mode, offset));
3075 else
3076 return (offset_9bit_signed_unscaled_p (mode, offset)
3077 || offset_12bit_unsigned_scaled_p (mode, offset));
3078 }
3079
3080 if (allow_reg_index_p)
3081 {
3082 /* Look for base + (scaled/extended) index register. */
3083 if (aarch64_base_register_rtx_p (op0, strict_p)
3084 && aarch64_classify_index (info, op1, mode, strict_p))
3085 {
3086 info->base = op0;
3087 return true;
3088 }
3089 if (aarch64_base_register_rtx_p (op1, strict_p)
3090 && aarch64_classify_index (info, op0, mode, strict_p))
3091 {
3092 info->base = op1;
3093 return true;
3094 }
3095 }
3096
3097 return false;
3098
3099 case POST_INC:
3100 case POST_DEC:
3101 case PRE_INC:
3102 case PRE_DEC:
3103 info->type = ADDRESS_REG_WB;
3104 info->base = XEXP (x, 0);
3105 info->offset = NULL_RTX;
3106 return aarch64_base_register_rtx_p (info->base, strict_p);
3107
3108 case POST_MODIFY:
3109 case PRE_MODIFY:
3110 info->type = ADDRESS_REG_WB;
3111 info->base = XEXP (x, 0);
3112 if (GET_CODE (XEXP (x, 1)) == PLUS
3113 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3114 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3115 && aarch64_base_register_rtx_p (info->base, strict_p))
3116 {
3117 HOST_WIDE_INT offset;
3118 info->offset = XEXP (XEXP (x, 1), 1);
3119 offset = INTVAL (info->offset);
3120
3121 /* TImode and TFmode values are allowed in both pairs of X
3122 registers and individual Q registers. The available
3123 address modes are:
3124 X,X: 7-bit signed scaled offset
3125 Q: 9-bit signed offset
3126 We conservatively require an offset representable in either mode.
3127 */
3128 if (mode == TImode || mode == TFmode)
3129 return (offset_7bit_signed_scaled_p (mode, offset)
3130 && offset_9bit_signed_unscaled_p (mode, offset));
3131
3132 if (outer_code == PARALLEL)
3133 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3134 && offset_7bit_signed_scaled_p (mode, offset));
3135 else
3136 return offset_9bit_signed_unscaled_p (mode, offset);
3137 }
3138 return false;
3139
3140 case CONST:
3141 case SYMBOL_REF:
3142 case LABEL_REF:
3143 /* Load literal: PC-relative constant pool entry. Only supported
3144 for SI mode or larger. */
3145 info->type = ADDRESS_SYMBOLIC;
3146 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3147 {
3148 rtx sym, addend;
3149
3150 split_const (x, &sym, &addend);
3151 return (GET_CODE (sym) == LABEL_REF
3152 || (GET_CODE (sym) == SYMBOL_REF
3153 && CONSTANT_POOL_ADDRESS_P (sym)));
3154 }
3155 return false;
3156
3157 case LO_SUM:
3158 info->type = ADDRESS_LO_SUM;
3159 info->base = XEXP (x, 0);
3160 info->offset = XEXP (x, 1);
3161 if (allow_reg_index_p
3162 && aarch64_base_register_rtx_p (info->base, strict_p))
3163 {
3164 rtx sym, offs;
3165 split_const (info->offset, &sym, &offs);
3166 if (GET_CODE (sym) == SYMBOL_REF
3167 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3168 == SYMBOL_SMALL_ABSOLUTE))
3169 {
3170 /* The symbol and offset must be aligned to the access size. */
3171 unsigned int align;
3172 unsigned int ref_size;
3173
3174 if (CONSTANT_POOL_ADDRESS_P (sym))
3175 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3176 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3177 {
3178 tree exp = SYMBOL_REF_DECL (sym);
3179 align = TYPE_ALIGN (TREE_TYPE (exp));
3180 align = CONSTANT_ALIGNMENT (exp, align);
3181 }
3182 else if (SYMBOL_REF_DECL (sym))
3183 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3184 else
3185 align = BITS_PER_UNIT;
3186
3187 ref_size = GET_MODE_SIZE (mode);
3188 if (ref_size == 0)
3189 ref_size = GET_MODE_SIZE (DImode);
3190
3191 return ((INTVAL (offs) & (ref_size - 1)) == 0
3192 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3193 }
3194 }
3195 return false;
3196
3197 default:
3198 return false;
3199 }
3200 }
3201
3202 bool
3203 aarch64_symbolic_address_p (rtx x)
3204 {
3205 rtx offset;
3206
3207 split_const (x, &x, &offset);
3208 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3209 }
3210
3211 /* Classify the base of symbolic expression X, given that X appears in
3212 context CONTEXT. */
3213
3214 enum aarch64_symbol_type
3215 aarch64_classify_symbolic_expression (rtx x,
3216 enum aarch64_symbol_context context)
3217 {
3218 rtx offset;
3219
3220 split_const (x, &x, &offset);
3221 return aarch64_classify_symbol (x, context);
3222 }
3223
3224
3225 /* Return TRUE if X is a legitimate address for accessing memory in
3226 mode MODE. */
3227 static bool
3228 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3229 {
3230 struct aarch64_address_info addr;
3231
3232 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3233 }
3234
3235 /* Return TRUE if X is a legitimate address for accessing memory in
3236 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3237 pair operation. */
3238 bool
3239 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3240 RTX_CODE outer_code, bool strict_p)
3241 {
3242 struct aarch64_address_info addr;
3243
3244 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3245 }
3246
3247 /* Return TRUE if rtx X is an immediate constant 0.0. */
3248 bool
3249 aarch64_float_const_zero_rtx_p (rtx x)
3250 {
3251 REAL_VALUE_TYPE r;
3252
3253 if (GET_MODE (x) == VOIDmode)
3254 return false;
3255
3256 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3257 if (REAL_VALUE_MINUS_ZERO (r))
3258 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3259 return REAL_VALUES_EQUAL (r, dconst0);
3260 }
3261
3262 /* Return the fixed registers used for condition codes. */
3263
3264 static bool
3265 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3266 {
3267 *p1 = CC_REGNUM;
3268 *p2 = INVALID_REGNUM;
3269 return true;
3270 }
3271
3272 enum machine_mode
3273 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3274 {
3275 /* All floating point comparisons return CCFP, except for LT, LE, GT and
3276 GE, which must raise an exception on unordered operands and so return CCFPE. */
3277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3278 {
3279 switch (code)
3280 {
3281 case EQ:
3282 case NE:
3283 case UNORDERED:
3284 case ORDERED:
3285 case UNLT:
3286 case UNLE:
3287 case UNGT:
3288 case UNGE:
3289 case UNEQ:
3290 case LTGT:
3291 return CCFPmode;
3292
3293 case LT:
3294 case LE:
3295 case GT:
3296 case GE:
3297 return CCFPEmode;
3298
3299 default:
3300 gcc_unreachable ();
3301 }
3302 }
3303
3304 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3305 && y == const0_rtx
3306 && (code == EQ || code == NE || code == LT || code == GE)
3307 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3308 || GET_CODE (x) == NEG))
3309 return CC_NZmode;
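/* For example, (compare (plus x y) (const_int 0)) tested for EQ selects
   CC_NZmode here, which lets the comparison be implemented as an ADDS of
   x and y rather than a separate ADD and CMP.  */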
3310
3311 /* A compare with a shifted operand. Because of canonicalization,
3312 the comparison will have to be swapped when we emit the assembly
3313 code. */
3314 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3315 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3316 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3317 || GET_CODE (x) == LSHIFTRT
3318 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3319 return CC_SWPmode;
3320
3321 /* A compare of a mode narrower than SI mode against zero can be done
3322 by extending the value in the comparison. */
3323 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3324 && y == const0_rtx)
3325 /* Only use sign-extension if we really need it. */
3326 return ((code == GT || code == GE || code == LE || code == LT)
3327 ? CC_SESWPmode : CC_ZESWPmode);
3328
3329 /* For everything else, return CCmode. */
3330 return CCmode;
3331 }
3332
3333 static unsigned
3334 aarch64_get_condition_code (rtx x)
3335 {
3336 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3337 enum rtx_code comp_code = GET_CODE (x);
3338
3339 if (GET_MODE_CLASS (mode) != MODE_CC)
3340 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3341
3342 switch (mode)
3343 {
3344 case CCFPmode:
3345 case CCFPEmode:
3346 switch (comp_code)
3347 {
3348 case GE: return AARCH64_GE;
3349 case GT: return AARCH64_GT;
3350 case LE: return AARCH64_LS;
3351 case LT: return AARCH64_MI;
3352 case NE: return AARCH64_NE;
3353 case EQ: return AARCH64_EQ;
3354 case ORDERED: return AARCH64_VC;
3355 case UNORDERED: return AARCH64_VS;
3356 case UNLT: return AARCH64_LT;
3357 case UNLE: return AARCH64_LE;
3358 case UNGT: return AARCH64_HI;
3359 case UNGE: return AARCH64_PL;
3360 default: gcc_unreachable ();
3361 }
3362 break;
3363
3364 case CCmode:
3365 switch (comp_code)
3366 {
3367 case NE: return AARCH64_NE;
3368 case EQ: return AARCH64_EQ;
3369 case GE: return AARCH64_GE;
3370 case GT: return AARCH64_GT;
3371 case LE: return AARCH64_LE;
3372 case LT: return AARCH64_LT;
3373 case GEU: return AARCH64_CS;
3374 case GTU: return AARCH64_HI;
3375 case LEU: return AARCH64_LS;
3376 case LTU: return AARCH64_CC;
3377 default: gcc_unreachable ();
3378 }
3379 break;
3380
3381 case CC_SWPmode:
3382 case CC_ZESWPmode:
3383 case CC_SESWPmode:
3384 switch (comp_code)
3385 {
3386 case NE: return AARCH64_NE;
3387 case EQ: return AARCH64_EQ;
3388 case GE: return AARCH64_LE;
3389 case GT: return AARCH64_LT;
3390 case LE: return AARCH64_GE;
3391 case LT: return AARCH64_GT;
3392 case GEU: return AARCH64_LS;
3393 case GTU: return AARCH64_CC;
3394 case LEU: return AARCH64_CS;
3395 case LTU: return AARCH64_HI;
3396 default: gcc_unreachable ();
3397 }
3398 break;
3399
3400 case CC_NZmode:
3401 switch (comp_code)
3402 {
3403 case NE: return AARCH64_NE;
3404 case EQ: return AARCH64_EQ;
3405 case GE: return AARCH64_PL;
3406 case LT: return AARCH64_MI;
3407 default: gcc_unreachable ();
3408 }
3409 break;
3410
3411 default:
3412 gcc_unreachable ();
3413 break;
3414 }
3415 }
3416
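/* Return the number of bits set in VALUE; the loop clears the lowest set
   bit on each iteration (Kernighan's method), so it runs once per set
   bit.  */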
3417 static unsigned
3418 bit_count (unsigned HOST_WIDE_INT value)
3419 {
3420 unsigned count = 0;
3421
3422 while (value)
3423 {
3424 count++;
3425 value &= value - 1;
3426 }
3427
3428 return count;
3429 }
3430
3431 void
3432 aarch64_print_operand (FILE *f, rtx x, char code)
3433 {
3434 switch (code)
3435 {
3436 case 'e':
3437 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3438 {
3439 int n;
3440
3441 if (GET_CODE (x) != CONST_INT
3442 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3443 {
3444 output_operand_lossage ("invalid operand for '%%%c'", code);
3445 return;
3446 }
3447
3448 switch (n)
3449 {
3450 case 3:
3451 fputc ('b', f);
3452 break;
3453 case 4:
3454 fputc ('h', f);
3455 break;
3456 case 5:
3457 fputc ('w', f);
3458 break;
3459 default:
3460 output_operand_lossage ("invalid operand for '%%%c'", code);
3461 return;
3462 }
3463 }
3464 break;
3465
3466 case 'p':
3467 {
3468 int n;
3469
3470 /* Print N such that 2^N == X. */
3471 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3472 {
3473 output_operand_lossage ("invalid operand for '%%%c'", code);
3474 return;
3475 }
3476
3477 asm_fprintf (f, "%d", n);
3478 }
3479 break;
3480
3481 case 'P':
3482 /* Print the number of non-zero bits in X (a const_int). */
3483 if (GET_CODE (x) != CONST_INT)
3484 {
3485 output_operand_lossage ("invalid operand for '%%%c'", code);
3486 return;
3487 }
3488
3489 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3490 break;
3491
3492 case 'H':
3493 /* Print the higher numbered register of a pair (TImode) of regs. */
3494 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3495 {
3496 output_operand_lossage ("invalid operand for '%%%c'", code);
3497 return;
3498 }
3499
3500 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3501 break;
3502
3503 case 'm':
3504 /* Print a condition (eq, ne, etc). */
3505
3506 /* CONST_TRUE_RTX means always -- that's the default. */
3507 if (x == const_true_rtx)
3508 return;
3509
3510 if (!COMPARISON_P (x))
3511 {
3512 output_operand_lossage ("invalid operand for '%%%c'", code);
3513 return;
3514 }
3515
3516 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3517 break;
3518
3519 case 'M':
3520 /* Print the inverse of a condition (eq <-> ne, etc). */
3521
3522 /* CONST_TRUE_RTX means never -- that's the default. */
3523 if (x == const_true_rtx)
3524 {
3525 fputs ("nv", f);
3526 return;
3527 }
3528
3529 if (!COMPARISON_P (x))
3530 {
3531 output_operand_lossage ("invalid operand for '%%%c'", code);
3532 return;
3533 }
3534
3535 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3536 (aarch64_get_condition_code (x))], f);
3537 break;
3538
3539 case 'b':
3540 case 'h':
3541 case 's':
3542 case 'd':
3543 case 'q':
3544 /* Print a scalar FP/SIMD register name. */
3545 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3546 {
3547 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3548 return;
3549 }
3550 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3551 break;
3552
3553 case 'S':
3554 case 'T':
3555 case 'U':
3556 case 'V':
3557 /* Print the first FP/SIMD register name in a list. */
3558 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3559 {
3560 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3561 return;
3562 }
3563 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3564 break;
3565
3566 case 'X':
3567 /* Print bottom 16 bits of integer constant in hex. */
3568 if (GET_CODE (x) != CONST_INT)
3569 {
3570 output_operand_lossage ("invalid operand for '%%%c'", code);
3571 return;
3572 }
3573 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3574 break;
3575
3576 case 'w':
3577 case 'x':
3578 /* Print a general register name or the zero register (32-bit or
3579 64-bit). */
3580 if (x == const0_rtx
3581 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3582 {
3583 asm_fprintf (f, "%czr", code);
3584 break;
3585 }
3586
3587 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3588 {
3589 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3590 break;
3591 }
3592
3593 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3594 {
3595 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3596 break;
3597 }
3598
3599 /* Fall through */
3600
3601 case 0:
3602 /* Print a normal operand. If it's a general register, then we
3603 assume DImode. */
3604 if (x == NULL)
3605 {
3606 output_operand_lossage ("missing operand");
3607 return;
3608 }
3609
3610 switch (GET_CODE (x))
3611 {
3612 case REG:
3613 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3614 break;
3615
3616 case MEM:
3617 aarch64_memory_reference_mode = GET_MODE (x);
3618 output_address (XEXP (x, 0));
3619 break;
3620
3621 case LABEL_REF:
3622 case SYMBOL_REF:
3623 output_addr_const (asm_out_file, x);
3624 break;
3625
3626 case CONST_INT:
3627 asm_fprintf (f, "%wd", INTVAL (x));
3628 break;
3629
3630 case CONST_VECTOR:
3631 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3632 {
3633 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3634 HOST_WIDE_INT_MIN,
3635 HOST_WIDE_INT_MAX));
3636 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3637 }
3638 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3639 {
3640 fputc ('0', f);
3641 }
3642 else
3643 gcc_unreachable ();
3644 break;
3645
3646 case CONST_DOUBLE:
3647 /* CONST_DOUBLE can represent a double-width integer.
3648 In this case, the mode of x is VOIDmode. */
3649 if (GET_MODE (x) == VOIDmode)
3650 ; /* Do Nothing. */
3651 else if (aarch64_float_const_zero_rtx_p (x))
3652 {
3653 fputc ('0', f);
3654 break;
3655 }
3656 else if (aarch64_float_const_representable_p (x))
3657 {
3658 #define buf_size 20
3659 char float_buf[buf_size] = {'\0'};
3660 REAL_VALUE_TYPE r;
3661 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3662 real_to_decimal_for_mode (float_buf, &r,
3663 buf_size, buf_size,
3664 1, GET_MODE (x));
3665 asm_fprintf (asm_out_file, "%s", float_buf);
3666 break;
3667 #undef buf_size
3668 }
3669 output_operand_lossage ("invalid constant");
3670 return;
3671 default:
3672 output_operand_lossage ("invalid operand");
3673 return;
3674 }
3675 break;
3676
3677 case 'A':
3678 if (GET_CODE (x) == HIGH)
3679 x = XEXP (x, 0);
3680
3681 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3682 {
3683 case SYMBOL_SMALL_GOT:
3684 asm_fprintf (asm_out_file, ":got:");
3685 break;
3686
3687 case SYMBOL_SMALL_TLSGD:
3688 asm_fprintf (asm_out_file, ":tlsgd:");
3689 break;
3690
3691 case SYMBOL_SMALL_TLSDESC:
3692 asm_fprintf (asm_out_file, ":tlsdesc:");
3693 break;
3694
3695 case SYMBOL_SMALL_GOTTPREL:
3696 asm_fprintf (asm_out_file, ":gottprel:");
3697 break;
3698
3699 case SYMBOL_SMALL_TPREL:
3700 asm_fprintf (asm_out_file, ":tprel:");
3701 break;
3702
3703 case SYMBOL_TINY_GOT:
3704 gcc_unreachable ();
3705 break;
3706
3707 default:
3708 break;
3709 }
3710 output_addr_const (asm_out_file, x);
3711 break;
3712
3713 case 'L':
3714 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3715 {
3716 case SYMBOL_SMALL_GOT:
3717 asm_fprintf (asm_out_file, ":lo12:");
3718 break;
3719
3720 case SYMBOL_SMALL_TLSGD:
3721 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3722 break;
3723
3724 case SYMBOL_SMALL_TLSDESC:
3725 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3726 break;
3727
3728 case SYMBOL_SMALL_GOTTPREL:
3729 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3730 break;
3731
3732 case SYMBOL_SMALL_TPREL:
3733 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3734 break;
3735
3736 case SYMBOL_TINY_GOT:
3737 asm_fprintf (asm_out_file, ":got:");
3738 break;
3739
3740 default:
3741 break;
3742 }
3743 output_addr_const (asm_out_file, x);
3744 break;
3745
3746 case 'G':
3747
3748 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3749 {
3750 case SYMBOL_SMALL_TPREL:
3751 asm_fprintf (asm_out_file, ":tprel_hi12:");
3752 break;
3753 default:
3754 break;
3755 }
3756 output_addr_const (asm_out_file, x);
3757 break;
3758
3759 default:
3760 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3761 return;
3762 }
3763 }
3764
3765 void
3766 aarch64_print_operand_address (FILE *f, rtx x)
3767 {
3768 struct aarch64_address_info addr;
3769
3770 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3771 MEM, true))
3772 switch (addr.type)
3773 {
3774 case ADDRESS_REG_IMM:
3775 if (addr.offset == const0_rtx)
3776 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3777 else
3778 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3779 INTVAL (addr.offset));
3780 return;
3781
3782 case ADDRESS_REG_REG:
3783 if (addr.shift == 0)
3784 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3785 reg_names [REGNO (addr.offset)]);
3786 else
3787 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3788 reg_names [REGNO (addr.offset)], addr.shift);
3789 return;
3790
3791 case ADDRESS_REG_UXTW:
3792 if (addr.shift == 0)
3793 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3794 REGNO (addr.offset) - R0_REGNUM);
3795 else
3796 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3797 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3798 return;
3799
3800 case ADDRESS_REG_SXTW:
3801 if (addr.shift == 0)
3802 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3803 REGNO (addr.offset) - R0_REGNUM);
3804 else
3805 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3806 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3807 return;
3808
3809 case ADDRESS_REG_WB:
3810 switch (GET_CODE (x))
3811 {
3812 case PRE_INC:
3813 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3814 GET_MODE_SIZE (aarch64_memory_reference_mode));
3815 return;
3816 case POST_INC:
3817 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3818 GET_MODE_SIZE (aarch64_memory_reference_mode));
3819 return;
3820 case PRE_DEC:
3821 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3822 GET_MODE_SIZE (aarch64_memory_reference_mode));
3823 return;
3824 case POST_DEC:
3825 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3826 GET_MODE_SIZE (aarch64_memory_reference_mode));
3827 return;
3828 case PRE_MODIFY:
3829 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3830 INTVAL (addr.offset));
3831 return;
3832 case POST_MODIFY:
3833 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3834 INTVAL (addr.offset));
3835 return;
3836 default:
3837 break;
3838 }
3839 break;
3840
3841 case ADDRESS_LO_SUM:
3842 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3843 output_addr_const (f, addr.offset);
3844 asm_fprintf (f, "]");
3845 return;
3846
3847 case ADDRESS_SYMBOLIC:
3848 break;
3849 }
3850
3851 output_addr_const (f, x);
3852 }
3853
3854 void
3855 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3856 int labelno ATTRIBUTE_UNUSED)
3857 {
3858 sorry ("function profiling");
3859 }
3860
3861 bool
3862 aarch64_label_mentioned_p (rtx x)
3863 {
3864 const char *fmt;
3865 int i;
3866
3867 if (GET_CODE (x) == LABEL_REF)
3868 return true;
3869
3870 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3871 referencing instruction, but they are constant offsets, not
3872 symbols. */
3873 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3874 return false;
3875
3876 fmt = GET_RTX_FORMAT (GET_CODE (x));
3877 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3878 {
3879 if (fmt[i] == 'E')
3880 {
3881 int j;
3882
3883 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3884 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3885 return 1;
3886 }
3887 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3888 return 1;
3889 }
3890
3891 return 0;
3892 }
3893
3894 /* Implement REGNO_REG_CLASS. */
3895
3896 enum reg_class
3897 aarch64_regno_regclass (unsigned regno)
3898 {
3899 if (GP_REGNUM_P (regno))
3900 return CORE_REGS;
3901
3902 if (regno == SP_REGNUM)
3903 return STACK_REG;
3904
3905 if (regno == FRAME_POINTER_REGNUM
3906 || regno == ARG_POINTER_REGNUM)
3907 return CORE_REGS;
3908
3909 if (FP_REGNUM_P (regno))
3910 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3911
3912 return NO_REGS;
3913 }
3914
3915 /* Try a machine-dependent way of reloading an illegitimate address
3916 operand. If we find one, push the reload and return the new rtx. */
3917
3918 rtx
3919 aarch64_legitimize_reload_address (rtx *x_p,
3920 enum machine_mode mode,
3921 int opnum, int type,
3922 int ind_levels ATTRIBUTE_UNUSED)
3923 {
3924 rtx x = *x_p;
3925
3926 /* Do not allow mem (plus (reg, const)) if vector mode. */
3927 if (aarch64_vector_mode_p (mode)
3928 && GET_CODE (x) == PLUS
3929 && REG_P (XEXP (x, 0))
3930 && CONST_INT_P (XEXP (x, 1)))
3931 {
3932 rtx orig_rtx = x;
3933 x = copy_rtx (x);
3934 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3935 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3936 opnum, (enum reload_type) type);
3937 return x;
3938 }
3939
3940 /* We must recognize output that we have already generated ourselves. */
3941 if (GET_CODE (x) == PLUS
3942 && GET_CODE (XEXP (x, 0)) == PLUS
3943 && REG_P (XEXP (XEXP (x, 0), 0))
3944 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3945 && CONST_INT_P (XEXP (x, 1)))
3946 {
3947 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3948 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3949 opnum, (enum reload_type) type);
3950 return x;
3951 }
3952
3953 /* We wish to handle large displacements off a base register by splitting
3954 the addend across an add and the mem insn. This can cut the number of
3955 extra insns needed from 3 to 1. It is only useful for load/store of a
3956 	     single register with a 12-bit offset field.  */
3957 if (GET_CODE (x) == PLUS
3958 && REG_P (XEXP (x, 0))
3959 && CONST_INT_P (XEXP (x, 1))
3960 && HARD_REGISTER_P (XEXP (x, 0))
3961 && mode != TImode
3962 && mode != TFmode
3963 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3964 {
3965 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3966 HOST_WIDE_INT low = val & 0xfff;
3967 HOST_WIDE_INT high = val - low;
3968 HOST_WIDE_INT offs;
3969 rtx cst;
3970 enum machine_mode xmode = GET_MODE (x);
3971
3972 /* In ILP32, xmode can be either DImode or SImode. */
3973 gcc_assert (xmode == DImode || xmode == SImode);
3974
3975 	      /* Punt on zero-sized (e.g. BLKmode) references: we cannot ascertain
3976 	         their alignment, so leave them to the generic reload code.  */
3977 if (GET_MODE_SIZE (mode) == 0)
3978 return NULL_RTX;
3979
3980 offs = low % GET_MODE_SIZE (mode);
3981
3982 /* Align misaligned offset by adjusting high part to compensate. */
3983 if (offs != 0)
3984 {
3985 if (aarch64_uimm12_shift (high + offs))
3986 {
3987 /* Align down. */
3988 low = low - offs;
3989 high = high + offs;
3990 }
3991 else
3992 {
3993 /* Align up. */
3994 offs = GET_MODE_SIZE (mode) - offs;
3995 low = low + offs;
3996 high = high + (low & 0x1000) - offs;
3997 low &= 0xfff;
3998 }
3999 }
4000
4001 /* Check for overflow. */
4002 if (high + low != val)
4003 return NULL_RTX;
4004
4005 cst = GEN_INT (high);
4006 if (!aarch64_uimm12_shift (high))
4007 cst = force_const_mem (xmode, cst);
4008
4009 /* Reload high part into base reg, leaving the low part
4010 in the mem instruction. */
4011 x = gen_rtx_PLUS (xmode,
4012 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4013 GEN_INT (low));
4014
4015 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4016 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4017 opnum, (enum reload_type) type);
4018 return x;
4019 }
4020
4021 return NULL_RTX;
4022 }
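/* A worked example of the displacement split above (illustrative only, with
   assumed values and scratch registers): for an SImode access to base + 0x12344,
   val = 0x12344, so low = 0x344 and high = 0x12000.  low is already a multiple
   of GET_MODE_SIZE (SImode), so no realignment is needed, and high fits the
   "uimm12, possibly shifted by 12" form accepted by aarch64_uimm12_shift.
   Reload therefore materialises high with a single ADD into a scratch base
   register and the memory insn keeps the small offset:

       add  xN, xBASE, #0x12000
       ldr  wM, [xN, #0x344]

   (xN/xM stand for whichever scratch and destination registers reload picks.)
   Without the split, the full constant would first have to be built with a
   MOVZ/MOVK sequence and then added, costing extra instructions.  */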
4023
4024
4025 static reg_class_t
4026 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4027 reg_class_t rclass,
4028 enum machine_mode mode,
4029 secondary_reload_info *sri)
4030 {
4031 /* Address expressions of the form PLUS (SP, large_offset) need two
4032 scratch registers, one for the constant, and one for holding a
4033 copy of SP, since SP cannot be used on the RHS of an add-reg
4034 instruction. */
4035 if (mode == DImode
4036 && GET_CODE (x) == PLUS
4037 && XEXP (x, 0) == stack_pointer_rtx
4038 && CONST_INT_P (XEXP (x, 1))
4039 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
4040 {
4041 sri->icode = CODE_FOR_reload_sp_immediate;
4042 return NO_REGS;
4043 }
4044
4045 /* Without the TARGET_SIMD instructions we cannot move a Q register
4046 to a Q register directly. We need a scratch. */
4047 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4048 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4049 && reg_class_subset_p (rclass, FP_REGS))
4050 {
4051 if (mode == TFmode)
4052 sri->icode = CODE_FOR_aarch64_reload_movtf;
4053 else if (mode == TImode)
4054 sri->icode = CODE_FOR_aarch64_reload_movti;
4055 return NO_REGS;
4056 }
4057
4058 	  /* A TFmode or TImode memory access should be handled via the FP_REGS class
4059 because AArch64 has richer addressing modes for LDR/STR instructions
4060 than LDP/STP instructions. */
4061 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4062 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4063 return FP_REGS;
4064
4065 	  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4066 return CORE_REGS;
4067
4068 return NO_REGS;
4069 }
4070
4071 static bool
4072 aarch64_can_eliminate (const int from, const int to)
4073 {
4074 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4075 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4076
4077 if (frame_pointer_needed)
4078 {
4079 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4080 return true;
4081 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4082 return false;
4083 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4084 && !cfun->calls_alloca)
4085 return true;
4086 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4087 return true;
4088 return false;
4089 }
4090 else
4091 {
4092 /* If we decided that we didn't need a leaf frame pointer but then used
4093 LR in the function, then we'll want a frame pointer after all, so
4094 prevent this elimination to ensure a frame pointer is used.
4095
4096 NOTE: the original value of flag_omit_frame_pointer gets trashed
4097 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4098 of faked_omit_frame_pointer here (which is true when we always
4099 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4100 pointers when LR is clobbered). */
4101 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4102 && df_regs_ever_live_p (LR_REGNUM)
4103 && faked_omit_frame_pointer)
4104 return false;
4105 }
4106
4107 return true;
4108 }
4109
4110 HOST_WIDE_INT
4111 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4112 {
4113 HOST_WIDE_INT frame_size;
4114 HOST_WIDE_INT offset;
4115
4116 aarch64_layout_frame ();
4117 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4118 + crtl->outgoing_args_size
4119 + cfun->machine->saved_varargs_size);
4120
4121 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4122 offset = frame_size;
4123
4124 if (to == HARD_FRAME_POINTER_REGNUM)
4125 {
4126 if (from == ARG_POINTER_REGNUM)
4127 return offset - crtl->outgoing_args_size;
4128
4129 if (from == FRAME_POINTER_REGNUM)
4130 return cfun->machine->frame.saved_regs_size;
4131 }
4132
4133 if (to == STACK_POINTER_REGNUM)
4134 {
4135 if (from == FRAME_POINTER_REGNUM)
4136 {
4137 HOST_WIDE_INT elim = crtl->outgoing_args_size
4138 + cfun->machine->frame.saved_regs_size
4139 - cfun->machine->frame.fp_lr_offset;
4140 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4141 return elim;
4142 }
4143 }
4144
4145 return offset;
4146 }
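/* Illustrative example of the offsets computed above (assumed numbers, not
   taken from the sources): with 40 bytes of locals, 16 bytes of saved
   registers, 32 bytes of outgoing arguments and no saved varargs,
   frame_size = 40 + 16 + 32 = 88, rounded up to 96.  Then:
     ARG_POINTER   -> HARD_FRAME_POINTER : 96 - 32 = 64
     FRAME_POINTER -> HARD_FRAME_POINTER : 16 (saved_regs_size)
     ARG_POINTER   -> STACK_POINTER      : 96 (the full rounded frame)
   while FRAME_POINTER -> STACK_POINTER subtracts fp_lr_offset and re-rounds
   as shown in the code.  */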
4147
4148
4149 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4150 previous frame. */
4151
4152 rtx
4153 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4154 {
4155 if (count != 0)
4156 return const0_rtx;
4157 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4158 }
4159
4160
4161 static void
4162 aarch64_asm_trampoline_template (FILE *f)
4163 {
4164 if (TARGET_ILP32)
4165 {
4166 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4167 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4168 }
4169 else
4170 {
4171 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4172 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4173 }
4174 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4175 assemble_aligned_integer (4, const0_rtx);
4176 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4177 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4178 }
4179
4180 static void
4181 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4182 {
4183 rtx fnaddr, mem, a_tramp;
4184 const int tramp_code_sz = 16;
4185
4186 	  /* We don't need to copy the trailing D-words; we fill those in below.  */
4187 emit_block_move (m_tramp, assemble_trampoline_template (),
4188 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4189 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4190 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4191 if (GET_MODE (fnaddr) != ptr_mode)
4192 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4193 emit_move_insn (mem, fnaddr);
4194
4195 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4196 emit_move_insn (mem, chain_value);
4197
4198 /* XXX We should really define a "clear_cache" pattern and use
4199 gen_clear_cache(). */
4200 a_tramp = XEXP (m_tramp, 0);
4201 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4202 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4203 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4204 ptr_mode);
4205 }
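/* Resulting trampoline layout (LP64, POINTER_BYTES == 8; shown here only as an
   illustration of the template and init code above):

     offset  0: ldr  IP1, .+16            ; load target function address
     offset  4: ldr  STATIC_CHAIN, .+20   ; load static chain value
     offset  8: br   IP1
     offset 12: 4 bytes of zero padding
     offset 16: target function address   (written by aarch64_trampoline_init)
     offset 24: static chain value        (written by aarch64_trampoline_init)

   For ILP32 the two data slots are 4 bytes each, at offsets 16 and 20, which
   is why both loads in the template use the same ".+16" offset there.  */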
4206
4207 static unsigned char
4208 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4209 {
4210 switch (regclass)
4211 {
4212 case CORE_REGS:
4213 case POINTER_REGS:
4214 case GENERAL_REGS:
4215 case ALL_REGS:
4216 case FP_REGS:
4217 case FP_LO_REGS:
4218 return
4219 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4220 (GET_MODE_SIZE (mode) + 7) / 8;
4221 case STACK_REG:
4222 return 1;
4223
4224 case NO_REGS:
4225 return 0;
4226
4227 default:
4228 break;
4229 }
4230 gcc_unreachable ();
4231 }
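/* For example (illustrative): DImode in CORE_REGS needs (8 + 7) / 8 = 1
   register, TImode needs (16 + 7) / 8 = 2, while a V4SImode vector needs
   (16 + 15) / 16 = 1, i.e. a single Q register.  */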
4232
4233 static reg_class_t
4234 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4235 {
4236 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4237 ? GENERAL_REGS : regclass);
4238 }
4239
4240 void
4241 aarch64_asm_output_labelref (FILE* f, const char *name)
4242 {
4243 asm_fprintf (f, "%U%s", name);
4244 }
4245
4246 static void
4247 aarch64_elf_asm_constructor (rtx symbol, int priority)
4248 {
4249 if (priority == DEFAULT_INIT_PRIORITY)
4250 default_ctor_section_asm_out_constructor (symbol, priority);
4251 else
4252 {
4253 section *s;
4254 char buf[18];
4255 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4256 s = get_section (buf, SECTION_WRITE, NULL);
4257 switch_to_section (s);
4258 assemble_align (POINTER_SIZE);
4259 assemble_aligned_integer (POINTER_BYTES, symbol);
4260 }
4261 }
4262
4263 static void
4264 aarch64_elf_asm_destructor (rtx symbol, int priority)
4265 {
4266 if (priority == DEFAULT_INIT_PRIORITY)
4267 default_dtor_section_asm_out_destructor (symbol, priority);
4268 else
4269 {
4270 section *s;
4271 char buf[18];
4272 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4273 s = get_section (buf, SECTION_WRITE, NULL);
4274 switch_to_section (s);
4275 assemble_align (POINTER_SIZE);
4276 assemble_aligned_integer (POINTER_BYTES, symbol);
4277 }
4278 }
4279
4280 const char*
4281 aarch64_output_casesi (rtx *operands)
4282 {
4283 char buf[100];
4284 char label[100];
4285 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4286 int index;
4287 static const char *const patterns[4][2] =
4288 {
4289 {
4290 "ldrb\t%w3, [%0,%w1,uxtw]",
4291 "add\t%3, %4, %w3, sxtb #2"
4292 },
4293 {
4294 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4295 "add\t%3, %4, %w3, sxth #2"
4296 },
4297 {
4298 "ldr\t%w3, [%0,%w1,uxtw #2]",
4299 "add\t%3, %4, %w3, sxtw #2"
4300 },
4301 /* We assume that DImode is only generated when not optimizing and
4302 that we don't really need 64-bit address offsets. That would
4303 imply an object file with 8GB of code in a single function! */
4304 {
4305 "ldr\t%w3, [%0,%w1,uxtw #2]",
4306 "add\t%3, %4, %w3, sxtw #2"
4307 }
4308 };
4309
4310 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4311
4312 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4313
4314 gcc_assert (index >= 0 && index <= 3);
4315
4316 	  /* Need to implement table size reduction, by changing the code below.  */
4317 output_asm_insn (patterns[index][0], operands);
4318 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4319 snprintf (buf, sizeof (buf),
4320 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4321 output_asm_insn (buf, operands);
4322 output_asm_insn (patterns[index][1], operands);
4323 output_asm_insn ("br\t%3", operands);
4324 assemble_label (asm_out_file, label);
4325 return "";
4326 }
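/* As an illustration (assumed label number and register choices, not from the
   sources), for a HImode dispatch table (index == 1) the sequence emitted
   above looks like:

       ldrh    w3, [x0, w1, uxtw #1]   ; load the 16-bit table entry
       adr     x4, .Lrtx4              ; table base = the label emitted below
       add     x3, x4, w3, sxth #2     ; entry is a scaled signed offset
       br      x3
   .Lrtx4:

   where operands 0/1/3/4 stand for the table base, index, scratch and
   address-temporary registers chosen by the caller.  */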
4327
4328
4329 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4330 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4331 operator. */
4332
4333 int
4334 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4335 {
4336 if (shift >= 0 && shift <= 3)
4337 {
4338 int size;
4339 for (size = 8; size <= 32; size *= 2)
4340 {
4341 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4342 if (mask == bits << shift)
4343 return size;
4344 }
4345 }
4346 return 0;
4347 }
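/* For example, aarch64_uxt_size (1, 0x1fe) returns 8, because
   0xff << 1 == 0x1fe: the operand is a byte value shifted left by one, so it
   can be expressed with a UXTB-style extend in the instruction.
   aarch64_uxt_size (0, 0x12345) returns 0, since no 8/16/32-bit mask
   matches.  */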
4348
4349 static bool
4350 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4351 const_rtx x ATTRIBUTE_UNUSED)
4352 {
4353 /* We can't use blocks for constants when we're using a per-function
4354 constant pool. */
4355 return false;
4356 }
4357
4358 static section *
4359 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4360 rtx x ATTRIBUTE_UNUSED,
4361 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4362 {
4363 /* Force all constant pool entries into the current function section. */
4364 return function_section (current_function_decl);
4365 }
4366
4367
4368 /* Costs. */
4369
4370 /* Helper function for rtx cost calculation. Strip a shift expression
4371 from X. Returns the inner operand if successful, or the original
4372 expression on failure. */
4373 static rtx
4374 aarch64_strip_shift (rtx x)
4375 {
4376 rtx op = x;
4377
4378 if ((GET_CODE (op) == ASHIFT
4379 || GET_CODE (op) == ASHIFTRT
4380 || GET_CODE (op) == LSHIFTRT)
4381 && CONST_INT_P (XEXP (op, 1)))
4382 return XEXP (op, 0);
4383
4384 if (GET_CODE (op) == MULT
4385 && CONST_INT_P (XEXP (op, 1))
4386 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4387 return XEXP (op, 0);
4388
4389 return x;
4390 }
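/* Examples (illustrative): both (ashift (reg X) (const_int 3)) and
   (mult (reg X) (const_int 8)) strip down to (reg X), since a multiply by a
   power of two is the canonical form of a left shift inside an address or
   arithmetic operand.  A multiply by a non-power-of-two is returned
   unchanged.  */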
4391
4392 /* Helper function for rtx cost calculation. Strip a shift or extend
4393 expression from X. Returns the inner operand if successful, or the
4394 original expression on failure. We deal with a number of possible
4395 canonicalization variations here. */
4396 static rtx
4397 aarch64_strip_shift_or_extend (rtx x)
4398 {
4399 rtx op = x;
4400
4401 /* Zero and sign extraction of a widened value. */
4402 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4403 && XEXP (op, 2) == const0_rtx
4404 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4405 XEXP (op, 1)))
4406 return XEXP (XEXP (op, 0), 0);
4407
4408 /* It can also be represented (for zero-extend) as an AND with an
4409 immediate. */
4410 if (GET_CODE (op) == AND
4411 && GET_CODE (XEXP (op, 0)) == MULT
4412 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4413 && CONST_INT_P (XEXP (op, 1))
4414 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4415 INTVAL (XEXP (op, 1))) != 0)
4416 return XEXP (XEXP (op, 0), 0);
4417
4418 /* Now handle extended register, as this may also have an optional
4419 left shift by 1..4. */
4420 if (GET_CODE (op) == ASHIFT
4421 && CONST_INT_P (XEXP (op, 1))
4422 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4423 op = XEXP (op, 0);
4424
4425 if (GET_CODE (op) == ZERO_EXTEND
4426 || GET_CODE (op) == SIGN_EXTEND)
4427 op = XEXP (op, 0);
4428
4429 if (op != x)
4430 return op;
4431
4432 return aarch64_strip_shift (x);
4433 }
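/* Example of the AND form handled above (illustrative):
   (and (mult (reg X) (const_int 4)) (const_int 0x3fc)) strips to (reg X),
   because aarch64_uxt_size (log2 (4), 0x3fc) == 8 -- this is a byte value
   zero-extended and scaled by 4, which the extended-register forms of the
   arithmetic instructions can do for free.  */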
4434
4435 /* Calculate the cost of calculating X, storing it in *COST. Result
4436 is true if the total cost of the operation has now been calculated. */
4437 static bool
4438 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4439 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4440 {
4441 rtx op0, op1;
4442 const struct cpu_rtx_cost_table *extra_cost
4443 = aarch64_tune_params->insn_extra_cost;
4444
4445 switch (code)
4446 {
4447 case SET:
4448 op0 = SET_DEST (x);
4449 op1 = SET_SRC (x);
4450
4451 switch (GET_CODE (op0))
4452 {
4453 case MEM:
4454 if (speed)
4455 *cost += extra_cost->memory_store;
4456
4457 if (op1 != const0_rtx)
4458 *cost += rtx_cost (op1, SET, 1, speed);
4459 return true;
4460
4461 case SUBREG:
4462 if (! REG_P (SUBREG_REG (op0)))
4463 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4464 /* Fall through. */
4465 case REG:
4466 /* Cost is just the cost of the RHS of the set. */
4467 *cost += rtx_cost (op1, SET, 1, true);
4468 return true;
4469
4470 case ZERO_EXTRACT: /* Bit-field insertion. */
4471 case SIGN_EXTRACT:
4472 /* Strip any redundant widening of the RHS to meet the width of
4473 the target. */
4474 if (GET_CODE (op1) == SUBREG)
4475 op1 = SUBREG_REG (op1);
4476 if ((GET_CODE (op1) == ZERO_EXTEND
4477 || GET_CODE (op1) == SIGN_EXTEND)
4478 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4479 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4480 >= INTVAL (XEXP (op0, 1))))
4481 op1 = XEXP (op1, 0);
4482 *cost += rtx_cost (op1, SET, 1, speed);
4483 return true;
4484
4485 default:
4486 break;
4487 }
4488 return false;
4489
4490 case MEM:
4491 if (speed)
4492 *cost += extra_cost->memory_load;
4493
4494 return true;
4495
4496 case NEG:
4497 op0 = CONST0_RTX (GET_MODE (x));
4498 op1 = XEXP (x, 0);
4499 goto cost_minus;
4500
4501 case COMPARE:
4502 op0 = XEXP (x, 0);
4503 op1 = XEXP (x, 1);
4504
4505 if (op1 == const0_rtx
4506 && GET_CODE (op0) == AND)
4507 {
4508 x = op0;
4509 goto cost_logic;
4510 }
4511
4512 /* Comparisons can work if the order is swapped.
4513 Canonicalization puts the more complex operation first, but
4514 we want it in op1. */
4515 if (! (REG_P (op0)
4516 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4517 {
4518 op0 = XEXP (x, 1);
4519 op1 = XEXP (x, 0);
4520 }
4521 goto cost_minus;
4522
4523 case MINUS:
4524 op0 = XEXP (x, 0);
4525 op1 = XEXP (x, 1);
4526
4527 cost_minus:
4528 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4529 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4530 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4531 {
4532 if (op0 != const0_rtx)
4533 *cost += rtx_cost (op0, MINUS, 0, speed);
4534
4535 if (CONST_INT_P (op1))
4536 {
4537 if (!aarch64_uimm12_shift (INTVAL (op1)))
4538 *cost += rtx_cost (op1, MINUS, 1, speed);
4539 }
4540 else
4541 {
4542 op1 = aarch64_strip_shift_or_extend (op1);
4543 *cost += rtx_cost (op1, MINUS, 1, speed);
4544 }
4545 return true;
4546 }
4547
4548 return false;
4549
4550 case PLUS:
4551 op0 = XEXP (x, 0);
4552 op1 = XEXP (x, 1);
4553
4554 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4555 {
4556 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4557 {
4558 *cost += rtx_cost (op0, PLUS, 0, speed);
4559 }
4560 else
4561 {
4562 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4563
4564 if (new_op0 == op0
4565 && GET_CODE (op0) == MULT)
4566 {
4567 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4568 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4569 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4570 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4571 {
4572 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4573 speed)
4574 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4575 speed)
4576 + rtx_cost (op1, PLUS, 1, speed));
4577 if (speed)
4578 *cost += extra_cost->int_multiply_extend_add;
4579 return true;
4580 }
4581 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4582 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4583 + rtx_cost (op1, PLUS, 1, speed));
4584
4585 if (speed)
4586 	                *cost += extra_cost->int_multiply_add;
 	              return true;
4587 	            }
4588
4589 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4590 + rtx_cost (op1, PLUS, 1, speed));
4591 }
4592 return true;
4593 }
4594
4595 return false;
4596
4597 case IOR:
4598 case XOR:
4599 case AND:
4600 cost_logic:
4601 op0 = XEXP (x, 0);
4602 op1 = XEXP (x, 1);
4603
4604 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4605 {
4606 if (CONST_INT_P (op1)
4607 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4608 {
4609 *cost += rtx_cost (op0, AND, 0, speed);
4610 }
4611 else
4612 {
4613 if (GET_CODE (op0) == NOT)
4614 op0 = XEXP (op0, 0);
4615 op0 = aarch64_strip_shift (op0);
4616 *cost += (rtx_cost (op0, AND, 0, speed)
4617 + rtx_cost (op1, AND, 1, speed));
4618 }
4619 return true;
4620 }
4621 return false;
4622
4623 case ZERO_EXTEND:
4624 if ((GET_MODE (x) == DImode
4625 && GET_MODE (XEXP (x, 0)) == SImode)
4626 || GET_CODE (XEXP (x, 0)) == MEM)
4627 {
4628 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4629 return true;
4630 }
4631 return false;
4632
4633 case SIGN_EXTEND:
4634 if (GET_CODE (XEXP (x, 0)) == MEM)
4635 {
4636 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4637 return true;
4638 }
4639 return false;
4640
4641 case ROTATE:
4642 if (!CONST_INT_P (XEXP (x, 1)))
4643 *cost += COSTS_N_INSNS (2);
4644 /* Fall through. */
4645 case ROTATERT:
4646 case LSHIFTRT:
4647 case ASHIFT:
4648 case ASHIFTRT:
4649
4650 /* Shifting by a register often takes an extra cycle. */
4651 if (speed && !CONST_INT_P (XEXP (x, 1)))
4652 *cost += extra_cost->register_shift;
4653
4654 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4655 return true;
4656
4657 case HIGH:
4658 if (!CONSTANT_P (XEXP (x, 0)))
4659 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4660 return true;
4661
4662 case LO_SUM:
4663 if (!CONSTANT_P (XEXP (x, 1)))
4664 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4665 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4666 return true;
4667
4668 case ZERO_EXTRACT:
4669 case SIGN_EXTRACT:
4670 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4671 return true;
4672
4673 case MULT:
4674 op0 = XEXP (x, 0);
4675 op1 = XEXP (x, 1);
4676
4677 *cost = COSTS_N_INSNS (1);
4678 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4679 {
4680 if (CONST_INT_P (op1)
4681 && exact_log2 (INTVAL (op1)) > 0)
4682 {
4683 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4684 return true;
4685 }
4686
4687 if ((GET_CODE (op0) == ZERO_EXTEND
4688 && GET_CODE (op1) == ZERO_EXTEND)
4689 || (GET_CODE (op0) == SIGN_EXTEND
4690 && GET_CODE (op1) == SIGN_EXTEND))
4691 {
4692 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4693 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4694 if (speed)
4695 *cost += extra_cost->int_multiply_extend;
4696 return true;
4697 }
4698
4699 if (speed)
4700 *cost += extra_cost->int_multiply;
4701 }
4702 else if (speed)
4703 {
4704 if (GET_MODE (x) == DFmode)
4705 *cost += extra_cost->double_multiply;
4706 else if (GET_MODE (x) == SFmode)
4707 *cost += extra_cost->float_multiply;
4708 }
4709
4710 return false; /* All arguments need to be in registers. */
4711
4712 case MOD:
4713 case UMOD:
4714 *cost = COSTS_N_INSNS (2);
4715 if (speed)
4716 {
4717 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4718 *cost += (extra_cost->int_multiply_add
4719 + extra_cost->int_divide);
4720 else if (GET_MODE (x) == DFmode)
4721 *cost += (extra_cost->double_multiply
4722 + extra_cost->double_divide);
4723 else if (GET_MODE (x) == SFmode)
4724 *cost += (extra_cost->float_multiply
4725 + extra_cost->float_divide);
4726 }
4727 return false; /* All arguments need to be in registers. */
4728
4729 case DIV:
4730 case UDIV:
4731 *cost = COSTS_N_INSNS (1);
4732 if (speed)
4733 {
4734 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4735 *cost += extra_cost->int_divide;
4736 else if (GET_MODE (x) == DFmode)
4737 *cost += extra_cost->double_divide;
4738 else if (GET_MODE (x) == SFmode)
4739 *cost += extra_cost->float_divide;
4740 }
4741 return false; /* All arguments need to be in registers. */
4742
4743 default:
4744 break;
4745 }
4746 return false;
4747 }
4748
4749 static int
4750 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4751 enum machine_mode mode ATTRIBUTE_UNUSED,
4752 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4753 {
4754 enum rtx_code c = GET_CODE (x);
4755 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4756
4757 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4758 return addr_cost->pre_modify;
4759
4760 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4761 return addr_cost->post_modify;
4762
4763 if (c == PLUS)
4764 {
4765 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4766 return addr_cost->imm_offset;
4767 else if (GET_CODE (XEXP (x, 0)) == MULT
4768 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4769 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4770 return addr_cost->register_extend;
4771
4772 return addr_cost->register_offset;
4773 }
4774 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4775 return addr_cost->imm_offset;
4776
4777 return 0;
4778 }
4779
4780 static int
4781 	aarch64_register_move_cost (enum machine_mode mode,
4782 reg_class_t from, reg_class_t to)
4783 {
4784 const struct cpu_regmove_cost *regmove_cost
4785 = aarch64_tune_params->regmove_cost;
4786
4787 if (from == GENERAL_REGS && to == GENERAL_REGS)
4788 return regmove_cost->GP2GP;
4789 else if (from == GENERAL_REGS)
4790 return regmove_cost->GP2FP;
4791 else if (to == GENERAL_REGS)
4792 return regmove_cost->FP2GP;
4793
4794 /* When AdvSIMD instructions are disabled it is not possible to move
4795 a 128-bit value directly between Q registers. This is handled in
4796 secondary reload. A general register is used as a scratch to move
4797 the upper DI value and the lower DI value is moved directly,
4798 hence the cost is the sum of three moves. */
4799
4800 	  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4801 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4802
4803 return regmove_cost->FP2FP;
4804 }
4805
4806 static int
4807 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4808 reg_class_t rclass ATTRIBUTE_UNUSED,
4809 bool in ATTRIBUTE_UNUSED)
4810 {
4811 return aarch64_tune_params->memmov_cost;
4812 }
4813
4814 /* Vectorizer cost model target hooks. */
4815
4816 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4817 static int
4818 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4819 tree vectype,
4820 int misalign ATTRIBUTE_UNUSED)
4821 {
4822 unsigned elements;
4823
4824 switch (type_of_cost)
4825 {
4826 case scalar_stmt:
4827 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4828
4829 case scalar_load:
4830 return aarch64_tune_params->vec_costs->scalar_load_cost;
4831
4832 case scalar_store:
4833 return aarch64_tune_params->vec_costs->scalar_store_cost;
4834
4835 case vector_stmt:
4836 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4837
4838 case vector_load:
4839 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4840
4841 case vector_store:
4842 return aarch64_tune_params->vec_costs->vec_store_cost;
4843
4844 case vec_to_scalar:
4845 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4846
4847 case scalar_to_vec:
4848 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4849
4850 case unaligned_load:
4851 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4852
4853 case unaligned_store:
4854 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4855
4856 case cond_branch_taken:
4857 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4858
4859 case cond_branch_not_taken:
4860 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4861
4862 case vec_perm:
4863 case vec_promote_demote:
4864 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4865
4866 case vec_construct:
4867 elements = TYPE_VECTOR_SUBPARTS (vectype);
4868 return elements / 2 + 1;
4869
4870 default:
4871 gcc_unreachable ();
4872 }
4873 }
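/* For instance (illustrative), constructing a V4SI vector from scalars is
   costed as elements / 2 + 1 = 4 / 2 + 1 = 3 units, on the assumption that
   pairs of elements can be combined before being inserted; all other entries
   simply come from the per-CPU vec_costs table selected by the tuning.  */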
4874
4875 /* Implement targetm.vectorize.add_stmt_cost. */
4876 static unsigned
4877 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4878 struct _stmt_vec_info *stmt_info, int misalign,
4879 enum vect_cost_model_location where)
4880 {
4881 unsigned *cost = (unsigned *) data;
4882 unsigned retval = 0;
4883
4884 if (flag_vect_cost_model)
4885 {
4886 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4887 int stmt_cost =
4888 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4889
4890 /* Statements in an inner loop relative to the loop being
4891 vectorized are weighted more heavily. The value here is
4892 a function (linear for now) of the loop nest level. */
4893 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4894 {
4895 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4896 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4897 unsigned nest_level = loop_depth (loop);
4898
4899 count *= nest_level;
4900 }
4901
4902 retval = (unsigned) (count * stmt_cost);
4903 cost[where] += retval;
4904 }
4905
4906 return retval;
4907 }
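/* Example of the weighting above (illustrative numbers): a vector_stmt whose
   base cost from aarch64_builtin_vectorization_cost is 1, located in a loop
   of depth 2 inside the body of the loop being vectorized, has its count
   scaled by that nest level, so it contributes 2 * 1 = 2 to the vect_body
   bucket.  */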
4908
4909 static void initialize_aarch64_code_model (void);
4910
4911 /* Parse the architecture extension string. */
4912
4913 static void
4914 aarch64_parse_extension (char *str)
4915 {
4916 /* The extension string is parsed left to right. */
4917 const struct aarch64_option_extension *opt = NULL;
4918
4919 /* Flag to say whether we are adding or removing an extension. */
4920 int adding_ext = -1;
4921
4922 while (str != NULL && *str != 0)
4923 {
4924 char *ext;
4925 size_t len;
4926
4927 str++;
4928 ext = strchr (str, '+');
4929
4930 if (ext != NULL)
4931 len = ext - str;
4932 else
4933 len = strlen (str);
4934
4935 if (len >= 2 && strncmp (str, "no", 2) == 0)
4936 {
4937 adding_ext = 0;
4938 len -= 2;
4939 str += 2;
4940 }
4941 else if (len > 0)
4942 adding_ext = 1;
4943
4944 if (len == 0)
4945 {
4946 error ("missing feature modifier after %qs", "+no");
4947 return;
4948 }
4949
4950 /* Scan over the extensions table trying to find an exact match. */
4951 for (opt = all_extensions; opt->name != NULL; opt++)
4952 {
4953 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4954 {
4955 /* Add or remove the extension. */
4956 if (adding_ext)
4957 aarch64_isa_flags |= opt->flags_on;
4958 else
4959 aarch64_isa_flags &= ~(opt->flags_off);
4960 break;
4961 }
4962 }
4963
4964 if (opt->name == NULL)
4965 {
4966 /* Extension not found in list. */
4967 error ("unknown feature modifier %qs", str);
4968 return;
4969 }
4970
4971 str = ext;
4972 	    }
4973
4974 return;
4975 }
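/* Illustrative example (assuming "fp" and "simd" are entries in
   all_extensions): for the suffix "+fp+nosimd" the loop above first parses
   "fp" with adding_ext == 1 and ORs in its flags_on bits, then parses "simd"
   with the "no" prefix (adding_ext == 0) and clears its flags_off bits from
   aarch64_isa_flags.  An unknown name such as "+foo" produces the
   "unknown feature modifier" error.  */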
4976
4977 /* Parse the ARCH string. */
4978
4979 static void
4980 aarch64_parse_arch (void)
4981 {
4982 char *ext;
4983 const struct processor *arch;
4984 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4985 size_t len;
4986
4987 strcpy (str, aarch64_arch_string);
4988
4989 ext = strchr (str, '+');
4990
4991 if (ext != NULL)
4992 len = ext - str;
4993 else
4994 len = strlen (str);
4995
4996 if (len == 0)
4997 {
4998 error ("missing arch name in -march=%qs", str);
4999 return;
5000 }
5001
5002 /* Loop through the list of supported ARCHs to find a match. */
5003 for (arch = all_architectures; arch->name != NULL; arch++)
5004 {
5005 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5006 {
5007 selected_arch = arch;
5008 aarch64_isa_flags = selected_arch->flags;
5009 selected_cpu = &all_cores[selected_arch->core];
5010
5011 if (ext != NULL)
5012 {
5013 /* ARCH string contains at least one extension. */
5014 aarch64_parse_extension (ext);
5015 }
5016
5017 return;
5018 }
5019 }
5020
5021 /* ARCH name not found in list. */
5022 error ("unknown value %qs for -march", str);
5023 return;
5024 }
5025
5026 /* Parse the CPU string. */
5027
5028 static void
5029 aarch64_parse_cpu (void)
5030 {
5031 char *ext;
5032 const struct processor *cpu;
5033 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5034 size_t len;
5035
5036 strcpy (str, aarch64_cpu_string);
5037
5038 ext = strchr (str, '+');
5039
5040 if (ext != NULL)
5041 len = ext - str;
5042 else
5043 len = strlen (str);
5044
5045 if (len == 0)
5046 {
5047 error ("missing cpu name in -mcpu=%qs", str);
5048 return;
5049 }
5050
5051 /* Loop through the list of supported CPUs to find a match. */
5052 for (cpu = all_cores; cpu->name != NULL; cpu++)
5053 {
5054 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5055 {
5056 selected_cpu = cpu;
5057 aarch64_isa_flags = selected_cpu->flags;
5058
5059 if (ext != NULL)
5060 {
5061 /* CPU string contains at least one extension. */
5062 aarch64_parse_extension (ext);
5063 }
5064
5065 return;
5066 }
5067 }
5068
5069 /* CPU name not found in list. */
5070 error ("unknown value %qs for -mcpu", str);
5071 return;
5072 }
5073
5074 /* Parse the TUNE string. */
5075
5076 static void
5077 aarch64_parse_tune (void)
5078 {
5079 const struct processor *cpu;
5080 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5081 strcpy (str, aarch64_tune_string);
5082
5083 /* Loop through the list of supported CPUs to find a match. */
5084 for (cpu = all_cores; cpu->name != NULL; cpu++)
5085 {
5086 if (strcmp (cpu->name, str) == 0)
5087 {
5088 selected_tune = cpu;
5089 return;
5090 }
5091 }
5092
5093 /* CPU name not found in list. */
5094 error ("unknown value %qs for -mtune", str);
5095 return;
5096 }
5097
5098
5099 /* Implement TARGET_OPTION_OVERRIDE. */
5100
5101 static void
5102 aarch64_override_options (void)
5103 {
5104 	  /* -march wins over -mcpu, so when -march is given, -mcpu is ignored and the
5105 	     CPU is taken from the architecture; otherwise the architecture remains
5106 	     undefined.  -mtune can be used with either -march or -mcpu.  */
5107
5108 if (aarch64_arch_string)
5109 {
5110 aarch64_parse_arch ();
5111 aarch64_cpu_string = NULL;
5112 }
5113
5114 if (aarch64_cpu_string)
5115 {
5116 aarch64_parse_cpu ();
5117 selected_arch = NULL;
5118 }
5119
5120 if (aarch64_tune_string)
5121 {
5122 aarch64_parse_tune ();
5123 }
5124
5125 initialize_aarch64_code_model ();
5126
5127 aarch64_build_bitmask_table ();
5128
5129 /* This target defaults to strict volatile bitfields. */
5130 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5131 flag_strict_volatile_bitfields = 1;
5132
5133 /* If the user did not specify a processor, choose the default
5134 one for them. This will be the CPU set during configuration using
5135 --with-cpu, otherwise it is "generic". */
5136 if (!selected_cpu)
5137 {
5138 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5139 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5140 }
5141
5142 gcc_assert (selected_cpu);
5143
5144 	  /* The selected cpu may be an architecture, so look up tuning by core ID.  */
5145 if (!selected_tune)
5146 selected_tune = &all_cores[selected_cpu->core];
5147
5148 aarch64_tune_flags = selected_tune->flags;
5149 aarch64_tune = selected_tune->core;
5150 aarch64_tune_params = selected_tune->tune;
5151
5152 aarch64_override_options_after_change ();
5153 }
5154
5155 /* Implement targetm.override_options_after_change. */
5156
5157 static void
5158 aarch64_override_options_after_change (void)
5159 {
5160 faked_omit_frame_pointer = false;
5161
5162 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5163 that aarch64_frame_pointer_required will be called. We need to remember
5164 whether flag_omit_frame_pointer was turned on normally or just faked. */
5165
5166 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5167 {
5168 flag_omit_frame_pointer = true;
5169 faked_omit_frame_pointer = true;
5170 }
5171 }
5172
5173 static struct machine_function *
5174 aarch64_init_machine_status (void)
5175 {
5176 struct machine_function *machine;
5177 machine = ggc_alloc_cleared_machine_function ();
5178 return machine;
5179 }
5180
5181 void
5182 aarch64_init_expanders (void)
5183 {
5184 init_machine_status = aarch64_init_machine_status;
5185 }
5186
5187 	/* Select the code model to use, validating it against -fpic/-fPIC.  */
5188 static void
5189 initialize_aarch64_code_model (void)
5190 {
5191 if (flag_pic)
5192 {
5193 switch (aarch64_cmodel_var)
5194 {
5195 case AARCH64_CMODEL_TINY:
5196 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5197 break;
5198 case AARCH64_CMODEL_SMALL:
5199 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5200 break;
5201 case AARCH64_CMODEL_LARGE:
5202 sorry ("code model %qs with -f%s", "large",
5203 	               flag_pic > 1 ? "PIC" : "pic");
 	          break;
5204 	        default:
5205 gcc_unreachable ();
5206 }
5207 }
5208 else
5209 aarch64_cmodel = aarch64_cmodel_var;
5210 }
5211
5212 /* Return true if SYMBOL_REF X binds locally. */
5213
5214 static bool
5215 aarch64_symbol_binds_local_p (const_rtx x)
5216 {
5217 return (SYMBOL_REF_DECL (x)
5218 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5219 : SYMBOL_REF_LOCAL_P (x));
5220 }
5221
5222 	/* Return true if SYMBOL_REF X is thread local.  */
5223 static bool
5224 aarch64_tls_symbol_p (rtx x)
5225 {
5226 if (! TARGET_HAVE_TLS)
5227 return false;
5228
5229 if (GET_CODE (x) != SYMBOL_REF)
5230 return false;
5231
5232 return SYMBOL_REF_TLS_MODEL (x) != 0;
5233 }
5234
5235 /* Classify a TLS symbol into one of the TLS kinds. */
5236 enum aarch64_symbol_type
5237 aarch64_classify_tls_symbol (rtx x)
5238 {
5239 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5240
5241 switch (tls_kind)
5242 {
5243 case TLS_MODEL_GLOBAL_DYNAMIC:
5244 case TLS_MODEL_LOCAL_DYNAMIC:
5245 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5246
5247 case TLS_MODEL_INITIAL_EXEC:
5248 return SYMBOL_SMALL_GOTTPREL;
5249
5250 case TLS_MODEL_LOCAL_EXEC:
5251 return SYMBOL_SMALL_TPREL;
5252
5253 case TLS_MODEL_EMULATED:
5254 case TLS_MODEL_NONE:
5255 return SYMBOL_FORCE_TO_MEM;
5256
5257 default:
5258 gcc_unreachable ();
5259 }
5260 }
5261
5262 /* Return the method that should be used to access SYMBOL_REF or
5263 LABEL_REF X in context CONTEXT. */
5264
5265 enum aarch64_symbol_type
5266 aarch64_classify_symbol (rtx x,
5267 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5268 {
5269 if (GET_CODE (x) == LABEL_REF)
5270 {
5271 switch (aarch64_cmodel)
5272 {
5273 case AARCH64_CMODEL_LARGE:
5274 return SYMBOL_FORCE_TO_MEM;
5275
5276 case AARCH64_CMODEL_TINY_PIC:
5277 case AARCH64_CMODEL_TINY:
5278 return SYMBOL_TINY_ABSOLUTE;
5279
5280 case AARCH64_CMODEL_SMALL_PIC:
5281 case AARCH64_CMODEL_SMALL:
5282 return SYMBOL_SMALL_ABSOLUTE;
5283
5284 default:
5285 gcc_unreachable ();
5286 }
5287 }
5288
5289 if (GET_CODE (x) == SYMBOL_REF)
5290 {
5291 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5292 || CONSTANT_POOL_ADDRESS_P (x))
5293 return SYMBOL_FORCE_TO_MEM;
5294
5295 if (aarch64_tls_symbol_p (x))
5296 return aarch64_classify_tls_symbol (x);
5297
5298 switch (aarch64_cmodel)
5299 {
5300 case AARCH64_CMODEL_TINY:
5301 if (SYMBOL_REF_WEAK (x))
5302 return SYMBOL_FORCE_TO_MEM;
5303 return SYMBOL_TINY_ABSOLUTE;
5304
5305 case AARCH64_CMODEL_SMALL:
5306 if (SYMBOL_REF_WEAK (x))
5307 return SYMBOL_FORCE_TO_MEM;
5308 return SYMBOL_SMALL_ABSOLUTE;
5309
5310 case AARCH64_CMODEL_TINY_PIC:
5311 if (!aarch64_symbol_binds_local_p (x))
5312 return SYMBOL_TINY_GOT;
5313 return SYMBOL_TINY_ABSOLUTE;
5314
5315 case AARCH64_CMODEL_SMALL_PIC:
5316 if (!aarch64_symbol_binds_local_p (x))
5317 return SYMBOL_SMALL_GOT;
5318 return SYMBOL_SMALL_ABSOLUTE;
5319
5320 default:
5321 gcc_unreachable ();
5322 }
5323 }
5324
5325 /* By default push everything into the constant pool. */
5326 return SYMBOL_FORCE_TO_MEM;
5327 }
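/* For example (illustrative): under -mcmodel=small -fPIC, a global symbol
   that does not bind locally is classified as SYMBOL_SMALL_GOT and accessed
   through the GOT, whereas a local (or static) symbol is
   SYMBOL_SMALL_ABSOLUTE and reached with an adrp/add :lo12: pair; any symbol
   under -mcmodel=large is forced into the literal pool
   (SYMBOL_FORCE_TO_MEM).  */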
5328
5329 bool
5330 aarch64_constant_address_p (rtx x)
5331 {
5332 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5333 }
5334
5335 bool
5336 aarch64_legitimate_pic_operand_p (rtx x)
5337 {
5338 if (GET_CODE (x) == SYMBOL_REF
5339 || (GET_CODE (x) == CONST
5340 && GET_CODE (XEXP (x, 0)) == PLUS
5341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5342 return false;
5343
5344 return true;
5345 }
5346
5347 	/* Return true if X holds either a valid quarter-precision immediate
5348 	   or the floating-point constant +0.0.  */
5349 static bool
5350 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5351 {
5352 if (!CONST_DOUBLE_P (x))
5353 return false;
5354
5355 	  /* TODO: We could handle moving 0.0 to a TFmode register,
5356 	     but first we would like to refactor the movtf_aarch64
5357 	     pattern to be more amenable to splitting moves properly and
5358 	     to gate correctly on TARGET_SIMD.  For now, reject all
5359 	     constants that are not destined for SFmode or DFmode registers.  */
5360 if (!(mode == SFmode || mode == DFmode))
5361 return false;
5362
5363 if (aarch64_float_const_zero_rtx_p (x))
5364 return true;
5365 return aarch64_float_const_representable_p (x);
5366 }
5367
5368 static bool
5369 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5370 {
5371 /* Do not allow vector struct mode constants. We could support
5372 0 and -1 easily, but they need support in aarch64-simd.md. */
5373 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5374 return false;
5375
5376 /* This could probably go away because
5377 we now decompose CONST_INTs according to expand_mov_immediate. */
5378 if ((GET_CODE (x) == CONST_VECTOR
5379 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5380 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5381 return !targetm.cannot_force_const_mem (mode, x);
5382
5383 if (GET_CODE (x) == HIGH
5384 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5385 return true;
5386
5387 return aarch64_constant_address_p (x);
5388 }
5389
5390 rtx
5391 aarch64_load_tp (rtx target)
5392 {
5393 if (!target
5394 || GET_MODE (target) != Pmode
5395 || !register_operand (target, Pmode))
5396 target = gen_reg_rtx (Pmode);
5397
5398 /* Can return in any reg. */
5399 emit_insn (gen_aarch64_load_tp_hard (target));
5400 return target;
5401 }
5402
5403 /* On AAPCS systems, this is the "struct __va_list". */
5404 static GTY(()) tree va_list_type;
5405
5406 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5407 Return the type to use as __builtin_va_list.
5408
5409 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5410
5411 struct __va_list
5412 {
5413 void *__stack;
5414 void *__gr_top;
5415 void *__vr_top;
5416 int __gr_offs;
5417 int __vr_offs;
5418 }; */
5419
5420 static tree
5421 aarch64_build_builtin_va_list (void)
5422 {
5423 tree va_list_name;
5424 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5425
5426 /* Create the type. */
5427 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5428 /* Give it the required name. */
5429 va_list_name = build_decl (BUILTINS_LOCATION,
5430 TYPE_DECL,
5431 get_identifier ("__va_list"),
5432 va_list_type);
5433 DECL_ARTIFICIAL (va_list_name) = 1;
5434 TYPE_NAME (va_list_type) = va_list_name;
5435 TYPE_STUB_DECL (va_list_type) = va_list_name;
5436
5437 /* Create the fields. */
5438 f_stack = build_decl (BUILTINS_LOCATION,
5439 FIELD_DECL, get_identifier ("__stack"),
5440 ptr_type_node);
5441 f_grtop = build_decl (BUILTINS_LOCATION,
5442 FIELD_DECL, get_identifier ("__gr_top"),
5443 ptr_type_node);
5444 f_vrtop = build_decl (BUILTINS_LOCATION,
5445 FIELD_DECL, get_identifier ("__vr_top"),
5446 ptr_type_node);
5447 f_groff = build_decl (BUILTINS_LOCATION,
5448 FIELD_DECL, get_identifier ("__gr_offs"),
5449 integer_type_node);
5450 f_vroff = build_decl (BUILTINS_LOCATION,
5451 FIELD_DECL, get_identifier ("__vr_offs"),
5452 integer_type_node);
5453
5454 DECL_ARTIFICIAL (f_stack) = 1;
5455 DECL_ARTIFICIAL (f_grtop) = 1;
5456 DECL_ARTIFICIAL (f_vrtop) = 1;
5457 DECL_ARTIFICIAL (f_groff) = 1;
5458 DECL_ARTIFICIAL (f_vroff) = 1;
5459
5460 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5461 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5462 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5463 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5464 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5465
5466 TYPE_FIELDS (va_list_type) = f_stack;
5467 DECL_CHAIN (f_stack) = f_grtop;
5468 DECL_CHAIN (f_grtop) = f_vrtop;
5469 DECL_CHAIN (f_vrtop) = f_groff;
5470 DECL_CHAIN (f_groff) = f_vroff;
5471
5472 /* Compute its layout. */
5473 layout_type (va_list_type);
5474
5475 return va_list_type;
5476 }
5477
5478 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5479 static void
5480 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5481 {
5482 const CUMULATIVE_ARGS *cum;
5483 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5484 tree stack, grtop, vrtop, groff, vroff;
5485 tree t;
5486 int gr_save_area_size;
5487 int vr_save_area_size;
5488 int vr_offset;
5489
5490 cum = &crtl->args.info;
5491 gr_save_area_size
5492 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5493 vr_save_area_size
5494 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5495
5496 if (TARGET_GENERAL_REGS_ONLY)
5497 {
5498 if (cum->aapcs_nvrn > 0)
5499 sorry ("%qs and floating point or vector arguments",
5500 "-mgeneral-regs-only");
5501 vr_save_area_size = 0;
5502 }
5503
5504 f_stack = TYPE_FIELDS (va_list_type_node);
5505 f_grtop = DECL_CHAIN (f_stack);
5506 f_vrtop = DECL_CHAIN (f_grtop);
5507 f_groff = DECL_CHAIN (f_vrtop);
5508 f_vroff = DECL_CHAIN (f_groff);
5509
5510 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5511 NULL_TREE);
5512 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5513 NULL_TREE);
5514 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5515 NULL_TREE);
5516 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5517 NULL_TREE);
5518 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5519 NULL_TREE);
5520
5521 /* Emit code to initialize STACK, which points to the next varargs stack
5522 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5523 by named arguments. STACK is 8-byte aligned. */
5524 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5525 if (cum->aapcs_stack_size > 0)
5526 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5527 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5528 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5529
5530 /* Emit code to initialize GRTOP, the top of the GR save area.
5531 virtual_incoming_args_rtx should have been 16 byte aligned. */
5532 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5533 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5534 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5535
5536 /* Emit code to initialize VRTOP, the top of the VR save area.
5537 This address is gr_save_area_bytes below GRTOP, rounded
5538 down to the next 16-byte boundary. */
5539 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5540 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5541 STACK_BOUNDARY / BITS_PER_UNIT);
5542
5543 if (vr_offset)
5544 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5545 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5546 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5547
5548 /* Emit code to initialize GROFF, the offset from GRTOP of the
5549 next GPR argument. */
5550 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5551 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5553
5554 	  /* Likewise emit code to initialize VROFF, the offset from VRTOP
5555 of the next VR argument. */
5556 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5557 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5558 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5559 }
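/* Worked example for the offsets set up above (illustrative, assuming the
   usual AAPCS64 values NUM_ARG_REGS == 8, NUM_FP_ARG_REGS == 8,
   UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16): if three general registers
   and one vector register were consumed by named arguments, then
   gr_save_area_size = (8 - 3) * 8 = 40 and vr_save_area_size
   = (8 - 1) * 16 = 112, so __gr_offs is initialized to -40 and __vr_offs to
   -112, while __vr_top sits AARCH64_ROUND_UP (40, 16) = 48 bytes below
   __gr_top.  */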
5560
5561 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5562
5563 static tree
5564 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5565 gimple_seq *post_p ATTRIBUTE_UNUSED)
5566 {
5567 tree addr;
5568 bool indirect_p;
5569 bool is_ha; /* is HFA or HVA. */
5570 bool dw_align; /* double-word align. */
5571 enum machine_mode ag_mode = VOIDmode;
5572 int nregs;
5573 enum machine_mode mode;
5574
5575 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5576 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5577 HOST_WIDE_INT size, rsize, adjust, align;
5578 tree t, u, cond1, cond2;
5579
5580 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5581 if (indirect_p)
5582 type = build_pointer_type (type);
5583
5584 mode = TYPE_MODE (type);
5585
5586 f_stack = TYPE_FIELDS (va_list_type_node);
5587 f_grtop = DECL_CHAIN (f_stack);
5588 f_vrtop = DECL_CHAIN (f_grtop);
5589 f_groff = DECL_CHAIN (f_vrtop);
5590 f_vroff = DECL_CHAIN (f_groff);
5591
5592 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5593 f_stack, NULL_TREE);
5594 size = int_size_in_bytes (type);
5595 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5596
5597 dw_align = false;
5598 adjust = 0;
5599 if (aarch64_vfp_is_call_or_return_candidate (mode,
5600 type,
5601 &ag_mode,
5602 &nregs,
5603 &is_ha))
5604 {
5605 /* TYPE passed in fp/simd registers. */
5606 if (TARGET_GENERAL_REGS_ONLY)
5607 sorry ("%qs and floating point or vector arguments",
5608 "-mgeneral-regs-only");
5609
5610 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5611 unshare_expr (valist), f_vrtop, NULL_TREE);
5612 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5613 unshare_expr (valist), f_vroff, NULL_TREE);
5614
5615 rsize = nregs * UNITS_PER_VREG;
5616
5617 if (is_ha)
5618 {
5619 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5620 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5621 }
5622 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5623 && size < UNITS_PER_VREG)
5624 {
5625 adjust = UNITS_PER_VREG - size;
5626 }
5627 }
5628 else
5629 {
5630 /* TYPE passed in general registers. */
5631 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5632 unshare_expr (valist), f_grtop, NULL_TREE);
5633 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5634 unshare_expr (valist), f_groff, NULL_TREE);
5635 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5636 nregs = rsize / UNITS_PER_WORD;
5637
5638 if (align > 8)
5639 dw_align = true;
5640
5641 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5642 && size < UNITS_PER_WORD)
5643 {
5644 adjust = UNITS_PER_WORD - size;
5645 }
5646 }
5647
5648 /* Get a local temporary for the field value. */
5649 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5650
5651 /* Emit code to branch if off >= 0. */
5652 t = build2 (GE_EXPR, boolean_type_node, off,
5653 build_int_cst (TREE_TYPE (off), 0));
5654 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5655
5656 if (dw_align)
5657 {
5658 /* Emit: offs = (offs + 15) & -16. */
5659 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5660 build_int_cst (TREE_TYPE (off), 15));
5661 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5662 build_int_cst (TREE_TYPE (off), -16));
5663 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5664 }
5665 else
5666 roundup = NULL;
5667
5668 /* Update ap.__[g|v]r_offs */
5669 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5670 build_int_cst (TREE_TYPE (off), rsize));
5671 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5672
5673 /* String up. */
5674 if (roundup)
5675 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5676
5677 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5678 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5679 build_int_cst (TREE_TYPE (f_off), 0));
5680 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5681
5682 /* String up: make sure the assignment happens before the use. */
5683 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5684 COND_EXPR_ELSE (cond1) = t;
5685
5686 /* Prepare the trees handling the argument that is passed on the stack;
5687 	     the top-level node will be stored in ON_STACK.  */
5688 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5689 if (align > 8)
5690 {
5691 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5692 t = fold_convert (intDI_type_node, arg);
5693 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5694 build_int_cst (TREE_TYPE (t), 15));
5695 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5696 build_int_cst (TREE_TYPE (t), -16));
5697 t = fold_convert (TREE_TYPE (arg), t);
5698 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5699 }
5700 else
5701 roundup = NULL;
5702 /* Advance ap.__stack */
5703 t = fold_convert (intDI_type_node, arg);
5704 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5705 build_int_cst (TREE_TYPE (t), size + 7));
5706 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5707 build_int_cst (TREE_TYPE (t), -8));
5708 t = fold_convert (TREE_TYPE (arg), t);
5709 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5710 /* String up roundup and advance. */
5711 if (roundup)
5712 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5713 /* String up with arg */
5714 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5715 /* Big-endianness related address adjustment. */
5716 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5717 && size < UNITS_PER_WORD)
5718 {
5719 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5720 size_int (UNITS_PER_WORD - size));
5721 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5722 }
5723
5724 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5725 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5726
5727 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5728 t = off;
5729 if (adjust)
5730 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5731 build_int_cst (TREE_TYPE (off), adjust));
5732
5733 t = fold_convert (sizetype, t);
5734 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5735
5736 if (is_ha)
5737 {
5738 /* type ha; // treat as "struct {ftype field[n];}"
5739 ... [computing offs]
5740 for (i = 0; i <nregs; ++i, offs += 16)
5741 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5742 return ha; */
5743 int i;
5744 tree tmp_ha, field_t, field_ptr_t;
5745
5746 /* Declare a local variable. */
5747 tmp_ha = create_tmp_var_raw (type, "ha");
5748 gimple_add_tmp_var (tmp_ha);
5749
5750 /* Establish the base type. */
5751 switch (ag_mode)
5752 {
5753 case SFmode:
5754 field_t = float_type_node;
5755 field_ptr_t = float_ptr_type_node;
5756 break;
5757 case DFmode:
5758 field_t = double_type_node;
5759 field_ptr_t = double_ptr_type_node;
5760 break;
5761 case TFmode:
5762 field_t = long_double_type_node;
5763 field_ptr_t = long_double_ptr_type_node;
5764 break;
5765 	        /* Half-precision and quad-precision floats are not fully supported yet.
5766 	           Enable the following code once that support is complete.  We still need
5767 	           to find the correct type node for __fp16 *.  */
5768 #if 0
5769 case HFmode:
5770 field_t = float_type_node;
5771 field_ptr_t = float_ptr_type_node;
5772 break;
5773 #endif
5774 case V2SImode:
5775 case V4SImode:
5776 {
5777 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5778 field_t = build_vector_type_for_mode (innertype, ag_mode);
5779 field_ptr_t = build_pointer_type (field_t);
5780 }
5781 break;
5782 default:
5783 gcc_assert (0);
5784 }
5785
5786 	      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5787 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5788 addr = t;
5789 t = fold_convert (field_ptr_t, addr);
5790 t = build2 (MODIFY_EXPR, field_t,
5791 build1 (INDIRECT_REF, field_t, tmp_ha),
5792 build1 (INDIRECT_REF, field_t, t));
5793
5794 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5795 for (i = 1; i < nregs; ++i)
5796 {
5797 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5798 u = fold_convert (field_ptr_t, addr);
5799 u = build2 (MODIFY_EXPR, field_t,
5800 build2 (MEM_REF, field_t, tmp_ha,
5801 build_int_cst (field_ptr_t,
5802 (i *
5803 int_size_in_bytes (field_t)))),
5804 build1 (INDIRECT_REF, field_t, u));
5805 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5806 }
5807
5808 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5809 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5810 }
5811
5812 COND_EXPR_ELSE (cond2) = t;
5813 addr = fold_convert (build_pointer_type (type), cond1);
5814 addr = build_va_arg_indirect_ref (addr);
5815
5816 if (indirect_p)
5817 addr = build_va_arg_indirect_ref (addr);
5818
5819 return addr;
5820 }
5821
5822 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5823
5824 static void
5825 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5826 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5827 int no_rtl)
5828 {
5829 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5830 CUMULATIVE_ARGS local_cum;
5831 int gr_saved, vr_saved;
5832
5833 /* The caller has advanced CUM up to, but not beyond, the last named
5834 argument. Advance a local copy of CUM past the last "real" named
5835 argument, to find out how many registers are left over. */
5836 local_cum = *cum;
5837 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5838
5839 	  /* Find out how many registers we need to save.  */
5840 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5841 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5842
5843 if (TARGET_GENERAL_REGS_ONLY)
5844 {
5845 if (local_cum.aapcs_nvrn > 0)
5846 sorry ("%qs and floating point or vector arguments",
5847 "-mgeneral-regs-only");
5848 vr_saved = 0;
5849 }
5850
5851 if (!no_rtl)
5852 {
5853 if (gr_saved > 0)
5854 {
5855 rtx ptr, mem;
5856
5857 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5858 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5859 - gr_saved * UNITS_PER_WORD);
5860 mem = gen_frame_mem (BLKmode, ptr);
5861 set_mem_alias_set (mem, get_varargs_alias_set ());
5862
5863 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5864 mem, gr_saved);
5865 }
5866 if (vr_saved > 0)
5867 {
5868 /* We can't use move_block_from_reg, because it will use
5869 the wrong mode, storing D regs only. */
5870 enum machine_mode mode = TImode;
5871 int off, i;
5872
5873 /* Set OFF to the offset from virtual_incoming_args_rtx of
5874 the first vector register. The VR save area lies below
5875 the GR one, and is aligned to 16 bytes. */
5876 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5877 STACK_BOUNDARY / BITS_PER_UNIT);
5878 off -= vr_saved * UNITS_PER_VREG;
5879
5880 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5881 {
5882 rtx ptr, mem;
5883
5884 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5885 mem = gen_frame_mem (mode, ptr);
5886 set_mem_alias_set (mem, get_varargs_alias_set ());
5887 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5888 off += UNITS_PER_VREG;
5889 }
5890 }
5891 }
5892
5893 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5894 any complication of having crtl->args.pretend_args_size changed. */
5895 cfun->machine->saved_varargs_size
5896 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5897 STACK_BOUNDARY / BITS_PER_UNIT)
5898 + vr_saved * UNITS_PER_VREG);
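  /* Illustrative example (not from the original sources): for a variadic
     function such as "int f (int a, ...)", the single named argument uses
     one core register, so gr_saved == 7 and vr_saved == 8 when the FP/SIMD
     registers are available, and saved_varargs_size becomes
     AARCH64_ROUND_UP (7 * 8, 16) + 8 * 16 == 64 + 128 == 192 bytes.  */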
5899 }
5900
5901 static void
5902 aarch64_conditional_register_usage (void)
5903 {
5904 int i;
5905 if (!TARGET_FLOAT)
5906 {
5907 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5908 {
5909 fixed_regs[i] = 1;
5910 call_used_regs[i] = 1;
5911 }
5912 }
5913 }
5914
5915 /* Walk down the type tree of TYPE counting consecutive base elements.
5916 If *MODEP is VOIDmode, then set it to the first valid floating point
5917 type. If a non-floating point type is found, or if a floating point
5918 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5919 otherwise return the count in the sub-tree. */
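/* A few illustrative inputs and the counts this function is expected to
   produce for them, assuming *MODEP starts out as VOIDmode.  The struct
   names below are made up for this sketch and int32x4_t is the arm_neon.h
   vector type; the block is intentionally not compiled.  */
#if 0
struct hfa3   { float x, y, z; };     /* Count 3, *MODEP == SFmode.  */
struct hva2   { int32x4_t a, b; };    /* Count 2, *MODEP == V4SImode.  */
struct mixed  { float x; double y; }; /* -1: element modes do not match.  */
struct toobig { double d[5]; };       /* Count 5; rejected later as an HFA
                                         because it exceeds HA_MAX_NUM_FLDS.  */
#endif
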
5920 static int
5921 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5922 {
5923 enum machine_mode mode;
5924 HOST_WIDE_INT size;
5925
5926 switch (TREE_CODE (type))
5927 {
5928 case REAL_TYPE:
5929 mode = TYPE_MODE (type);
5930 if (mode != DFmode && mode != SFmode && mode != TFmode)
5931 return -1;
5932
5933 if (*modep == VOIDmode)
5934 *modep = mode;
5935
5936 if (*modep == mode)
5937 return 1;
5938
5939 break;
5940
5941 case COMPLEX_TYPE:
5942 mode = TYPE_MODE (TREE_TYPE (type));
5943 if (mode != DFmode && mode != SFmode && mode != TFmode)
5944 return -1;
5945
5946 if (*modep == VOIDmode)
5947 *modep = mode;
5948
5949 if (*modep == mode)
5950 return 2;
5951
5952 break;
5953
5954 case VECTOR_TYPE:
5955 /* Use V2SImode and V4SImode as representatives of all 64-bit
5956 and 128-bit vector types. */
5957 size = int_size_in_bytes (type);
5958 switch (size)
5959 {
5960 case 8:
5961 mode = V2SImode;
5962 break;
5963 case 16:
5964 mode = V4SImode;
5965 break;
5966 default:
5967 return -1;
5968 }
5969
5970 if (*modep == VOIDmode)
5971 *modep = mode;
5972
5973 /* Vector modes are considered to be opaque: two vectors are
5974 equivalent for the purposes of being homogeneous aggregates
5975 if they are the same size. */
5976 if (*modep == mode)
5977 return 1;
5978
5979 break;
5980
5981 case ARRAY_TYPE:
5982 {
5983 int count;
5984 tree index = TYPE_DOMAIN (type);
5985
5986 /* Can't handle incomplete types. */
5987 if (!COMPLETE_TYPE_P (type))
5988 return -1;
5989
5990 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5991 if (count == -1
5992 || !index
5993 || !TYPE_MAX_VALUE (index)
5994 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5995 || !TYPE_MIN_VALUE (index)
5996 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5997 || count < 0)
5998 return -1;
5999
6000 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
6001 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
6002
6003 /* There must be no padding. */
6004 if (!host_integerp (TYPE_SIZE (type), 1)
6005 || (tree_low_cst (TYPE_SIZE (type), 1)
6006 != count * GET_MODE_BITSIZE (*modep)))
6007 return -1;
6008
6009 return count;
6010 }
6011
6012 case RECORD_TYPE:
6013 {
6014 int count = 0;
6015 int sub_count;
6016 tree field;
6017
6018 /* Can't handle incomplete types. */
6019 if (!COMPLETE_TYPE_P (type))
6020 return -1;
6021
6022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6023 {
6024 if (TREE_CODE (field) != FIELD_DECL)
6025 continue;
6026
6027 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6028 if (sub_count < 0)
6029 return -1;
6030 count += sub_count;
6031 }
6032
6033 /* There must be no padding. */
6034 if (!host_integerp (TYPE_SIZE (type), 1)
6035 || (tree_low_cst (TYPE_SIZE (type), 1)
6036 != count * GET_MODE_BITSIZE (*modep)))
6037 return -1;
6038
6039 return count;
6040 }
6041
6042 case UNION_TYPE:
6043 case QUAL_UNION_TYPE:
6044 {
6045 /* These aren't very interesting except in a degenerate case. */
6046 int count = 0;
6047 int sub_count;
6048 tree field;
6049
6050 /* Can't handle incomplete types. */
6051 if (!COMPLETE_TYPE_P (type))
6052 return -1;
6053
6054 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6055 {
6056 if (TREE_CODE (field) != FIELD_DECL)
6057 continue;
6058
6059 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6060 if (sub_count < 0)
6061 return -1;
6062 count = count > sub_count ? count : sub_count;
6063 }
6064
6065 /* There must be no padding. */
6066 if (!host_integerp (TYPE_SIZE (type), 1)
6067 || (tree_low_cst (TYPE_SIZE (type), 1)
6068 != count * GET_MODE_BITSIZE (*modep)))
6069 return -1;
6070
6071 return count;
6072 }
6073
6074 default:
6075 break;
6076 }
6077
6078 return -1;
6079 }
6080
6081 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6082 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6083 array types. The C99 floating-point complex types are also considered
6084 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6085 types, which are GCC extensions and out of the scope of AAPCS64, are
6086 treated as composite types here as well.
6087
6088 Note that MODE itself is not sufficient in determining whether a type
6089 is such a composite type or not. This is because
6090 stor-layout.c:compute_record_mode may have already changed the MODE
6091 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6092 structure with only one field may have its MODE set to the mode of the
6093 field. Also an integer mode whose size matches the size of the
6094 RECORD_TYPE type may be used to substitute the original mode
6095 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6096 solely relied on. */
6097
6098 static bool
6099 aarch64_composite_type_p (const_tree type,
6100 enum machine_mode mode)
6101 {
6102 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6103 return true;
6104
6105 if (mode == BLKmode
6106 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6107 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6108 return true;
6109
6110 return false;
6111 }
6112
6113 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6114 type as described in AAPCS64 \S 4.1.2.
6115
6116 See the comment above aarch64_composite_type_p for the notes on MODE. */
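/* For example, GNU vector types declared with __attribute__ ((vector_size (8)))
   or __attribute__ ((vector_size (16))), and the arm_neon.h types such as
   int8x8_t and int32x4_t, are short vectors; a 32-byte vector type is not.  */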
6117
6118 static bool
6119 aarch64_short_vector_p (const_tree type,
6120 enum machine_mode mode)
6121 {
6122 HOST_WIDE_INT size = -1;
6123
6124 if (type && TREE_CODE (type) == VECTOR_TYPE)
6125 size = int_size_in_bytes (type);
6126 else if (!aarch64_composite_type_p (type, mode)
6127 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6129 size = GET_MODE_SIZE (mode);
6130
6131 return (size == 8 || size == 16) ? true : false;
6132 }
6133
6134 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6135 shall be passed or returned in simd/fp register(s) (providing these
6136 parameter passing registers are available).
6137
6138 Upon successful return, *COUNT returns the number of needed registers,
6139 *BASE_MODE returns the mode of the individual register and when IS_HA
6140 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6141 floating-point aggregate or a homogeneous short-vector aggregate. */
6142
6143 static bool
6144 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6145 const_tree type,
6146 enum machine_mode *base_mode,
6147 int *count,
6148 bool *is_ha)
6149 {
6150 enum machine_mode new_mode = VOIDmode;
6151 bool composite_p = aarch64_composite_type_p (type, mode);
6152
6153 if (is_ha != NULL) *is_ha = false;
6154
6155 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6156 || aarch64_short_vector_p (type, mode))
6157 {
6158 *count = 1;
6159 new_mode = mode;
6160 }
6161 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6162 {
6163 if (is_ha != NULL) *is_ha = true;
6164 *count = 2;
6165 new_mode = GET_MODE_INNER (mode);
6166 }
6167 else if (type && composite_p)
6168 {
6169 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6170
6171 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6172 {
6173 if (is_ha != NULL) *is_ha = true;
6174 *count = ag_count;
6175 }
6176 else
6177 return false;
6178 }
6179 else
6180 return false;
6181
6182 *base_mode = new_mode;
6183 return true;
6184 }
6185
6186 /* Implement TARGET_STRUCT_VALUE_RTX. */
6187
6188 static rtx
6189 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6190 int incoming ATTRIBUTE_UNUSED)
6191 {
6192 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6193 }
6194
6195 /* Implements target hook vector_mode_supported_p. */
6196 static bool
6197 aarch64_vector_mode_supported_p (enum machine_mode mode)
6198 {
6199 if (TARGET_SIMD
6200 && (mode == V4SImode || mode == V8HImode
6201 || mode == V16QImode || mode == V2DImode
6202 || mode == V2SImode || mode == V4HImode
6203 || mode == V8QImode || mode == V2SFmode
6204 || mode == V4SFmode || mode == V2DFmode))
6205 return true;
6206
6207 return false;
6208 }
6209
6210 /* Return appropriate SIMD container
6211 for MODE within a vector of WIDTH bits. */
6212 static enum machine_mode
6213 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6214 {
6215 gcc_assert (width == 64 || width == 128);
6216 if (TARGET_SIMD)
6217 {
6218 if (width == 128)
6219 switch (mode)
6220 {
6221 case DFmode:
6222 return V2DFmode;
6223 case SFmode:
6224 return V4SFmode;
6225 case SImode:
6226 return V4SImode;
6227 case HImode:
6228 return V8HImode;
6229 case QImode:
6230 return V16QImode;
6231 case DImode:
6232 return V2DImode;
6233 default:
6234 break;
6235 }
6236 else
6237 switch (mode)
6238 {
6239 case SFmode:
6240 return V2SFmode;
6241 case SImode:
6242 return V2SImode;
6243 case HImode:
6244 return V4HImode;
6245 case QImode:
6246 return V8QImode;
6247 default:
6248 break;
6249 }
6250 }
6251 return word_mode;
6252 }
6253
6254 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6255 static enum machine_mode
6256 aarch64_preferred_simd_mode (enum machine_mode mode)
6257 {
6258 return aarch64_simd_container_mode (mode, 128);
6259 }
6260
6261 /* Return the bitmask of possible vector sizes for the vectorizer
6262 to iterate over. */
6263 static unsigned int
6264 aarch64_autovectorize_vector_sizes (void)
6265 {
6266 return (16 | 8);
6267 }
6268
6269 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6270 vector types in order to conform to the AAPCS64 (see "Procedure
6271 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6272 qualify for emission with the mangled names defined in that document,
6273 a vector type must not only be of the correct mode but also be
6274 composed of AdvSIMD vector element types (e.g.
6275 __builtin_aarch64_simd_qi); these types are registered by
6276 aarch64_init_simd_builtins (). In other words, vector types defined
6277 in other ways, e.g. via the vector_size attribute, will get default
6278 mangled names. */
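/* For instance, with the table below a C++ function declared as
   "void f (int32x4_t)" is mangled as "_Z1f11__Int32x4_t", while an
   equivalent user-defined vector_size (16) type keeps the default
   mangling.  */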
6279 typedef struct
6280 {
6281 enum machine_mode mode;
6282 const char *element_type_name;
6283 const char *mangled_name;
6284 } aarch64_simd_mangle_map_entry;
6285
6286 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6287 /* 64-bit containerized types. */
6288 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6289 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6290 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6291 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6292 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6293 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6294 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6295 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6296 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6297 /* 128-bit containerized types. */
6298 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6299 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6300 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6301 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6302 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6303 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6304 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6305 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6306 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6307 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6308 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6309 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6310 { VOIDmode, NULL, NULL }
6311 };
6312
6313 /* Implement TARGET_MANGLE_TYPE. */
6314
6315 static const char *
6316 aarch64_mangle_type (const_tree type)
6317 {
6318 /* The AArch64 ABI documents say that "__va_list" has to be
6319 mangled as if it is in the "std" namespace. */
6320 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6321 return "St9__va_list";
6322
6323 /* Check the mode of the vector type, and the name of the vector
6324 element type, against the table. */
6325 if (TREE_CODE (type) == VECTOR_TYPE)
6326 {
6327 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6328
6329 while (pos->mode != VOIDmode)
6330 {
6331 tree elt_type = TREE_TYPE (type);
6332
6333 if (pos->mode == TYPE_MODE (type)
6334 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6335 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6336 pos->element_type_name))
6337 return pos->mangled_name;
6338
6339 pos++;
6340 }
6341 }
6342
6343 /* Use the default mangling. */
6344 return NULL;
6345 }
6346
6347 /* Return the equivalent letter for size. */
6348 static char
6349 sizetochar (int size)
6350 {
6351 switch (size)
6352 {
6353 case 64: return 'd';
6354 case 32: return 's';
6355 case 16: return 'h';
6356 case 8 : return 'b';
6357 default: gcc_unreachable ();
6358 }
6359 }
6360
6361 /* Return true iff X is a uniform vector of floating-point
6362 constants, and the constant can be represented in
6363 quarter-precision form. Note, as aarch64_float_const_representable_p
6364 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6365 static bool
6366 aarch64_vect_float_const_representable_p (rtx x)
6367 {
6368 int i = 0;
6369 REAL_VALUE_TYPE r0, ri;
6370 rtx x0, xi;
6371
6372 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6373 return false;
6374
6375 x0 = CONST_VECTOR_ELT (x, 0);
6376 if (!CONST_DOUBLE_P (x0))
6377 return false;
6378
6379 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6380
6381 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6382 {
6383 xi = CONST_VECTOR_ELT (x, i);
6384 if (!CONST_DOUBLE_P (xi))
6385 return false;
6386
6387 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6388 if (!REAL_VALUES_EQUAL (r0, ri))
6389 return false;
6390 }
6391
6392 return aarch64_float_const_representable_p (x0);
6393 }
6394
6395 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise. If INFO is nonnull, use it to describe how the immediate should be generated. */
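/* Worked example (illustrative only): a V4SImode CONST_VECTOR whose elements
   are all 0x00ab0000 splats to the per-lane byte pattern { 00, 00, ab, 00 }
   (least significant byte first), which matches the CLASS 2 CHECK below, so
   INFO is filled in with value 0xab, element_width 32, shift 16 and
   mvn == false -- i.e. the constant can be built with
   "movi v0.4s, 0xab, lsl 16".  */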
6396 bool
6397 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6398 struct simd_immediate_info *info)
6399 {
6400 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6401 matches = 1; \
6402 for (i = 0; i < idx; i += (STRIDE)) \
6403 if (!(TEST)) \
6404 matches = 0; \
6405 if (matches) \
6406 { \
6407 immtype = (CLASS); \
6408 elsize = (ELSIZE); \
6409 eshift = (SHIFT); \
6410 emvn = (NEG); \
6411 break; \
6412 }
6413
6414 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6415 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6416 unsigned char bytes[16];
6417 int immtype = -1, matches;
6418 unsigned int invmask = inverse ? 0xff : 0;
6419 int eshift, emvn;
6420
6421 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6422 {
6423 if (! (aarch64_simd_imm_zero_p (op, mode)
6424 || aarch64_vect_float_const_representable_p (op)))
6425 return false;
6426
6427 if (info)
6428 {
6429 info->value = CONST_VECTOR_ELT (op, 0);
6430 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6431 info->mvn = false;
6432 info->shift = 0;
6433 }
6434
6435 return true;
6436 }
6437
6438 /* Splat vector constant out into a byte vector. */
6439 for (i = 0; i < n_elts; i++)
6440 {
6441 rtx el = CONST_VECTOR_ELT (op, i);
6442 unsigned HOST_WIDE_INT elpart;
6443 unsigned int part, parts;
6444
6445 if (GET_CODE (el) == CONST_INT)
6446 {
6447 elpart = INTVAL (el);
6448 parts = 1;
6449 }
6450 else if (GET_CODE (el) == CONST_DOUBLE)
6451 {
6452 elpart = CONST_DOUBLE_LOW (el);
6453 parts = 2;
6454 }
6455 else
6456 gcc_unreachable ();
6457
6458 for (part = 0; part < parts; part++)
6459 {
6460 unsigned int byte;
6461 for (byte = 0; byte < innersize; byte++)
6462 {
6463 bytes[idx++] = (elpart & 0xff) ^ invmask;
6464 elpart >>= BITS_PER_UNIT;
6465 }
6466 if (GET_CODE (el) == CONST_DOUBLE)
6467 elpart = CONST_DOUBLE_HIGH (el);
6468 }
6469 }
6470
6471 /* Sanity check. */
6472 gcc_assert (idx == GET_MODE_SIZE (mode));
6473
6474 do
6475 {
6476 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6477 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6478
6479 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6480 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6481
6482 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6483 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6484
6485 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6486 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6487
6488 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6489
6490 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6491
6492 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6493 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6494
6495 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6496 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6497
6498 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6499 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6500
6501 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6502 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6503
6504 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6505
6506 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6507
6508 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6509 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6510
6511 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6512 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6513
6514 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6515 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6516
6517 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6518 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6519
6520 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6521
6522 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6523 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6524 }
6525 while (0);
6526
6527 if (immtype == -1)
6528 return false;
6529
6530 if (info)
6531 {
6532 info->element_width = elsize;
6533 info->mvn = emvn != 0;
6534 info->shift = eshift;
6535
6536 unsigned HOST_WIDE_INT imm = 0;
6537
6538 if (immtype >= 12 && immtype <= 15)
6539 info->msl = true;
6540
6541 /* Un-invert bytes of recognized vector, if necessary. */
6542 if (invmask != 0)
6543 for (i = 0; i < idx; i++)
6544 bytes[i] ^= invmask;
6545
6546 if (immtype == 17)
6547 {
6548 /* FIXME: Broken on 32-bit H_W_I hosts. */
6549 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6550
6551 for (i = 0; i < 8; i++)
6552 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6553 << (i * BITS_PER_UNIT);
6554
6555
6556 info->value = GEN_INT (imm);
6557 }
6558 else
6559 {
6560 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6561 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6562
6563 /* Construct 'abcdefgh' because the assembler cannot handle
6564 generic constants. */
6565 if (info->mvn)
6566 imm = ~imm;
6567 imm = (imm >> info->shift) & 0xff;
6568 info->value = GEN_INT (imm);
6569 }
6570 }
6571
6572 return true;
6573 #undef CHECK
6574 }
6575
6576 static bool
6577 aarch64_const_vec_all_same_int_p (rtx x,
6578 HOST_WIDE_INT minval,
6579 HOST_WIDE_INT maxval)
6580 {
6581 HOST_WIDE_INT firstval;
6582 int count, i;
6583
6584 if (GET_CODE (x) != CONST_VECTOR
6585 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6586 return false;
6587
6588 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6589 if (firstval < minval || firstval > maxval)
6590 return false;
6591
6592 count = CONST_VECTOR_NUNITS (x);
6593 for (i = 1; i < count; i++)
6594 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6595 return false;
6596
6597 return true;
6598 }
6599
6600 /* Check if immediate shift constants are within range. */
6601 bool
6602 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6603 {
6604 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6605 if (left)
6606 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6607 else
6608 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6609 }
6610
6611 /* Return true if X is a uniform vector where all elements
6612 are either the floating-point constant 0.0 or the
6613 integer constant 0. */
6614 bool
6615 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6616 {
6617 return x == CONST0_RTX (mode);
6618 }
6619
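/* Return true if X is a 64-bit scalar immediate in which every byte is
   either 0x00 or 0xff, i.e. the form accepted by the 64-bit AdvSIMD MOVI.
   For example, 0x00ff00ffff00ff00 is accepted while 0x00ff00ff00ff00fe is
   rejected because of its 0xfe byte.  */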
6620 bool
6621 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6622 {
6623 HOST_WIDE_INT imm = INTVAL (x);
6624 int i;
6625
6626 for (i = 0; i < 8; i++)
6627 {
6628 unsigned int byte = imm & 0xff;
6629 if (byte != 0xff && byte != 0)
6630 return false;
6631 imm >>= 8;
6632 }
6633
6634 return true;
6635 }
6636
6637 bool
6638 aarch64_mov_operand_p (rtx x,
6639 enum aarch64_symbol_context context,
6640 enum machine_mode mode)
6641 {
6642 if (GET_CODE (x) == HIGH
6643 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6644 return true;
6645
6646 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6647 return true;
6648
6649 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6650 return true;
6651
6652 return aarch64_classify_symbolic_expression (x, context)
6653 == SYMBOL_TINY_ABSOLUTE;
6654 }
6655
6656 /* Return a const_int vector of VAL. */
6657 rtx
6658 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6659 {
6660 int nunits = GET_MODE_NUNITS (mode);
6661 rtvec v = rtvec_alloc (nunits);
6662 int i;
6663
6664 for (i = 0; i < nunits; i++)
6665 RTVEC_ELT (v, i) = GEN_INT (val);
6666
6667 return gen_rtx_CONST_VECTOR (mode, v);
6668 }
6669
6670 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6671
6672 bool
6673 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6674 {
6675 enum machine_mode vmode;
6676
6677 gcc_assert (!VECTOR_MODE_P (mode));
6678 vmode = aarch64_preferred_simd_mode (mode);
6679 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6680 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6681 }
6682
6683 /* Construct and return a PARALLEL RTX vector. */
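/* E.g. for V4SImode this returns (parallel [0 1]) when HIGH is false and
   (parallel [2 3]) when HIGH is true.  */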
6684 rtx
6685 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6686 {
6687 int nunits = GET_MODE_NUNITS (mode);
6688 rtvec v = rtvec_alloc (nunits / 2);
6689 int base = high ? nunits / 2 : 0;
6690 rtx t1;
6691 int i;
6692
6693 for (i = 0; i < nunits / 2; i++)
6694 RTVEC_ELT (v, i) = GEN_INT (base + i);
6695
6696 t1 = gen_rtx_PARALLEL (mode, v);
6697 return t1;
6698 }
6699
6700 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6701 HIGH (exclusive). */
6702 void
6703 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6704 {
6705 HOST_WIDE_INT lane;
6706 gcc_assert (GET_CODE (operand) == CONST_INT);
6707 lane = INTVAL (operand);
6708
6709 if (lane < low || lane >= high)
6710 error ("lane out of range");
6711 }
6712
6713 void
6714 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6715 {
6716 gcc_assert (GET_CODE (operand) == CONST_INT);
6717 HOST_WIDE_INT lane = INTVAL (operand);
6718
6719 if (lane < low || lane >= high)
6720 error ("constant out of range");
6721 }
6722
6723 /* Emit code to reinterpret one AdvSIMD type as another,
6724 without altering bits. */
6725 void
6726 aarch64_simd_reinterpret (rtx dest, rtx src)
6727 {
6728 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6729 }
6730
6731 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6732 registers). */
6733 void
6734 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6735 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6736 rtx op1)
6737 {
6738 rtx mem = gen_rtx_MEM (mode, destaddr);
6739 rtx tmp1 = gen_reg_rtx (mode);
6740 rtx tmp2 = gen_reg_rtx (mode);
6741
6742 emit_insn (intfn (tmp1, op1, tmp2));
6743
6744 emit_move_insn (mem, tmp1);
6745 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6746 emit_move_insn (mem, tmp2);
6747 }
6748
6749 /* Return TRUE if OP is a valid vector addressing mode. */
6750 bool
6751 aarch64_simd_mem_operand_p (rtx op)
6752 {
6753 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6754 || GET_CODE (XEXP (op, 0)) == REG);
6755 }
6756
6757 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6758 not to early-clobber SRC registers in the process.
6759
6760 We assume that the operands described by SRC and DEST represent a
6761 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6762 number of components into which the copy has been decomposed. */
6763 void
6764 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6765 rtx *src, unsigned int count)
6766 {
6767 unsigned int i;
6768
6769 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6770 || REGNO (operands[0]) < REGNO (operands[1]))
6771 {
6772 for (i = 0; i < count; i++)
6773 {
6774 operands[2 * i] = dest[i];
6775 operands[2 * i + 1] = src[i];
6776 }
6777 }
6778 else
6779 {
6780 for (i = 0; i < count; i++)
6781 {
6782 operands[2 * i] = dest[count - i - 1];
6783 operands[2 * i + 1] = src[count - i - 1];
6784 }
6785 }
6786 }
6787
6788 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6789 one of VSTRUCT modes: OI, CI or XI. */
6790 int
6791 aarch64_simd_attr_length_move (rtx insn)
6792 {
6793 enum machine_mode mode;
6794
6795 extract_insn_cached (insn);
6796
6797 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6798 {
6799 mode = GET_MODE (recog_data.operand[0]);
6800 switch (mode)
6801 {
6802 case OImode:
6803 return 8;
6804 case CImode:
6805 return 12;
6806 case XImode:
6807 return 16;
6808 default:
6809 gcc_unreachable ();
6810 }
6811 }
6812 return 4;
6813 }
6814
6815 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6816 alignment of a vector to 128 bits. */
6817 static HOST_WIDE_INT
6818 aarch64_simd_vector_alignment (const_tree type)
6819 {
6820 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6821 return MIN (align, 128);
6822 }
6823
6824 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6825 static bool
6826 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6827 {
6828 if (is_packed)
6829 return false;
6830
6831 /* We guarantee alignment for vectors up to 128 bits. */
6832 if (tree_int_cst_compare (TYPE_SIZE (type),
6833 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6834 return false;
6835
6836 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6837 return true;
6838 }
6839
6840 /* If VALS is a vector constant that can be loaded into a register
6841 using DUP, generate instructions to do so and return an RTX to
6842 assign to the register. Otherwise return NULL_RTX. */
6843 static rtx
6844 aarch64_simd_dup_constant (rtx vals)
6845 {
6846 enum machine_mode mode = GET_MODE (vals);
6847 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6848 int n_elts = GET_MODE_NUNITS (mode);
6849 bool all_same = true;
6850 rtx x;
6851 int i;
6852
6853 if (GET_CODE (vals) != CONST_VECTOR)
6854 return NULL_RTX;
6855
6856 for (i = 1; i < n_elts; ++i)
6857 {
6858 x = CONST_VECTOR_ELT (vals, i);
6859 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6860 all_same = false;
6861 }
6862
6863 if (!all_same)
6864 return NULL_RTX;
6865
6866 /* We can load this constant by using DUP and a constant in a
6867 single ARM register. This will be cheaper than a vector
6868 load. */
6869 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6870 return gen_rtx_VEC_DUPLICATE (mode, x);
6871 }
6872
6873
6874 /* Generate code to load VALS, which is a PARALLEL containing only
6875 constants (for vec_init) or CONST_VECTOR, efficiently into a
6876 register. Returns an RTX to copy into the register, or NULL_RTX
6877 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6878 static rtx
6879 aarch64_simd_make_constant (rtx vals)
6880 {
6881 enum machine_mode mode = GET_MODE (vals);
6882 rtx const_dup;
6883 rtx const_vec = NULL_RTX;
6884 int n_elts = GET_MODE_NUNITS (mode);
6885 int n_const = 0;
6886 int i;
6887
6888 if (GET_CODE (vals) == CONST_VECTOR)
6889 const_vec = vals;
6890 else if (GET_CODE (vals) == PARALLEL)
6891 {
6892 /* A CONST_VECTOR must contain only CONST_INTs and
6893 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6894 Only store valid constants in a CONST_VECTOR. */
6895 for (i = 0; i < n_elts; ++i)
6896 {
6897 rtx x = XVECEXP (vals, 0, i);
6898 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6899 n_const++;
6900 }
6901 if (n_const == n_elts)
6902 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6903 }
6904 else
6905 gcc_unreachable ();
6906
6907 if (const_vec != NULL_RTX
6908 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6909 /* Load using MOVI/MVNI. */
6910 return const_vec;
6911 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6912 /* Loaded using DUP. */
6913 return const_dup;
6914 else if (const_vec != NULL_RTX)
6915 /* Load from constant pool. We cannot take advantage of single-cycle
6916 LD1 because we need a PC-relative addressing mode. */
6917 return const_vec;
6918 else
6919 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6920 We cannot construct an initializer. */
6921 return NULL_RTX;
6922 }
6923
6924 void
6925 aarch64_expand_vector_init (rtx target, rtx vals)
6926 {
6927 enum machine_mode mode = GET_MODE (target);
6928 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6929 int n_elts = GET_MODE_NUNITS (mode);
6930 int n_var = 0, one_var = -1;
6931 bool all_same = true;
6932 rtx x, mem;
6933 int i;
6934
6935 x = XVECEXP (vals, 0, 0);
6936 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6937 n_var = 1, one_var = 0;
6938
6939 for (i = 1; i < n_elts; ++i)
6940 {
6941 x = XVECEXP (vals, 0, i);
6942 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6943 ++n_var, one_var = i;
6944
6945 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6946 all_same = false;
6947 }
6948
6949 if (n_var == 0)
6950 {
6951 rtx constant = aarch64_simd_make_constant (vals);
6952 if (constant != NULL_RTX)
6953 {
6954 emit_move_insn (target, constant);
6955 return;
6956 }
6957 }
6958
6959 /* Splat a single non-constant element if we can. */
6960 if (all_same)
6961 {
6962 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6963 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6964 return;
6965 }
6966
6967 /* One field is non-constant. Load constant then overwrite varying
6968 field. This is more efficient than using the stack. */
6969 if (n_var == 1)
6970 {
6971 rtx copy = copy_rtx (vals);
6972 rtx index = GEN_INT (one_var);
6973 enum insn_code icode;
6974
6975 /* Load constant part of vector, substitute neighboring value for
6976 varying element. */
6977 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6978 aarch64_expand_vector_init (target, copy);
6979
6980 /* Insert variable. */
6981 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6982 icode = optab_handler (vec_set_optab, mode);
6983 gcc_assert (icode != CODE_FOR_nothing);
6984 emit_insn (GEN_FCN (icode) (target, x, index));
6985 return;
6986 }
6987
6988 /* Construct the vector in memory one field at a time
6989 and load the whole vector. */
6990 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6991 for (i = 0; i < n_elts; i++)
6992 emit_move_insn (adjust_address_nv (mem, inner_mode,
6993 i * GET_MODE_SIZE (inner_mode)),
6994 XVECEXP (vals, 0, i));
6995 emit_move_insn (target, mem);
6996
6997 }
6998
6999 static unsigned HOST_WIDE_INT
7000 aarch64_shift_truncation_mask (enum machine_mode mode)
7001 {
7002 return
7003 (aarch64_vector_mode_supported_p (mode)
7004 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7005 }
7006
7007 #ifndef TLS_SECTION_ASM_FLAG
7008 #define TLS_SECTION_ASM_FLAG 'T'
7009 #endif
7010
7011 void
7012 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7013 tree decl ATTRIBUTE_UNUSED)
7014 {
7015 char flagchars[10], *f = flagchars;
7016
7017 /* If we have already declared this section, we can use an
7018 abbreviated form to switch back to it -- unless this section is
7019 part of a COMDAT group, in which case GAS requires the full
7020 declaration every time. */
7021 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7022 && (flags & SECTION_DECLARED))
7023 {
7024 fprintf (asm_out_file, "\t.section\t%s\n", name);
7025 return;
7026 }
7027
7028 if (!(flags & SECTION_DEBUG))
7029 *f++ = 'a';
7030 if (flags & SECTION_WRITE)
7031 *f++ = 'w';
7032 if (flags & SECTION_CODE)
7033 *f++ = 'x';
7034 if (flags & SECTION_SMALL)
7035 *f++ = 's';
7036 if (flags & SECTION_MERGE)
7037 *f++ = 'M';
7038 if (flags & SECTION_STRINGS)
7039 *f++ = 'S';
7040 if (flags & SECTION_TLS)
7041 *f++ = TLS_SECTION_ASM_FLAG;
7042 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7043 *f++ = 'G';
7044 *f = '\0';
7045
7046 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7047
7048 if (!(flags & SECTION_NOTYPE))
7049 {
7050 const char *type;
7051 const char *format;
7052
7053 if (flags & SECTION_BSS)
7054 type = "nobits";
7055 else
7056 type = "progbits";
7057
7058 #ifdef TYPE_OPERAND_FMT
7059 format = "," TYPE_OPERAND_FMT;
7060 #else
7061 format = ",@%s";
7062 #endif
7063
7064 fprintf (asm_out_file, format, type);
7065
7066 if (flags & SECTION_ENTSIZE)
7067 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7068 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7069 {
7070 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7071 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7072 else
7073 fprintf (asm_out_file, ",%s,comdat",
7074 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7075 }
7076 }
7077
7078 putc ('\n', asm_out_file);
7079 }
7080
7081 /* Select a format to encode pointers in exception handling data. */
7082 int
7083 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7084 {
7085 int type;
7086 switch (aarch64_cmodel)
7087 {
7088 case AARCH64_CMODEL_TINY:
7089 case AARCH64_CMODEL_TINY_PIC:
7090 case AARCH64_CMODEL_SMALL:
7091 case AARCH64_CMODEL_SMALL_PIC:
7092 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7093 for everything. */
7094 type = DW_EH_PE_sdata4;
7095 break;
7096 default:
7097 /* No assumptions here. 8-byte relocs required. */
7098 type = DW_EH_PE_sdata8;
7099 break;
7100 }
7101 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7102 }
7103
7104 /* Emit load exclusive. */
7105
7106 static void
7107 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7108 rtx mem, rtx model_rtx)
7109 {
7110 rtx (*gen) (rtx, rtx, rtx);
7111
7112 switch (mode)
7113 {
7114 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7115 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7116 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7117 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7118 default:
7119 gcc_unreachable ();
7120 }
7121
7122 emit_insn (gen (rval, mem, model_rtx));
7123 }
7124
7125 /* Emit store exclusive. */
7126
7127 static void
7128 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7129 rtx rval, rtx mem, rtx model_rtx)
7130 {
7131 rtx (*gen) (rtx, rtx, rtx, rtx);
7132
7133 switch (mode)
7134 {
7135 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7136 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7137 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7138 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7139 default:
7140 gcc_unreachable ();
7141 }
7142
7143 emit_insn (gen (bval, rval, mem, model_rtx));
7144 }
7145
7146 /* Emit jump pattern INSN as a jump instruction and mark it as very unlikely to be taken. */
7147
7148 static void
7149 aarch64_emit_unlikely_jump (rtx insn)
7150 {
7151 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7152
7153 insn = emit_jump_insn (insn);
7154 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7155 }
7156
7157 /* Expand a compare and swap pattern. */
7158
7159 void
7160 aarch64_expand_compare_and_swap (rtx operands[])
7161 {
7162 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7163 enum machine_mode mode, cmp_mode;
7164 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7165
7166 bval = operands[0];
7167 rval = operands[1];
7168 mem = operands[2];
7169 oldval = operands[3];
7170 newval = operands[4];
7171 is_weak = operands[5];
7172 mod_s = operands[6];
7173 mod_f = operands[7];
7174 mode = GET_MODE (mem);
7175 cmp_mode = mode;
7176
7177 /* Normally the succ memory model must be stronger than fail, but in the
7178 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7179 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7180
7181 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7182 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7183 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7184
7185 switch (mode)
7186 {
7187 case QImode:
7188 case HImode:
7189 /* For short modes, we're going to perform the comparison in SImode,
7190 so do the zero-extension now. */
7191 cmp_mode = SImode;
7192 rval = gen_reg_rtx (SImode);
7193 oldval = convert_modes (SImode, mode, oldval, true);
7194 /* Fall through. */
7195
7196 case SImode:
7197 case DImode:
7198 /* Force the value into a register if needed. */
7199 if (!aarch64_plus_operand (oldval, mode))
7200 oldval = force_reg (cmp_mode, oldval);
7201 break;
7202
7203 default:
7204 gcc_unreachable ();
7205 }
7206
7207 switch (mode)
7208 {
7209 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7210 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7211 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7212 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7213 default:
7214 gcc_unreachable ();
7215 }
7216
7217 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7218
7219 if (mode == QImode || mode == HImode)
7220 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7221
7222 x = gen_rtx_REG (CCmode, CC_REGNUM);
7223 x = gen_rtx_EQ (SImode, x, const0_rtx);
7224 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7225 }
7226
7227 /* Split a compare and swap pattern. */
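/* For reference, for a strong compare-and-swap the split below emits a loop
   of roughly this shape (illustrative pseudo-assembly; the exact
   load/store-exclusive variants depend on the memory model operands):

     retry:  ldxr   rval, [mem]
             cmp    rval, oldval
             b.ne   done
             stxr   scratch, newval, [mem]
             cbnz   scratch, retry
     done:                                                                   */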
7228
7229 void
7230 aarch64_split_compare_and_swap (rtx operands[])
7231 {
7232 rtx rval, mem, oldval, newval, scratch;
7233 enum machine_mode mode;
7234 bool is_weak;
7235 rtx label1, label2, x, cond;
7236
7237 rval = operands[0];
7238 mem = operands[1];
7239 oldval = operands[2];
7240 newval = operands[3];
7241 is_weak = (operands[4] != const0_rtx);
7242 scratch = operands[7];
7243 mode = GET_MODE (mem);
7244
7245 label1 = NULL_RTX;
7246 if (!is_weak)
7247 {
7248 label1 = gen_label_rtx ();
7249 emit_label (label1);
7250 }
7251 label2 = gen_label_rtx ();
7252
7253 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7254
7255 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7256 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7257 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7258 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7259 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7260
7261 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7262
7263 if (!is_weak)
7264 {
7265 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7266 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7267 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7268 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7269 }
7270 else
7271 {
7272 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7273 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7274 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7275 }
7276
7277 emit_label (label2);
7278 }
7279
7280 /* Split an atomic operation. */
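/* For reference, for CODE == PLUS the split below emits a loop of roughly
   this shape (illustrative pseudo-assembly):

     retry:  ldxr   old_out, [mem]
             add    new_out, old_out, value
             stxr   cond, new_out, [mem]
             cbnz   cond, retry                                              */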
7281
7282 void
7283 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7284 rtx value, rtx model_rtx, rtx cond)
7285 {
7286 enum machine_mode mode = GET_MODE (mem);
7287 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7288 rtx label, x;
7289
7290 label = gen_label_rtx ();
7291 emit_label (label);
7292
7293 if (new_out)
7294 new_out = gen_lowpart (wmode, new_out);
7295 if (old_out)
7296 old_out = gen_lowpart (wmode, old_out);
7297 else
7298 old_out = new_out;
7299 value = simplify_gen_subreg (wmode, value, mode, 0);
7300
7301 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7302
7303 switch (code)
7304 {
7305 case SET:
7306 new_out = value;
7307 break;
7308
7309 case NOT:
7310 x = gen_rtx_AND (wmode, old_out, value);
7311 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7312 x = gen_rtx_NOT (wmode, new_out);
7313 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7314 break;
7315
7316 case MINUS:
7317 if (CONST_INT_P (value))
7318 {
7319 value = GEN_INT (-INTVAL (value));
7320 code = PLUS;
7321 }
7322 /* Fall through. */
7323
7324 default:
7325 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7326 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7327 break;
7328 }
7329
7330 aarch64_emit_store_exclusive (mode, cond, mem,
7331 gen_lowpart (mode, new_out), model_rtx);
7332
7333 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7334 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7335 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7336 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7337 }
7338
7339 static void
7340 aarch64_print_extension (void)
7341 {
7342 const struct aarch64_option_extension *opt = NULL;
7343
7344 for (opt = all_extensions; opt->name != NULL; opt++)
7345 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7346 asm_fprintf (asm_out_file, "+%s", opt->name);
7347
7348 asm_fprintf (asm_out_file, "\n");
7349 }
7350
7351 static void
7352 aarch64_start_file (void)
7353 {
7354 if (selected_arch)
7355 {
7356 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7357 aarch64_print_extension ();
7358 }
7359 else if (selected_cpu)
7360 {
7361 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7362 aarch64_print_extension ();
7363 }
7364 default_file_start ();
7365 }
7366
7367 /* Target hook for c_mode_for_suffix. */
7368 static enum machine_mode
7369 aarch64_c_mode_for_suffix (char suffix)
7370 {
7371 if (suffix == 'q')
7372 return TFmode;
7373
7374 return VOIDmode;
7375 }
7376
7377 /* We can only represent floating point constants which will fit in
7378 "quarter-precision" values. These values are characterised by
7379 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7380 by:
7381
7382 (-1)^s * (n/16) * 2^r
7383
7384 Where:
7385 's' is the sign bit.
7386 'n' is an integer in the range 16 <= n <= 31.
7387 'r' is an integer in the range -3 <= r <= 4. */
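/* Worked example (illustrative only): 1.25 is representable, since
   1.25 = (+1) * (20/16) * 2^0 with n = 20 and r = 0, whereas 0.1 is not,
   because no n in [16, 31] and r in [-3, 4] give n/16 * 2^r == 0.1.  */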
7388
7389 /* Return true iff X can be represented by a quarter-precision
7390 floating point immediate operand. Note, we cannot represent 0.0. */
7391 bool
7392 aarch64_float_const_representable_p (rtx x)
7393 {
7394 /* This represents our current view of how many bits
7395 make up the mantissa. */
7396 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7397 int exponent;
7398 unsigned HOST_WIDE_INT mantissa, mask;
7399 HOST_WIDE_INT m1, m2;
7400 REAL_VALUE_TYPE r, m;
7401
7402 if (!CONST_DOUBLE_P (x))
7403 return false;
7404
7405 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7406
7407 /* We cannot represent infinities, NaNs or +/-zero. We won't
7408 know if we have +zero until we analyse the mantissa, but we
7409 can reject the other invalid values. */
7410 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7411 || REAL_VALUE_MINUS_ZERO (r))
7412 return false;
7413
7414 /* Extract exponent. */
7415 r = real_value_abs (&r);
7416 exponent = REAL_EXP (&r);
7417
7418 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7419 highest (sign) bit, with a fixed binary point at bit point_pos.
7420 m1 holds the low part of the mantissa, m2 the high part.
7421 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7422 bits for the mantissa, this can fail (low bits will be lost). */
7423 real_ldexp (&m, &r, point_pos - exponent);
7424 REAL_VALUE_TO_INT (&m1, &m2, m);
7425
7426 /* If the low part of the mantissa has bits set we cannot represent
7427 the value. */
7428 if (m1 != 0)
7429 return false;
7430 /* We have rejected the lower HOST_WIDE_INT, so update our
7431 understanding of how many bits lie in the mantissa and
7432 look only at the high HOST_WIDE_INT. */
7433 mantissa = m2;
7434 point_pos -= HOST_BITS_PER_WIDE_INT;
7435
7436 /* We can only represent values with a mantissa of the form 1.xxxx. */
7437 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7438 if ((mantissa & mask) != 0)
7439 return false;
7440
7441 /* Having filtered unrepresentable values, we may now remove all
7442 but the highest 5 bits. */
7443 mantissa >>= point_pos - 5;
7444
7445 /* We cannot represent the value 0.0, so reject it. This is handled
7446 elsewhere. */
7447 if (mantissa == 0)
7448 return false;
7449
7450 /* Then, as bit 4 is always set, we can mask it off, leaving
7451 the mantissa in the range [0, 15]. */
7452 mantissa &= ~(1 << 4);
7453 gcc_assert (mantissa <= 15);
7454
7455 /* GCC internally does not use IEEE754-like encoding (where normalized
7456 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
7457 Our mantissa values are shifted 4 places to the left relative to
7458 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7459 by 5 places to correct for GCC's representation. */
7460 exponent = 5 - exponent;
7461
7462 return (exponent >= 0 && exponent <= 7);
7463 }
7464
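/* Return the asm template for an AdvSIMD immediate move of CONST_VECTOR,
   whose mode is MODE, into a register of WIDTH bits.  Example outputs
   (illustrative only): a V4SImode vector with all elements 0x00ab0000 gives
   "movi v0.4s, 0xab, lsl 16", while a V2DFmode vector of a representable
   constant such as 1.0 uses the "fmov" form.  */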
7465 char*
7466 aarch64_output_simd_mov_immediate (rtx const_vector,
7467 enum machine_mode mode,
7468 unsigned width)
7469 {
7470 bool is_valid;
7471 static char templ[40];
7472 const char *mnemonic;
7473 const char *shift_op;
7474 unsigned int lane_count = 0;
7475 char element_char;
7476
7477 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7478
7479 /* This will return true to show const_vector is legal for use as either
7480 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7481 also update INFO to show how the immediate should be generated. */
7482 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7483 gcc_assert (is_valid);
7484
7485 element_char = sizetochar (info.element_width);
7486 lane_count = width / info.element_width;
7487
7488 mode = GET_MODE_INNER (mode);
7489 if (mode == SFmode || mode == DFmode)
7490 {
7491 gcc_assert (info.shift == 0 && ! info.mvn);
7492 if (aarch64_float_const_zero_rtx_p (info.value))
7493 info.value = GEN_INT (0);
7494 else
7495 {
7496 #define buf_size 20
7497 REAL_VALUE_TYPE r;
7498 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7499 char float_buf[buf_size] = {'\0'};
7500 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7501 #undef buf_size
7502
7503 if (lane_count == 1)
7504 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7505 else
7506 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7507 lane_count, element_char, float_buf);
7508 return templ;
7509 }
7510 }
7511
7512 mnemonic = info.mvn ? "mvni" : "movi";
7513 shift_op = info.msl ? "msl" : "lsl";
7514
7515 if (lane_count == 1)
7516 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7517 mnemonic, UINTVAL (info.value));
7518 else if (info.shift)
7519 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7520 ", %s %d", mnemonic, lane_count, element_char,
7521 UINTVAL (info.value), shift_op, info.shift);
7522 else
7523 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7524 mnemonic, lane_count, element_char, UINTVAL (info.value));
7525 return templ;
7526 }
7527
7528 char*
7529 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7530 enum machine_mode mode)
7531 {
7532 enum machine_mode vmode;
7533
7534 gcc_assert (!VECTOR_MODE_P (mode));
7535 vmode = aarch64_simd_container_mode (mode, 64);
7536 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7537 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7538 }
7539
7540 /* Split operands into moves from op[1] + op[2] into op[0]. */
7541
7542 void
7543 aarch64_split_combinev16qi (rtx operands[3])
7544 {
7545 unsigned int dest = REGNO (operands[0]);
7546 unsigned int src1 = REGNO (operands[1]);
7547 unsigned int src2 = REGNO (operands[2]);
7548 enum machine_mode halfmode = GET_MODE (operands[1]);
7549 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7550 rtx destlo, desthi;
7551
7552 gcc_assert (halfmode == V16QImode);
7553
7554 if (src1 == dest && src2 == dest + halfregs)
7555 {
7556 /* No-op move. Can't split to nothing; emit something. */
7557 emit_note (NOTE_INSN_DELETED);
7558 return;
7559 }
7560
7561 /* Preserve register attributes for variable tracking. */
7562 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7563 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7564 GET_MODE_SIZE (halfmode));
7565
7566 /* Special case of reversed high/low parts. */
7567 if (reg_overlap_mentioned_p (operands[2], destlo)
7568 && reg_overlap_mentioned_p (operands[1], desthi))
7569 {
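      /* Swap the two halves in place with the classic three-XOR trick so
         that no scratch register is needed.  */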
7570 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7571 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7572 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7573 }
7574 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7575 {
7576 /* Try to avoid unnecessary moves if part of the result
7577 is in the right place already. */
7578 if (src1 != dest)
7579 emit_move_insn (destlo, operands[1]);
7580 if (src2 != dest + halfregs)
7581 emit_move_insn (desthi, operands[2]);
7582 }
7583 else
7584 {
7585 if (src2 != dest + halfregs)
7586 emit_move_insn (desthi, operands[2]);
7587 if (src1 != dest)
7588 emit_move_insn (destlo, operands[1]);
7589 }
7590 }
7591
7592 /* vec_perm support. */
7593
7594 #define MAX_VECT_LEN 16
7595
7596 struct expand_vec_perm_d
7597 {
7598 rtx target, op0, op1;
7599 unsigned char perm[MAX_VECT_LEN];
7600 enum machine_mode vmode;
7601 unsigned char nelt;
7602 bool one_vector_p;
7603 bool testing_p;
7604 };
7605
7606 /* Generate a variable permutation. */
7607
7608 static void
7609 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7610 {
7611 enum machine_mode vmode = GET_MODE (target);
7612 bool one_vector_p = rtx_equal_p (op0, op1);
7613
7614 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7615 gcc_checking_assert (GET_MODE (op0) == vmode);
7616 gcc_checking_assert (GET_MODE (op1) == vmode);
7617 gcc_checking_assert (GET_MODE (sel) == vmode);
7618 gcc_checking_assert (TARGET_SIMD);
7619
7620 if (one_vector_p)
7621 {
7622 if (vmode == V8QImode)
7623 {
7624 /* Expand the argument to a V16QI mode by duplicating it. */
7625 rtx pair = gen_reg_rtx (V16QImode);
7626 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7627 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7628 }
7629 else
7630 {
7631 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7632 }
7633 }
7634 else
7635 {
7636 rtx pair;
7637
7638 if (vmode == V8QImode)
7639 {
7640 pair = gen_reg_rtx (V16QImode);
7641 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7642 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7643 }
7644 else
7645 {
7646 pair = gen_reg_rtx (OImode);
7647 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7648 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7649 }
7650 }
7651 }
7652
7653 void
7654 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7655 {
7656 enum machine_mode vmode = GET_MODE (target);
7657 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7658 bool one_vector_p = rtx_equal_p (op0, op1);
7659 rtx rmask[MAX_VECT_LEN], mask;
7660
7661 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7662
7663 /* The TBL instruction does not use a modulo index, so we must take care
7664 of that ourselves. */
7665 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7666 for (i = 0; i < nelt; ++i)
7667 rmask[i] = mask;
7668 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7669 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7670
7671 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7672 }
7673
7674 /* Recognize patterns suitable for the TRN instructions. */
7675 static bool
7676 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7677 {
7678 unsigned int i, odd, mask, nelt = d->nelt;
7679 rtx out, in0, in1, x;
7680 rtx (*gen) (rtx, rtx, rtx);
7681 enum machine_mode vmode = d->vmode;
7682
7683 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7684 return false;
7685
7686 /* Note that these are little-endian tests.
7687 We correct for big-endian later. */
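  /* E.g. for V4SImode, trn1 corresponds to the permutation { 0, 4, 2, 6 }
     and trn2 to { 1, 5, 3, 7 }.  */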
7688 if (d->perm[0] == 0)
7689 odd = 0;
7690 else if (d->perm[0] == 1)
7691 odd = 1;
7692 else
7693 return false;
7694 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7695
7696 for (i = 0; i < nelt; i += 2)
7697 {
7698 if (d->perm[i] != i + odd)
7699 return false;
7700 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7701 return false;
7702 }
7703
7704 /* Success! */
7705 if (d->testing_p)
7706 return true;
7707
7708 in0 = d->op0;
7709 in1 = d->op1;
7710 if (BYTES_BIG_ENDIAN)
7711 {
7712 x = in0, in0 = in1, in1 = x;
7713 odd = !odd;
7714 }
7715 out = d->target;
7716
7717 if (odd)
7718 {
7719 switch (vmode)
7720 {
7721 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7722 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7723 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7724 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7725 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7726 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7727 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7728 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7729 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7730 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7731 default:
7732 return false;
7733 }
7734 }
7735 else
7736 {
7737 switch (vmode)
7738 {
7739 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7740 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7741 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7742 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7743 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7744 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7745 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7746 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7747 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7748 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7749 default:
7750 return false;
7751 }
7752 }
7753
7754 emit_insn (gen (out, in0, in1));
7755 return true;
7756 }
7757
7758 /* Recognize patterns suitable for the UZP instructions. */
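/* For example, for V4SImode inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   UZP1 gives {a0, a2, b0, b2} and UZP2 gives {a1, a3, b1, b3}, i.e. the
   selectors {0, 2, 4, 6} and {1, 3, 5, 7}. */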
7759 static bool
7760 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7761 {
7762 unsigned int i, odd, mask, nelt = d->nelt;
7763 rtx out, in0, in1, x;
7764 rtx (*gen) (rtx, rtx, rtx);
7765 enum machine_mode vmode = d->vmode;
7766
7767 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7768 return false;
7769
7770 /* Note that these are little-endian tests.
7771 We correct for big-endian later. */
7772 if (d->perm[0] == 0)
7773 odd = 0;
7774 else if (d->perm[0] == 1)
7775 odd = 1;
7776 else
7777 return false;
7778 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7779
7780 for (i = 0; i < nelt; i++)
7781 {
7782 unsigned elt = (i * 2 + odd) & mask;
7783 if (d->perm[i] != elt)
7784 return false;
7785 }
7786
7787 /* Success! */
7788 if (d->testing_p)
7789 return true;
7790
7791 in0 = d->op0;
7792 in1 = d->op1;
7793 if (BYTES_BIG_ENDIAN)
7794 {
7795 x = in0, in0 = in1, in1 = x;
7796 odd = !odd;
7797 }
7798 out = d->target;
7799
7800 if (odd)
7801 {
7802 switch (vmode)
7803 {
7804 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7805 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7806 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7807 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7808 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7809 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7810 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7811 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7812 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7813 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7814 default:
7815 return false;
7816 }
7817 }
7818 else
7819 {
7820 switch (vmode)
7821 {
7822 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7823 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7824 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7825 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7826 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7827 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7828 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7829 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7830 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7831 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7832 default:
7833 return false;
7834 }
7835 }
7836
7837 emit_insn (gen (out, in0, in1));
7838 return true;
7839 }
7840
7841 /* Recognize patterns suitable for the ZIP instructions. */
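/* For example, for V4SImode inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   ZIP1 gives {a0, b0, a1, b1} and ZIP2 gives {a2, b2, a3, b3}, i.e. the
   selectors {0, 4, 1, 5} and {2, 6, 3, 7}. */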
7842 static bool
7843 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7844 {
7845 unsigned int i, high, mask, nelt = d->nelt;
7846 rtx out, in0, in1, x;
7847 rtx (*gen) (rtx, rtx, rtx);
7848 enum machine_mode vmode = d->vmode;
7849
7850 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7851 return false;
7852
7853 /* Note that these are little-endian tests.
7854 We correct for big-endian later. */
7855 high = nelt / 2;
7856 if (d->perm[0] == high)
7857 /* Do nothing. */
7858 ;
7859 else if (d->perm[0] == 0)
7860 high = 0;
7861 else
7862 return false;
7863 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7864
7865 for (i = 0; i < nelt / 2; i++)
7866 {
7867 unsigned elt = (i + high) & mask;
7868 if (d->perm[i * 2] != elt)
7869 return false;
7870 elt = (elt + nelt) & mask;
7871 if (d->perm[i * 2 + 1] != elt)
7872 return false;
7873 }
7874
7875 /* Success! */
7876 if (d->testing_p)
7877 return true;
7878
7879 in0 = d->op0;
7880 in1 = d->op1;
7881 if (BYTES_BIG_ENDIAN)
7882 {
7883 x = in0, in0 = in1, in1 = x;
7884 high = !high;
7885 }
7886 out = d->target;
7887
7888 if (high)
7889 {
7890 switch (vmode)
7891 {
7892 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7893 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7894 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7895 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7896 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7897 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7898 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7899 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7900 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7901 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7902 default:
7903 return false;
7904 }
7905 }
7906 else
7907 {
7908 switch (vmode)
7909 {
7910 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7911 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7912 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7913 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7914 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7915 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7916 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7917 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7918 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7919 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7920 default:
7921 return false;
7922 }
7923 }
7924
7925 emit_insn (gen (out, in0, in1));
7926 return true;
7927 }
7928
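/* Expand the constant permutation D via the TBL instruction, using the
   permutation indices themselves as the byte selector. Only QImode
   element vectors are handled here; the generic code retries the other
   modes with their elements lowered to QImode. */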
7929 static bool
7930 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7931 {
7932 rtx rperm[MAX_VECT_LEN], sel;
7933 enum machine_mode vmode = d->vmode;
7934 unsigned int i, nelt = d->nelt;
7935
7936 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7937 numbering of elements for big-endian, we must reverse the order. */
7938 if (BYTES_BIG_ENDIAN)
7939 return false;
7940
7941 if (d->testing_p)
7942 return true;
7943
7944 /* Generic code will try constant permutation twice: once with the
7945 original mode and again with the elements lowered to QImode.
7946 So wait, and don't do the selector expansion ourselves. */
7947 if (vmode != V8QImode && vmode != V16QImode)
7948 return false;
7949
7950 for (i = 0; i < nelt; ++i)
7951 rperm[i] = GEN_INT (d->perm[i]);
7952 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7953 sel = force_reg (vmode, sel);
7954
7955 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7956 return true;
7957 }
7958
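/* Try to expand the constant permutation described by D, first with the
   dedicated ZIP/UZP/TRN recognizers and finally with a generic TBL.
   Return true on success. */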
7959 static bool
7960 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7961 {
7962 /* The pattern matching functions above are written to look for a small
7963 number to begin the sequence (0, 1, N/2). If we begin with an index
7964 from the second operand, we can swap the operands. */
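/* For example, with nelt == 4 the permutation {4, 0, 5, 1} becomes
   {0, 4, 1, 5} once the operands are exchanged, which ZIP1 can then
   match. */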
7965 if (d->perm[0] >= d->nelt)
7966 {
7967 unsigned i, nelt = d->nelt;
7968 rtx x;
7969
7970 for (i = 0; i < nelt; ++i)
7971 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7972
7973 x = d->op0;
7974 d->op0 = d->op1;
7975 d->op1 = x;
7976 }
7977
7978 if (TARGET_SIMD)
7979 {
7980 if (aarch64_evpc_zip (d))
7981 return true;
7982 else if (aarch64_evpc_uzp (d))
7983 return true;
7984 else if (aarch64_evpc_trn (d))
7985 return true;
7986 return aarch64_evpc_tbl (d);
7987 }
7988 return false;
7989 }
7990
7991 /* Expand a vec_perm_const pattern. */
7992
7993 bool
7994 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7995 {
7996 struct expand_vec_perm_d d;
7997 int i, nelt, which;
7998
7999 d.target = target;
8000 d.op0 = op0;
8001 d.op1 = op1;
8002
8003 d.vmode = GET_MODE (target);
8004 gcc_assert (VECTOR_MODE_P (d.vmode));
8005 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8006 d.testing_p = false;
8007
8008 for (i = which = 0; i < nelt; ++i)
8009 {
8010 rtx e = XVECEXP (sel, 0, i);
8011 int ei = INTVAL (e) & (2 * nelt - 1);
8012 which |= (ei < nelt ? 1 : 2);
8013 d.perm[i] = ei;
8014 }
8015
8016 switch (which)
8017 {
8018 default:
8019 gcc_unreachable ();
8020
8021 case 3:
8022 d.one_vector_p = false;
8023 if (!rtx_equal_p (op0, op1))
8024 break;
8025
8026 /* The elements of PERM reference both operands, but the two
8027 operands are identical. Allow easier matching of the
8028 permutation by folding it onto the single
8029 input vector. */
8030 /* Fall Through. */
8031 case 2:
8032 for (i = 0; i < nelt; ++i)
8033 d.perm[i] &= nelt - 1;
8034 d.op0 = op1;
8035 d.one_vector_p = true;
8036 break;
8037
8038 case 1:
8039 d.op1 = op0;
8040 d.one_vector_p = true;
8041 break;
8042 }
8043
8044 return aarch64_expand_vec_perm_const_1 (&d);
8045 }
8046
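/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. Return true if the
   permutation SEL is supported for vectors of mode VMODE, by trial
   expanding it on scratch registers inside a discarded insn sequence. */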
8047 static bool
8048 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8049 const unsigned char *sel)
8050 {
8051 struct expand_vec_perm_d d;
8052 unsigned int i, nelt, which;
8053 bool ret;
8054
8055 d.vmode = vmode;
8056 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8057 d.testing_p = true;
8058 memcpy (d.perm, sel, nelt);
8059
8060 /* Calculate whether all elements are in one vector. */
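/* WHICH ends up as 1 if only the first vector is referenced, 2 if only
   the second, and 3 if both are referenced. */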
8061 for (i = which = 0; i < nelt; ++i)
8062 {
8063 unsigned char e = d.perm[i];
8064 gcc_assert (e < 2 * nelt);
8065 which |= (e < nelt ? 1 : 2);
8066 }
8067
8068 /* If all elements are from the second vector, reindex as if from the
8069 first vector. */
8070 if (which == 2)
8071 for (i = 0; i < nelt; ++i)
8072 d.perm[i] -= nelt;
8073
8074 /* Check whether the mask can be applied to a single vector. */
8075 d.one_vector_p = (which != 3);
8076
8077 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8078 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8079 if (!d.one_vector_p)
8080 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8081
8082 start_sequence ();
8083 ret = aarch64_expand_vec_perm_const_1 (&d);
8084 end_sequence ();
8085
8086 return ret;
8087 }
8088
8089 #undef TARGET_ADDRESS_COST
8090 #define TARGET_ADDRESS_COST aarch64_address_cost
8091
8092 /* This hook determines whether unnamed bitfields affect the alignment
8093 of the containing structure. The hook returns true if the structure
8094 should inherit the alignment requirements of an unnamed bitfield's
8095 type. */
8096 #undef TARGET_ALIGN_ANON_BITFIELD
8097 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8098
8099 #undef TARGET_ASM_ALIGNED_DI_OP
8100 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8101
8102 #undef TARGET_ASM_ALIGNED_HI_OP
8103 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8104
8105 #undef TARGET_ASM_ALIGNED_SI_OP
8106 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8107
8108 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8109 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8110 hook_bool_const_tree_hwi_hwi_const_tree_true
8111
8112 #undef TARGET_ASM_FILE_START
8113 #define TARGET_ASM_FILE_START aarch64_start_file
8114
8115 #undef TARGET_ASM_OUTPUT_MI_THUNK
8116 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8117
8118 #undef TARGET_ASM_SELECT_RTX_SECTION
8119 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8120
8121 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8122 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8123
8124 #undef TARGET_BUILD_BUILTIN_VA_LIST
8125 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8126
8127 #undef TARGET_CALLEE_COPIES
8128 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8129
8130 #undef TARGET_CAN_ELIMINATE
8131 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8132
8133 #undef TARGET_CANNOT_FORCE_CONST_MEM
8134 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8135
8136 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8137 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8138
8139 /* Only the least significant bit is used for initialization guard
8140 variables. */
8141 #undef TARGET_CXX_GUARD_MASK_BIT
8142 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8143
8144 #undef TARGET_C_MODE_FOR_SUFFIX
8145 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8146
8147 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8148 #undef TARGET_DEFAULT_TARGET_FLAGS
8149 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8150 #endif
8151
8152 #undef TARGET_CLASS_MAX_NREGS
8153 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8154
8155 #undef TARGET_BUILTIN_DECL
8156 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8157
8158 #undef TARGET_EXPAND_BUILTIN
8159 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8160
8161 #undef TARGET_EXPAND_BUILTIN_VA_START
8162 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8163
8164 #undef TARGET_FOLD_BUILTIN
8165 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8166
8167 #undef TARGET_FUNCTION_ARG
8168 #define TARGET_FUNCTION_ARG aarch64_function_arg
8169
8170 #undef TARGET_FUNCTION_ARG_ADVANCE
8171 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8172
8173 #undef TARGET_FUNCTION_ARG_BOUNDARY
8174 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8175
8176 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8177 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8178
8179 #undef TARGET_FUNCTION_VALUE
8180 #define TARGET_FUNCTION_VALUE aarch64_function_value
8181
8182 #undef TARGET_FUNCTION_VALUE_REGNO_P
8183 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8184
8185 #undef TARGET_FRAME_POINTER_REQUIRED
8186 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8187
8188 #undef TARGET_GIMPLE_FOLD_BUILTIN
8189 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8190
8191 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8192 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8193
8194 #undef TARGET_INIT_BUILTINS
8195 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8196
8197 #undef TARGET_LEGITIMATE_ADDRESS_P
8198 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8199
8200 #undef TARGET_LEGITIMATE_CONSTANT_P
8201 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8202
8203 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8204 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8205
8206 #undef TARGET_MANGLE_TYPE
8207 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8208
8209 #undef TARGET_MEMORY_MOVE_COST
8210 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8211
8212 #undef TARGET_MUST_PASS_IN_STACK
8213 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8214
8215 /* This target hook should return true if accesses to volatile bitfields
8216 should use the narrowest mode possible. It should return false if these
8217 accesses should use the bitfield container type. */
8218 #undef TARGET_NARROW_VOLATILE_BITFIELD
8219 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8220
8221 #undef TARGET_OPTION_OVERRIDE
8222 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8223
8224 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8225 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8226 aarch64_override_options_after_change
8227
8228 #undef TARGET_PASS_BY_REFERENCE
8229 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8230
8231 #undef TARGET_PREFERRED_RELOAD_CLASS
8232 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8233
8234 #undef TARGET_SECONDARY_RELOAD
8235 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8236
8237 #undef TARGET_SHIFT_TRUNCATION_MASK
8238 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8239
8240 #undef TARGET_SETUP_INCOMING_VARARGS
8241 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8242
8243 #undef TARGET_STRUCT_VALUE_RTX
8244 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8245
8246 #undef TARGET_REGISTER_MOVE_COST
8247 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8248
8249 #undef TARGET_RETURN_IN_MEMORY
8250 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8251
8252 #undef TARGET_RETURN_IN_MSB
8253 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8254
8255 #undef TARGET_RTX_COSTS
8256 #define TARGET_RTX_COSTS aarch64_rtx_costs
8257
8258 #undef TARGET_TRAMPOLINE_INIT
8259 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8260
8261 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8262 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8263
8264 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8265 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8266
8267 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8268 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8269
8270 #undef TARGET_VECTORIZE_ADD_STMT_COST
8271 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8272
8273 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8274 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8275 aarch64_builtin_vectorization_cost
8276
8277 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8278 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8279
8280 #undef TARGET_VECTORIZE_BUILTINS
8281 #define TARGET_VECTORIZE_BUILTINS
8282
8283 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8284 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8285 aarch64_builtin_vectorized_function
8286
8287 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8288 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8289 aarch64_autovectorize_vector_sizes
8290
8291 /* Section anchor support. */
8292
8293 #undef TARGET_MIN_ANCHOR_OFFSET
8294 #define TARGET_MIN_ANCHOR_OFFSET -256
8295
8296 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8297 byte offset; we can do much more for larger data types, but have no way
8298 to determine the size of the access. We assume accesses are aligned. */
8299 #undef TARGET_MAX_ANCHOR_OFFSET
8300 #define TARGET_MAX_ANCHOR_OFFSET 4095
8301
8302 #undef TARGET_VECTOR_ALIGNMENT
8303 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8304
8305 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8306 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8307 aarch64_simd_vector_alignment_reachable
8308
8309 /* vec_perm support. */
8310
8311 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8312 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8313 aarch64_vectorize_vec_perm_const_ok
8314
8315
8316 #undef TARGET_FIXED_CONDITION_CODE_REGS
8317 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8318
8319 struct gcc_target targetm = TARGET_INITIALIZER;
8320
8321 #include "gt-aarch64.h"