1 /* GNU/Linux/AArch64 specific low level interface, for the remote server for
2 GDB.
3
4 Copyright (C) 2009-2022 Free Software Foundation, Inc.
5 Contributed by ARM Ltd.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 #include "server.h"
23 #include "linux-low.h"
24 #include "nat/aarch64-linux.h"
25 #include "nat/aarch64-linux-hw-point.h"
26 #include "arch/aarch64-insn.h"
27 #include "linux-aarch32-low.h"
28 #include "elf/common.h"
29 #include "ax.h"
30 #include "tracepoint.h"
31 #include "debug.h"
32
33 #include <signal.h>
34 #include <sys/user.h>
35 #include "nat/gdb_ptrace.h"
36 #include <asm/ptrace.h>
37 #include <inttypes.h>
38 #include <endian.h>
39 #include <sys/uio.h>
40
41 #include "gdb_proc_service.h"
42 #include "arch/aarch64.h"
43 #include "arch/aarch64-mte-linux.h"
44 #include "linux-aarch32-tdesc.h"
45 #include "linux-aarch64-tdesc.h"
46 #include "nat/aarch64-mte-linux-ptrace.h"
47 #include "nat/aarch64-sve-linux-ptrace.h"
48 #include "tdesc.h"
49
50 #ifdef HAVE_SYS_REG_H
51 #include <sys/reg.h>
52 #endif
53
54 #ifdef HAVE_GETAUXVAL
55 #include <sys/auxv.h>
56 #endif
57
58 /* Linux target op definitions for the AArch64 architecture. */
59
60 class aarch64_target : public linux_process_target
61 {
62 public:
63
64 const regs_info *get_regs_info () override;
65
66 int breakpoint_kind_from_pc (CORE_ADDR *pcptr) override;
67
68 int breakpoint_kind_from_current_state (CORE_ADDR *pcptr) override;
69
70 const gdb_byte *sw_breakpoint_from_kind (int kind, int *size) override;
71
72 bool supports_z_point_type (char z_type) override;
73
74 bool supports_tracepoints () override;
75
76 bool supports_fast_tracepoints () override;
77
78 int install_fast_tracepoint_jump_pad
79 (CORE_ADDR tpoint, CORE_ADDR tpaddr, CORE_ADDR collector,
80 CORE_ADDR lockaddr, ULONGEST orig_size, CORE_ADDR *jump_entry,
81 CORE_ADDR *trampoline, ULONGEST *trampoline_size,
82 unsigned char *jjump_pad_insn, ULONGEST *jjump_pad_insn_size,
83 CORE_ADDR *adjusted_insn_addr, CORE_ADDR *adjusted_insn_addr_end,
84 char *err) override;
85
86 int get_min_fast_tracepoint_insn_len () override;
87
88 struct emit_ops *emit_ops () override;
89
90 bool supports_memory_tagging () override;
91
92 bool fetch_memtags (CORE_ADDR address, size_t len,
93 gdb::byte_vector &tags, int type) override;
94
95 bool store_memtags (CORE_ADDR address, size_t len,
96 const gdb::byte_vector &tags, int type) override;
97
98 protected:
99
100 void low_arch_setup () override;
101
102 bool low_cannot_fetch_register (int regno) override;
103
104 bool low_cannot_store_register (int regno) override;
105
106 bool low_supports_breakpoints () override;
107
108 CORE_ADDR low_get_pc (regcache *regcache) override;
109
110 void low_set_pc (regcache *regcache, CORE_ADDR newpc) override;
111
112 bool low_breakpoint_at (CORE_ADDR pc) override;
113
114 int low_insert_point (raw_bkpt_type type, CORE_ADDR addr,
115 int size, raw_breakpoint *bp) override;
116
117 int low_remove_point (raw_bkpt_type type, CORE_ADDR addr,
118 int size, raw_breakpoint *bp) override;
119
120 bool low_stopped_by_watchpoint () override;
121
122 CORE_ADDR low_stopped_data_address () override;
123
124 bool low_siginfo_fixup (siginfo_t *native, gdb_byte *inf,
125 int direction) override;
126
127 arch_process_info *low_new_process () override;
128
129 void low_delete_process (arch_process_info *info) override;
130
131 void low_new_thread (lwp_info *) override;
132
133 void low_delete_thread (arch_lwp_info *) override;
134
135 void low_new_fork (process_info *parent, process_info *child) override;
136
137 void low_prepare_to_resume (lwp_info *lwp) override;
138
139 int low_get_thread_area (int lwpid, CORE_ADDR *addrp) override;
140
141 bool low_supports_range_stepping () override;
142
143 bool low_supports_catch_syscall () override;
144
145 void low_get_syscall_trapinfo (regcache *regcache, int *sysno) override;
146 };
147
148 /* The singleton target ops object. */
149
150 static aarch64_target the_aarch64_target;
151
152 bool
153 aarch64_target::low_cannot_fetch_register (int regno)
154 {
155 gdb_assert_not_reached ("linux target op low_cannot_fetch_register "
156 "is not implemented by the target");
157 }
158
159 bool
160 aarch64_target::low_cannot_store_register (int regno)
161 {
162 gdb_assert_not_reached ("linux target op low_cannot_store_register "
163 "is not implemented by the target");
164 }
165
166 void
167 aarch64_target::low_prepare_to_resume (lwp_info *lwp)
168 {
169 aarch64_linux_prepare_to_resume (lwp);
170 }
171
172 /* Per-process arch-specific data we want to keep. */
173
174 struct arch_process_info
175 {
176 /* Hardware breakpoint/watchpoint data.
177 The reason for them to be per-process rather than per-thread is
178 due to the lack of information in the gdbserver environment;
179 gdbserver is not told whether a requested hardware
180 breakpoint/watchpoint is thread specific or not, so it has to set
181 each hw bp/wp for every thread in the current process.  The
182 higher level bp/wp management in gdb will resume a thread if a hw
183 bp/wp trap is not expected for it.  Since the hw bp/wp setting is
184 the same for each thread, it is reasonable for the data to live here.
185 */
186 struct aarch64_debug_reg_state debug_reg_state;
187 };
188
189 /* Return true if the size of register 0 is 8 bytes. */
190
191 static int
192 is_64bit_tdesc (void)
193 {
194 struct regcache *regcache = get_thread_regcache (current_thread, 0);
195
196 return register_size (regcache->tdesc, 0) == 8;
197 }
198
199 static void
200 aarch64_fill_gregset (struct regcache *regcache, void *buf)
201 {
202 struct user_pt_regs *regset = (struct user_pt_regs *) buf;
203 int i;
204
205 for (i = 0; i < AARCH64_X_REGS_NUM; i++)
206 collect_register (regcache, AARCH64_X0_REGNUM + i, &regset->regs[i]);
207 collect_register (regcache, AARCH64_SP_REGNUM, &regset->sp);
208 collect_register (regcache, AARCH64_PC_REGNUM, &regset->pc);
209 collect_register (regcache, AARCH64_CPSR_REGNUM, &regset->pstate);
210 }
211
212 static void
213 aarch64_store_gregset (struct regcache *regcache, const void *buf)
214 {
215 const struct user_pt_regs *regset = (const struct user_pt_regs *) buf;
216 int i;
217
218 for (i = 0; i < AARCH64_X_REGS_NUM; i++)
219 supply_register (regcache, AARCH64_X0_REGNUM + i, &regset->regs[i]);
220 supply_register (regcache, AARCH64_SP_REGNUM, &regset->sp);
221 supply_register (regcache, AARCH64_PC_REGNUM, &regset->pc);
222 supply_register (regcache, AARCH64_CPSR_REGNUM, &regset->pstate);
223 }
224
225 static void
226 aarch64_fill_fpregset (struct regcache *regcache, void *buf)
227 {
228 struct user_fpsimd_state *regset = (struct user_fpsimd_state *) buf;
229 int i;
230
231 for (i = 0; i < AARCH64_V_REGS_NUM; i++)
232 collect_register (regcache, AARCH64_V0_REGNUM + i, &regset->vregs[i]);
233 collect_register (regcache, AARCH64_FPSR_REGNUM, &regset->fpsr);
234 collect_register (regcache, AARCH64_FPCR_REGNUM, &regset->fpcr);
235 }
236
237 static void
238 aarch64_store_fpregset (struct regcache *regcache, const void *buf)
239 {
240 const struct user_fpsimd_state *regset
241 = (const struct user_fpsimd_state *) buf;
242 int i;
243
244 for (i = 0; i < AARCH64_V_REGS_NUM; i++)
245 supply_register (regcache, AARCH64_V0_REGNUM + i, &regset->vregs[i]);
246 supply_register (regcache, AARCH64_FPSR_REGNUM, &regset->fpsr);
247 supply_register (regcache, AARCH64_FPCR_REGNUM, &regset->fpcr);
248 }
249
250 /* Store the pauth registers to regcache. */
251
252 static void
253 aarch64_store_pauthregset (struct regcache *regcache, const void *buf)
254 {
255 uint64_t *pauth_regset = (uint64_t *) buf;
256 int pauth_base = find_regno (regcache->tdesc, "pauth_dmask");
257
258 if (pauth_base == 0)
259 return;
260
261 supply_register (regcache, AARCH64_PAUTH_DMASK_REGNUM (pauth_base),
262 &pauth_regset[0]);
263 supply_register (regcache, AARCH64_PAUTH_CMASK_REGNUM (pauth_base),
264 &pauth_regset[1]);
265 }
266
267 /* Fill BUF with the MTE registers from the regcache. */
268
269 static void
270 aarch64_fill_mteregset (struct regcache *regcache, void *buf)
271 {
272 uint64_t *mte_regset = (uint64_t *) buf;
273 int mte_base = find_regno (regcache->tdesc, "tag_ctl");
274
275 collect_register (regcache, mte_base, mte_regset);
276 }
277
278 /* Store the MTE registers to regcache. */
279
280 static void
281 aarch64_store_mteregset (struct regcache *regcache, const void *buf)
282 {
283 uint64_t *mte_regset = (uint64_t *) buf;
284 int mte_base = find_regno (regcache->tdesc, "tag_ctl");
285
286 /* Tag Control register */
287 supply_register (regcache, mte_base, mte_regset);
288 }
289
290 bool
291 aarch64_target::low_supports_breakpoints ()
292 {
293 return true;
294 }
295
296 /* Implementation of linux target ops method "low_get_pc". */
297
298 CORE_ADDR
299 aarch64_target::low_get_pc (regcache *regcache)
300 {
301 if (register_size (regcache->tdesc, 0) == 8)
302 return linux_get_pc_64bit (regcache);
303 else
304 return linux_get_pc_32bit (regcache);
305 }
306
307 /* Implementation of linux target ops method "low_set_pc". */
308
309 void
310 aarch64_target::low_set_pc (regcache *regcache, CORE_ADDR pc)
311 {
312 if (register_size (regcache->tdesc, 0) == 8)
313 linux_set_pc_64bit (regcache, pc);
314 else
315 linux_set_pc_32bit (regcache, pc);
316 }
317
318 #define aarch64_breakpoint_len 4
319
320 /* AArch64 BRK software debug mode instruction.
321 This instruction needs to match gdb/aarch64-tdep.c
322 (aarch64_default_breakpoint). */
323 static const gdb_byte aarch64_breakpoint[] = {0x00, 0x00, 0x20, 0xd4};
324
325 /* Implementation of linux target ops method "low_breakpoint_at". */
326
327 bool
328 aarch64_target::low_breakpoint_at (CORE_ADDR where)
329 {
330 if (is_64bit_tdesc ())
331 {
332 gdb_byte insn[aarch64_breakpoint_len];
333
334 read_memory (where, (unsigned char *) &insn, aarch64_breakpoint_len);
335 if (memcmp (insn, aarch64_breakpoint, aarch64_breakpoint_len) == 0)
336 return true;
337
338 return false;
339 }
340 else
341 return arm_breakpoint_at (where);
342 }
343
344 static void
345 aarch64_init_debug_reg_state (struct aarch64_debug_reg_state *state)
346 {
347 int i;
348
349 for (i = 0; i < AARCH64_HBP_MAX_NUM; ++i)
350 {
351 state->dr_addr_bp[i] = 0;
352 state->dr_ctrl_bp[i] = 0;
353 state->dr_ref_count_bp[i] = 0;
354 }
355
356 for (i = 0; i < AARCH64_HWP_MAX_NUM; ++i)
357 {
358 state->dr_addr_wp[i] = 0;
359 state->dr_ctrl_wp[i] = 0;
360 state->dr_ref_count_wp[i] = 0;
361 }
362 }
363
364 /* Return the pointer to the debug register state structure in the
365 current process' arch-specific data area. */
366
367 struct aarch64_debug_reg_state *
368 aarch64_get_debug_reg_state (pid_t pid)
369 {
370 struct process_info *proc = find_process_pid (pid);
371
372 return &proc->priv->arch_private->debug_reg_state;
373 }
374
375 /* Implementation of target ops method "supports_z_point_type". */
376
377 bool
378 aarch64_target::supports_z_point_type (char z_type)
379 {
380 switch (z_type)
381 {
382 case Z_PACKET_SW_BP:
383 case Z_PACKET_HW_BP:
384 case Z_PACKET_WRITE_WP:
385 case Z_PACKET_READ_WP:
386 case Z_PACKET_ACCESS_WP:
387 return true;
388 default:
389 return false;
390 }
391 }
392
393 /* Implementation of linux target ops method "low_insert_point".
394
395 It actually only records the info of the to-be-inserted bp/wp;
396 the actual insertion will happen when threads are resumed. */
397
398 int
399 aarch64_target::low_insert_point (raw_bkpt_type type, CORE_ADDR addr,
400 int len, raw_breakpoint *bp)
401 {
402 int ret;
403 enum target_hw_bp_type targ_type;
404 struct aarch64_debug_reg_state *state
405 = aarch64_get_debug_reg_state (pid_of (current_thread));
406
407 if (show_debug_regs)
408 fprintf (stderr, "insert_point on entry (addr=0x%08lx, len=%d)\n",
409 (unsigned long) addr, len);
410
411 /* Determine the type from the raw breakpoint type. */
412 targ_type = raw_bkpt_type_to_target_hw_bp_type (type);
413
414 if (targ_type != hw_execute)
415 {
416 if (aarch64_linux_region_ok_for_watchpoint (addr, len))
417 ret = aarch64_handle_watchpoint (targ_type, addr, len,
418 1 /* is_insert */, state);
419 else
420 ret = -1;
421 }
422 else
423 {
424 if (len == 3)
425 {
426 /* A LEN of 3 means the breakpoint is set on a 32-bit Thumb
427 instruction.  Set it to 2 to correctly encode the length bit
428 mask in the hardware breakpoint/watchpoint control register. */
429 len = 2;
430 }
431 ret = aarch64_handle_breakpoint (targ_type, addr, len,
432 1 /* is_insert */, state);
433 }
434
435 if (show_debug_regs)
436 aarch64_show_debug_reg_state (state, "insert_point", addr, len,
437 targ_type);
438
439 return ret;
440 }
441
442 /* Implementation of linux target ops method "low_remove_point".
443
444 It actually only records the info of the to-be-removed bp/wp;
445 the actual removal will be done when threads are resumed. */
446
447 int
448 aarch64_target::low_remove_point (raw_bkpt_type type, CORE_ADDR addr,
449 int len, raw_breakpoint *bp)
450 {
451 int ret;
452 enum target_hw_bp_type targ_type;
453 struct aarch64_debug_reg_state *state
454 = aarch64_get_debug_reg_state (pid_of (current_thread));
455
456 if (show_debug_regs)
457 fprintf (stderr, "remove_point on entry (addr=0x%08lx, len=%d)\n",
458 (unsigned long) addr, len);
459
460 /* Determine the type from the raw breakpoint type. */
461 targ_type = raw_bkpt_type_to_target_hw_bp_type (type);
462
463 /* Set up state pointers. */
464 if (targ_type != hw_execute)
465 ret =
466 aarch64_handle_watchpoint (targ_type, addr, len, 0 /* is_insert */,
467 state);
468 else
469 {
470 if (len == 3)
471 {
472 /* A LEN of 3 means the breakpoint is set on a 32-bit Thumb
473 instruction.  Set it to 2 to correctly encode the length bit
474 mask in the hardware breakpoint/watchpoint control register. */
475 len = 2;
476 }
477 ret = aarch64_handle_breakpoint (targ_type, addr, len,
478 0 /* is_insert */, state);
479 }
480
481 if (show_debug_regs)
482 aarch64_show_debug_reg_state (state, "remove_point", addr, len,
483 targ_type);
484
485 return ret;
486 }
487
488 /* Return the address with only its significant bits kept.  This is used to
489 ignore the top byte (TBI). */
490
491 static CORE_ADDR
492 address_significant (CORE_ADDR addr)
493 {
494 /* Clear insignificant bits of a target address and sign extend resulting
495 address. */
496 int addr_bit = 56;
497
498 CORE_ADDR sign = (CORE_ADDR) 1 << (addr_bit - 1);
499 addr &= ((CORE_ADDR) 1 << addr_bit) - 1;
500 addr = (addr ^ sign) - sign;
501
502 return addr;
503 }
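
/* For illustration (example values, not taken from the kernel): with the
   top byte ignored, a tagged user-space address such as
   0x1200ffffdeadbeef reduces to 0x0000ffffdeadbeef, while a tagged
   kernel-space address such as 0xf8ffffffdeadbeef sign-extends to
   0xffffffffdeadbeef.  Only the TBI tag in bits 56..63 is discarded.  */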
504
505 /* Implementation of linux target ops method "low_stopped_data_address". */
506
507 CORE_ADDR
508 aarch64_target::low_stopped_data_address ()
509 {
510 siginfo_t siginfo;
511 int pid, i;
512 struct aarch64_debug_reg_state *state;
513
514 pid = lwpid_of (current_thread);
515
516 /* Get the siginfo. */
517 if (ptrace (PTRACE_GETSIGINFO, pid, NULL, &siginfo) != 0)
518 return (CORE_ADDR) 0;
519
520 /* Need to be a hardware breakpoint/watchpoint trap. */
521 if (siginfo.si_signo != SIGTRAP
522 || (siginfo.si_code & 0xffff) != 0x0004 /* TRAP_HWBKPT */)
523 return (CORE_ADDR) 0;
524
525 /* Make sure to ignore the top byte, otherwise we may not recognize a
526 hardware watchpoint hit. The stopped data addresses coming from the
527 kernel can potentially be tagged addresses. */
528 const CORE_ADDR addr_trap
529 = address_significant ((CORE_ADDR) siginfo.si_addr);
530
531 /* Check if the address matches any watched address. */
532 state = aarch64_get_debug_reg_state (pid_of (current_thread));
533 for (i = aarch64_num_wp_regs - 1; i >= 0; --i)
534 {
535 const unsigned int offset
536 = aarch64_watchpoint_offset (state->dr_ctrl_wp[i]);
537 const unsigned int len = aarch64_watchpoint_length (state->dr_ctrl_wp[i]);
538 const CORE_ADDR addr_watch = state->dr_addr_wp[i] + offset;
539 const CORE_ADDR addr_watch_aligned = align_down (state->dr_addr_wp[i], 8);
540 const CORE_ADDR addr_orig = state->dr_addr_orig_wp[i];
541
542 if (state->dr_ref_count_wp[i]
543 && DR_CONTROL_ENABLED (state->dr_ctrl_wp[i])
544 && addr_trap >= addr_watch_aligned
545 && addr_trap < addr_watch + len)
546 {
547 /* ADDR_TRAP reports the first address of the memory range
548 accessed by the CPU, regardless of what was the memory
549 range watched. Thus, a large CPU access that straddles
550 the ADDR_WATCH..ADDR_WATCH+LEN range may result in an
551 ADDR_TRAP that is lower than the
552 ADDR_WATCH..ADDR_WATCH+LEN range. E.g.:
553
554 addr: | 4 | 5 | 6 | 7 | 8 |
555 |---- range watched ----|
556 |----------- range accessed ------------|
557
558 In this case, ADDR_TRAP will be 4.
559
560 To match a watchpoint known to GDB core, we must never
561 report *ADDR_P outside of any ADDR_WATCH..ADDR_WATCH+LEN
562 range. ADDR_WATCH <= ADDR_TRAP < ADDR_ORIG is a false
563 positive on kernels older than 4.10. See PR
564 external/20207. */
565 return addr_orig;
566 }
567 }
568
569 return (CORE_ADDR) 0;
570 }
571
572 /* Implementation of linux target ops method "low_stopped_by_watchpoint". */
573
574 bool
575 aarch64_target::low_stopped_by_watchpoint ()
576 {
577 return (low_stopped_data_address () != 0);
578 }
579
580 /* Fetch the thread-local storage pointer for libthread_db. */
581
582 ps_err_e
583 ps_get_thread_area (struct ps_prochandle *ph,
584 lwpid_t lwpid, int idx, void **base)
585 {
586 return aarch64_ps_get_thread_area (ph, lwpid, idx, base,
587 is_64bit_tdesc ());
588 }
589
590 /* Implementation of linux target ops method "low_siginfo_fixup". */
591
592 bool
593 aarch64_target::low_siginfo_fixup (siginfo_t *native, gdb_byte *inf,
594 int direction)
595 {
596 /* Is the inferior 32-bit? If so, then fixup the siginfo object. */
597 if (!is_64bit_tdesc ())
598 {
599 if (direction == 0)
600 aarch64_compat_siginfo_from_siginfo ((struct compat_siginfo *) inf,
601 native);
602 else
603 aarch64_siginfo_from_compat_siginfo (native,
604 (struct compat_siginfo *) inf);
605
606 return true;
607 }
608
609 return false;
610 }
611
612 /* Implementation of linux target ops method "low_new_process". */
613
614 arch_process_info *
615 aarch64_target::low_new_process ()
616 {
617 struct arch_process_info *info = XCNEW (struct arch_process_info);
618
619 aarch64_init_debug_reg_state (&info->debug_reg_state);
620
621 return info;
622 }
623
624 /* Implementation of linux target ops method "low_delete_process". */
625
626 void
627 aarch64_target::low_delete_process (arch_process_info *info)
628 {
629 xfree (info);
630 }
631
632 void
633 aarch64_target::low_new_thread (lwp_info *lwp)
634 {
635 aarch64_linux_new_thread (lwp);
636 }
637
638 void
639 aarch64_target::low_delete_thread (arch_lwp_info *arch_lwp)
640 {
641 aarch64_linux_delete_thread (arch_lwp);
642 }
643
644 /* Implementation of linux target ops method "low_new_fork". */
645
646 void
647 aarch64_target::low_new_fork (process_info *parent,
648 process_info *child)
649 {
650 /* These are allocated by linux_add_process. */
651 gdb_assert (parent->priv != NULL
652 && parent->priv->arch_private != NULL);
653 gdb_assert (child->priv != NULL
654 && child->priv->arch_private != NULL);
655
656 /* Linux kernel before 2.6.33 commit
657 72f674d203cd230426437cdcf7dd6f681dad8b0d
658 will inherit hardware debug registers from parent
659 on fork/vfork/clone. Newer Linux kernels create such tasks with
660 zeroed debug registers.
661
662 GDB core assumes the child inherits the watchpoints/hw
663 breakpoints of the parent, and will remove them all from the
664 forked off process.  Copy the debug register mirrors into the
665 new process so that all breakpoints and watchpoints can be
666 removed together.  The debug register mirrors will be zeroed
667 in the end before detaching the forked off process, thus making
668 this compatible with older Linux kernels too. */
669
670 *child->priv->arch_private = *parent->priv->arch_private;
671 }
672
673 /* Wrapper for aarch64_sve_regs_copy_to_reg_buf. */
674
675 static void
676 aarch64_sve_regs_copy_to_regcache (struct regcache *regcache, const void *buf)
677 {
678 return aarch64_sve_regs_copy_to_reg_buf (regcache, buf);
679 }
680
681 /* Wrapper for aarch64_sve_regs_copy_from_reg_buf. */
682
683 static void
684 aarch64_sve_regs_copy_from_regcache (struct regcache *regcache, void *buf)
685 {
686 return aarch64_sve_regs_copy_from_reg_buf (regcache, buf);
687 }
688
689 /* Array containing all the possible register sets for AArch64/Linux. During
690 architecture setup, these will be checked against the HWCAP/HWCAP2 bits for
691 validity and enabled/disabled accordingly.
692
693 Their sizes are set to 0 here, but they will be adjusted later depending
694 on whether each register set is available or not. */
695 static struct regset_info aarch64_regsets[] =
696 {
697 /* GPR registers. */
698 { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_PRSTATUS,
699 0, GENERAL_REGS,
700 aarch64_fill_gregset, aarch64_store_gregset },
701 /* Floating Point (FPU) registers. */
702 { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_FPREGSET,
703 0, FP_REGS,
704 aarch64_fill_fpregset, aarch64_store_fpregset
705 },
706 /* Scalable Vector Extension (SVE) registers. */
707 { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_ARM_SVE,
708 0, EXTENDED_REGS,
709 aarch64_sve_regs_copy_from_regcache, aarch64_sve_regs_copy_to_regcache
710 },
711 /* PAC registers. */
712 { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_ARM_PAC_MASK,
713 0, OPTIONAL_REGS,
714 nullptr, aarch64_store_pauthregset },
715 /* Tagged address control / MTE registers. */
716 { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_ARM_TAGGED_ADDR_CTRL,
717 0, OPTIONAL_REGS,
718 aarch64_fill_mteregset, aarch64_store_mteregset },
719 NULL_REGSET
720 };
721
722 static struct regsets_info aarch64_regsets_info =
723 {
724 aarch64_regsets, /* regsets */
725 0, /* num_regsets */
726 nullptr, /* disabled_regsets */
727 };
728
729 static struct regs_info regs_info_aarch64 =
730 {
731 nullptr, /* regset_bitmap */
732 nullptr, /* usrregs */
733 &aarch64_regsets_info,
734 };
735
736 /* Given FEATURES, adjust the available register sets by setting their
737 sizes. A size of 0 means the register set is disabled and won't be
738 used. */
739
740 static void
741 aarch64_adjust_register_sets (const struct aarch64_features &features)
742 {
743 struct regset_info *regset;
744
745 for (regset = aarch64_regsets; regset->size >= 0; regset++)
746 {
747 switch (regset->nt_type)
748 {
749 case NT_PRSTATUS:
750 /* General purpose registers are always present. */
751 regset->size = sizeof (struct user_pt_regs);
752 break;
753 case NT_FPREGSET:
754 /* This is unavailable when SVE is present. */
755 if (!features.sve)
756 regset->size = sizeof (struct user_fpsimd_state);
757 break;
758 case NT_ARM_SVE:
759 if (features.sve)
760 regset->size = SVE_PT_SIZE (AARCH64_MAX_SVE_VQ, SVE_PT_REGS_SVE);
761 break;
762 case NT_ARM_PAC_MASK:
763 if (features.pauth)
764 regset->size = AARCH64_PAUTH_REGS_SIZE;
765 break;
766 case NT_ARM_TAGGED_ADDR_CTRL:
767 if (features.mte)
768 regset->size = AARCH64_LINUX_SIZEOF_MTE;
769 break;
770 default:
771 gdb_assert_not_reached ("Unknown register set found.");
772 }
773 }
774 }
775
776 /* Matches HWCAP_PACA in kernel header arch/arm64/include/uapi/asm/hwcap.h. */
777 #define AARCH64_HWCAP_PACA (1 << 30)
778
779 /* Implementation of linux target ops method "low_arch_setup". */
780
781 void
782 aarch64_target::low_arch_setup ()
783 {
784 unsigned int machine;
785 int is_elf64;
786 int tid;
787
788 tid = lwpid_of (current_thread);
789
790 is_elf64 = linux_pid_exe_is_elf_64_file (tid, &machine);
791
792 if (is_elf64)
793 {
794 struct aarch64_features features;
795
796 uint64_t vq = aarch64_sve_get_vq (tid);
797 features.sve = (vq > 0);
798 /* A-profile PAC is 64-bit only. */
799 features.pauth = linux_get_hwcap (8) & AARCH64_HWCAP_PACA;
800 /* A-profile MTE is 64-bit only. */
801 features.mte = linux_get_hwcap2 (8) & HWCAP2_MTE;
802
803 current_process ()->tdesc
804 = aarch64_linux_read_description (vq, features.pauth, features.mte);
805
806 /* Adjust the register sets we should use for this particular set of
807 features. */
808 aarch64_adjust_register_sets (features);
809 }
810 else
811 current_process ()->tdesc = aarch32_linux_read_description ();
812
813 aarch64_linux_get_debug_reg_capacity (lwpid_of (current_thread));
814 }
815
816 /* Implementation of linux target ops method "get_regs_info". */
817
818 const regs_info *
819 aarch64_target::get_regs_info ()
820 {
821 if (!is_64bit_tdesc ())
822 return &regs_info_aarch32;
823
824 /* AArch64 64-bit registers. */
825 return &regs_info_aarch64;
826 }
827
828 /* Implementation of target ops method "supports_tracepoints". */
829
830 bool
831 aarch64_target::supports_tracepoints ()
832 {
833 if (current_thread == NULL)
834 return true;
835 else
836 {
837 /* We don't support tracepoints on aarch32 now. */
838 return is_64bit_tdesc ();
839 }
840 }
841
842 /* Implementation of linux target ops method "low_get_thread_area". */
843
844 int
845 aarch64_target::low_get_thread_area (int lwpid, CORE_ADDR *addrp)
846 {
847 struct iovec iovec;
848 uint64_t reg;
849
850 iovec.iov_base = &reg;
851 iovec.iov_len = sizeof (reg);
852
853 if (ptrace (PTRACE_GETREGSET, lwpid, NT_ARM_TLS, &iovec) != 0)
854 return -1;
855
856 *addrp = reg;
857
858 return 0;
859 }
860
861 bool
862 aarch64_target::low_supports_catch_syscall ()
863 {
864 return true;
865 }
866
867 /* Implementation of linux target ops method "low_get_syscall_trapinfo". */
868
869 void
870 aarch64_target::low_get_syscall_trapinfo (regcache *regcache, int *sysno)
871 {
872 int use_64bit = register_size (regcache->tdesc, 0) == 8;
873
874 if (use_64bit)
875 {
876 long l_sysno;
877
878 collect_register_by_name (regcache, "x8", &l_sysno);
879 *sysno = (int) l_sysno;
880 }
881 else
882 collect_register_by_name (regcache, "r7", sysno);
883 }
884
885 /* List of condition codes that we need. */
886
887 enum aarch64_condition_codes
888 {
889 EQ = 0x0,
890 NE = 0x1,
891 LO = 0x3,
892 GE = 0xa,
893 LT = 0xb,
894 GT = 0xc,
895 LE = 0xd,
896 };
897
898 enum aarch64_operand_type
899 {
900 OPERAND_IMMEDIATE,
901 OPERAND_REGISTER,
902 };
903
904 /* Representation of an operand. At this time, it only supports register
905 and immediate types. */
906
907 struct aarch64_operand
908 {
909 /* Type of the operand. */
910 enum aarch64_operand_type type;
911
912 /* Value of the operand according to the type. */
913 union
914 {
915 uint32_t imm;
916 struct aarch64_register reg;
917 };
918 };
919
920 /* List of registers that we are currently using; we can add more here as
921 we need to use them. */
922
923 /* General purpose scratch registers (64 bit). */
924 static const struct aarch64_register x0 = { 0, 1 };
925 static const struct aarch64_register x1 = { 1, 1 };
926 static const struct aarch64_register x2 = { 2, 1 };
927 static const struct aarch64_register x3 = { 3, 1 };
928 static const struct aarch64_register x4 = { 4, 1 };
929
930 /* General purpose scratch registers (32 bit). */
931 static const struct aarch64_register w0 = { 0, 0 };
932 static const struct aarch64_register w2 = { 2, 0 };
933
934 /* Intra-procedure scratch registers. */
935 static const struct aarch64_register ip0 = { 16, 1 };
936
937 /* Special purpose registers. */
938 static const struct aarch64_register fp = { 29, 1 };
939 static const struct aarch64_register lr = { 30, 1 };
940 static const struct aarch64_register sp = { 31, 1 };
941 static const struct aarch64_register xzr = { 31, 1 };
942
943 /* Construct an aarch64_register value at run time.  If we know the register
944 statically, we should make it a global as above instead of using this
945 helper function. */
946
947 static struct aarch64_register
948 aarch64_register (unsigned num, int is64)
949 {
950 return (struct aarch64_register) { num, is64 };
951 }
952
953 /* Helper function to create a register operand, for instructions with
954 different types of operands.
955
956 For example:
957 p += emit_mov (p, x0, register_operand (x1)); */
958
959 static struct aarch64_operand
960 register_operand (struct aarch64_register reg)
961 {
962 struct aarch64_operand operand;
963
964 operand.type = OPERAND_REGISTER;
965 operand.reg = reg;
966
967 return operand;
968 }
969
970 /* Helper function to create an immediate operand, for instructions with
971 different types of operands.
972
973 For example:
974 p += emit_mov (p, x0, immediate_operand (12)); */
975
976 static struct aarch64_operand
977 immediate_operand (uint32_t imm)
978 {
979 struct aarch64_operand operand;
980
981 operand.type = OPERAND_IMMEDIATE;
982 operand.imm = imm;
983
984 return operand;
985 }
986
987 /* Helper function to create an offset memory operand.
988
989 For example:
990 p += emit_ldr (p, x0, sp, offset_memory_operand (16)); */
991
992 static struct aarch64_memory_operand
993 offset_memory_operand (int32_t offset)
994 {
995 return (struct aarch64_memory_operand) { MEMORY_OPERAND_OFFSET, offset };
996 }
997
998 /* Helper function to create a pre-index memory operand.
999
1000 For example:
1001 p += emit_ldr (p, x0, sp, preindex_memory_operand (16)); */
1002
1003 static struct aarch64_memory_operand
1004 preindex_memory_operand (int32_t index)
1005 {
1006 return (struct aarch64_memory_operand) { MEMORY_OPERAND_PREINDEX, index };
1007 }
1008
1009 /* Helper function to create a post-index memory operand.
1010
1011 For example:
1012 p += emit_ldr (p, x0, sp, postindex_memory_operand (16)); */
1013
1014 static struct aarch64_memory_operand
1015 postindex_memory_operand (int32_t index)
1016 {
1017 return (struct aarch64_memory_operand) { MEMORY_OPERAND_POSTINDEX, index };
1018 }
1019
1020 /* System control registers.  These special registers can be read and
1021 written with the MRS and MSR instructions.
1022
1023 - NZCV: Condition flags. GDB refers to this register under the CPSR
1024 name.
1025 - FPSR: Floating-point status register.
1026 - FPCR: Floating-point control registers.
1027 - TPIDR_EL0: Software thread ID register. */
1028
1029 enum aarch64_system_control_registers
1030 {
1031 /* op0 op1 crn crm op2 */
1032 NZCV = (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x2 << 3) | 0x0,
1033 FPSR = (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x1,
1034 FPCR = (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x0,
1035 TPIDR_EL0 = (0x1 << 14) | (0x3 << 11) | (0xd << 7) | (0x0 << 3) | 0x2
1036 };
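
/* As a worked example of the packing above, TPIDR_EL0 evaluates to
   0x5e82.  emit_mrs and emit_msr below place this 15-bit value in bits
   [19:5] of the instruction encoding.  */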
1037
1038 /* Write a BLR instruction into *BUF.
1039
1040 BLR rn
1041
1042 RN is the register to branch to. */
1043
1044 static int
1045 emit_blr (uint32_t *buf, struct aarch64_register rn)
1046 {
1047 return aarch64_emit_insn (buf, BLR | ENCODE (rn.num, 5, 5));
1048 }
1049
1050 /* Write a RET instruction into *BUF.
1051
1052 RET xn
1053
1054 RN is the register to branch to. */
1055
1056 static int
1057 emit_ret (uint32_t *buf, struct aarch64_register rn)
1058 {
1059 return aarch64_emit_insn (buf, RET | ENCODE (rn.num, 5, 5));
1060 }
1061
1062 static int
1063 emit_load_store_pair (uint32_t *buf, enum aarch64_opcodes opcode,
1064 struct aarch64_register rt,
1065 struct aarch64_register rt2,
1066 struct aarch64_register rn,
1067 struct aarch64_memory_operand operand)
1068 {
1069 uint32_t opc;
1070 uint32_t pre_index;
1071 uint32_t write_back;
1072
1073 if (rt.is64)
1074 opc = ENCODE (2, 2, 30);
1075 else
1076 opc = ENCODE (0, 2, 30);
1077
1078 switch (operand.type)
1079 {
1080 case MEMORY_OPERAND_OFFSET:
1081 {
1082 pre_index = ENCODE (1, 1, 24);
1083 write_back = ENCODE (0, 1, 23);
1084 break;
1085 }
1086 case MEMORY_OPERAND_POSTINDEX:
1087 {
1088 pre_index = ENCODE (0, 1, 24);
1089 write_back = ENCODE (1, 1, 23);
1090 break;
1091 }
1092 case MEMORY_OPERAND_PREINDEX:
1093 {
1094 pre_index = ENCODE (1, 1, 24);
1095 write_back = ENCODE (1, 1, 23);
1096 break;
1097 }
1098 default:
1099 return 0;
1100 }
1101
1102 return aarch64_emit_insn (buf, opcode | opc | pre_index | write_back
1103 | ENCODE (operand.index >> 3, 7, 15)
1104 | ENCODE (rt2.num, 5, 10)
1105 | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1106 }
1107
1108 /* Write a STP instruction into *BUF.
1109
1110 STP rt, rt2, [rn, #offset]
1111 STP rt, rt2, [rn, #index]!
1112 STP rt, rt2, [rn], #index
1113
1114 RT and RT2 are the registers to store.
1115 RN is the base address register.
1116 OFFSET is the immediate to add to the base address. It is limited to a
1117 -512 .. 504 range (7 bits << 3). */
1118
1119 static int
1120 emit_stp (uint32_t *buf, struct aarch64_register rt,
1121 struct aarch64_register rt2, struct aarch64_register rn,
1122 struct aarch64_memory_operand operand)
1123 {
1124 return emit_load_store_pair (buf, STP, rt, rt2, rn, operand);
1125 }
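
/* For example, to push the frame pointer and link register with a
   pre-index write-back of -16:

   p += emit_stp (p, fp, lr, sp, preindex_memory_operand (-2 * 8));  */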
1126
1127 /* Write a LDP instruction into *BUF.
1128
1129 LDP rt, rt2, [rn, #offset]
1130 LDP rt, rt2, [rn, #index]!
1131 LDP rt, rt2, [rn], #index
1132
1133 RT and RT2 are the registers to load.
1134 RN is the base address register.
1135 OFFSET is the immediate to add to the base address. It is limited to a
1136 -512 .. 504 range (7 bits << 3). */
1137
1138 static int
1139 emit_ldp (uint32_t *buf, struct aarch64_register rt,
1140 struct aarch64_register rt2, struct aarch64_register rn,
1141 struct aarch64_memory_operand operand)
1142 {
1143 return emit_load_store_pair (buf, LDP, rt, rt2, rn, operand);
1144 }
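
/* For example, to pop the pair stored by the emit_stp example above,
   with a post-index write-back of 16:

   p += emit_ldp (p, fp, lr, sp, postindex_memory_operand (2 * 8));  */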
1145
1146 /* Write a LDP (SIMD&VFP) instruction using Q registers into *BUF.
1147
1148 LDP qt, qt2, [rn, #offset]
1149
1150 RT and RT2 are the Q registers to load.
1151 RN is the base address register.
1152 OFFSET is the immediate to add to the base address. It is limited to
1153 -1024 .. 1008 range (7 bits << 4). */
1154
1155 static int
1156 emit_ldp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
1157 struct aarch64_register rn, int32_t offset)
1158 {
1159 uint32_t opc = ENCODE (2, 2, 30);
1160 uint32_t pre_index = ENCODE (1, 1, 24);
1161
1162 return aarch64_emit_insn (buf, LDP_SIMD_VFP | opc | pre_index
1163 | ENCODE (offset >> 4, 7, 15)
1164 | ENCODE (rt2, 5, 10)
1165 | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
1166 }
1167
1168 /* Write a STP (SIMD&VFP) instruction using Q registers into *BUF.
1169
1170 STP qt, qt2, [rn, #offset]
1171
1172 RT and RT2 are the Q registers to store.
1173 RN is the base address register.
1174 OFFSET is the immediate to add to the base address. It is limited to
1175 -1024 .. 1008 range (7 bits << 4). */
1176
1177 static int
1178 emit_stp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
1179 struct aarch64_register rn, int32_t offset)
1180 {
1181 uint32_t opc = ENCODE (2, 2, 30);
1182 uint32_t pre_index = ENCODE (1, 1, 24);
1183
1184 return aarch64_emit_insn (buf, STP_SIMD_VFP | opc | pre_index
1185 | ENCODE (offset >> 4, 7, 15)
1186 | ENCODE (rt2, 5, 10)
1187 | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
1188 }
1189
1190 /* Write a LDRH instruction into *BUF.
1191
1192 LDRH wt, [xn, #offset]
1193 LDRH wt, [xn, #index]!
1194 LDRH wt, [xn], #index
1195
1196 RT is the register to load.
1197 RN is the base address register.
1198 OFFSET is the immediate to add to the base address. It is limited to
1199 0 .. 32760 range (12 bits << 3). */
1200
1201 static int
1202 emit_ldrh (uint32_t *buf, struct aarch64_register rt,
1203 struct aarch64_register rn,
1204 struct aarch64_memory_operand operand)
1205 {
1206 return aarch64_emit_load_store (buf, 1, LDR, rt, rn, operand);
1207 }
1208
1209 /* Write a LDRB instruction into *BUF.
1210
1211 LDRB wt, [xn, #offset]
1212 LDRB wt, [xn, #index]!
1213 LDRB wt, [xn], #index
1214
1215 RT is the register to load.
1216 RN is the base address register.
1217 OFFSET is the immediate to add to the base address. It is limited to
1218 0 .. 32760 range (12 bits << 3). */
1219
1220 static int
1221 emit_ldrb (uint32_t *buf, struct aarch64_register rt,
1222 struct aarch64_register rn,
1223 struct aarch64_memory_operand operand)
1224 {
1225 return aarch64_emit_load_store (buf, 0, LDR, rt, rn, operand);
1226 }
1227
1228
1229
1230 /* Write a STR instruction into *BUF.
1231
1232 STR rt, [rn, #offset]
1233 STR rt, [rn, #index]!
1234 STR rt, [rn], #index
1235
1236 RT is the register to store.
1237 RN is the base address register.
1238 OFFSET is the immediate to add to the base address. It is limited to
1239 0 .. 32760 range (12 bits << 3). */
1240
1241 static int
1242 emit_str (uint32_t *buf, struct aarch64_register rt,
1243 struct aarch64_register rn,
1244 struct aarch64_memory_operand operand)
1245 {
1246 return aarch64_emit_load_store (buf, rt.is64 ? 3 : 2, STR, rt, rn, operand);
1247 }
1248
1249 /* Helper function emitting an exclusive load or store instruction. */
1250
1251 static int
1252 emit_load_store_exclusive (uint32_t *buf, uint32_t size,
1253 enum aarch64_opcodes opcode,
1254 struct aarch64_register rs,
1255 struct aarch64_register rt,
1256 struct aarch64_register rt2,
1257 struct aarch64_register rn)
1258 {
1259 return aarch64_emit_insn (buf, opcode | ENCODE (size, 2, 30)
1260 | ENCODE (rs.num, 5, 16) | ENCODE (rt2.num, 5, 10)
1261 | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1262 }
1263
1264 /* Write a LDAXR instruction into *BUF.
1265
1266 LDAXR rt, [xn]
1267
1268 RT is the destination register.
1269 RN is the base address register. */
1270
1271 static int
1272 emit_ldaxr (uint32_t *buf, struct aarch64_register rt,
1273 struct aarch64_register rn)
1274 {
1275 return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, LDAXR, xzr, rt,
1276 xzr, rn);
1277 }
1278
1279 /* Write a STXR instruction into *BUF.
1280
1281 STXR ws, rt, [xn]
1282
1283 RS is the result register; it indicates whether the store succeeded.
1284 RT is the destination register.
1285 RN is the base address register. */
1286
1287 static int
1288 emit_stxr (uint32_t *buf, struct aarch64_register rs,
1289 struct aarch64_register rt, struct aarch64_register rn)
1290 {
1291 return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STXR, rs, rt,
1292 xzr, rn);
1293 }
1294
1295 /* Write a STLR instruction into *BUF.
1296
1297 STLR rt, [xn]
1298
1299 RT is the register to store.
1300 RN is the base address register. */
1301
1302 static int
1303 emit_stlr (uint32_t *buf, struct aarch64_register rt,
1304 struct aarch64_register rn)
1305 {
1306 return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STLR, xzr, rt,
1307 xzr, rn);
1308 }
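
/* Taken together, the exclusive load/store emitters above support
   retry-loop sequences of the general form

       again:  LDAXR x0, [x1]
               ...test or update x0...
               STXR  w2, x0, [x1]
               CBNZ  w2, again

   This is only an illustrative sketch of how such instructions are
   commonly combined, not a verbatim copy of any sequence emitted
   elsewhere; STLR provides the matching store-release.  */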
1309
1310 /* Helper function for data processing instructions with register sources. */
1311
1312 static int
1313 emit_data_processing_reg (uint32_t *buf, uint32_t opcode,
1314 struct aarch64_register rd,
1315 struct aarch64_register rn,
1316 struct aarch64_register rm)
1317 {
1318 uint32_t size = ENCODE (rd.is64, 1, 31);
1319
1320 return aarch64_emit_insn (buf, opcode | size | ENCODE (rm.num, 5, 16)
1321 | ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
1322 }
1323
1324 /* Helper function for data processing instructions taking either a register
1325 or an immediate. */
1326
1327 static int
1328 emit_data_processing (uint32_t *buf, enum aarch64_opcodes opcode,
1329 struct aarch64_register rd,
1330 struct aarch64_register rn,
1331 struct aarch64_operand operand)
1332 {
1333 uint32_t size = ENCODE (rd.is64, 1, 31);
1334 /* The opcode is different for register and immediate source operands. */
1335 uint32_t operand_opcode;
1336
1337 if (operand.type == OPERAND_IMMEDIATE)
1338 {
1339 /* xxx1 000x xxxx xxxx xxxx xxxx xxxx xxxx */
1340 operand_opcode = ENCODE (8, 4, 25);
1341
1342 return aarch64_emit_insn (buf, opcode | operand_opcode | size
1343 | ENCODE (operand.imm, 12, 10)
1344 | ENCODE (rn.num, 5, 5)
1345 | ENCODE (rd.num, 5, 0));
1346 }
1347 else
1348 {
1349 /* xxx0 101x xxxx xxxx xxxx xxxx xxxx xxxx */
1350 operand_opcode = ENCODE (5, 4, 25);
1351
1352 return emit_data_processing_reg (buf, opcode | operand_opcode, rd,
1353 rn, operand.reg);
1354 }
1355 }
1356
1357 /* Write an ADD instruction into *BUF.
1358
1359 ADD rd, rn, #imm
1360 ADD rd, rn, rm
1361
1362 This function handles both an immediate and register add.
1363
1364 RD is the destination register.
1365 RN is the input register.
1366 OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
1367 OPERAND_REGISTER. */
1368
1369 static int
1370 emit_add (uint32_t *buf, struct aarch64_register rd,
1371 struct aarch64_register rn, struct aarch64_operand operand)
1372 {
1373 return emit_data_processing (buf, ADD, rd, rn, operand);
1374 }
1375
1376 /* Write a SUB instruction into *BUF.
1377
1378 SUB rd, rn, #imm
1379 SUB rd, rn, rm
1380
1381 This function handles both an immediate and register sub.
1382
1383 RD is the destination register.
1384 RN is the input register.
1385 IMM is the immediate to subtract from RN. */
1386
1387 static int
1388 emit_sub (uint32_t *buf, struct aarch64_register rd,
1389 struct aarch64_register rn, struct aarch64_operand operand)
1390 {
1391 return emit_data_processing (buf, SUB, rd, rn, operand);
1392 }
1393
1394 /* Write a MOV instruction into *BUF.
1395
1396 MOV rd, #imm
1397 MOV rd, rm
1398
1399 This function handles both a wide immediate move and a register move,
1400 with the condition that the source register is not xzr. xzr and the
1401 stack pointer share the same encoding and this function only supports
1402 the stack pointer.
1403
1404 RD is the destination register.
1405 OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
1406 OPERAND_REGISTER. */
1407
1408 static int
1409 emit_mov (uint32_t *buf, struct aarch64_register rd,
1410 struct aarch64_operand operand)
1411 {
1412 if (operand.type == OPERAND_IMMEDIATE)
1413 {
1414 uint32_t size = ENCODE (rd.is64, 1, 31);
1415 /* Do not shift the immediate. */
1416 uint32_t shift = ENCODE (0, 2, 21);
1417
1418 return aarch64_emit_insn (buf, MOV | size | shift
1419 | ENCODE (operand.imm, 16, 5)
1420 | ENCODE (rd.num, 5, 0));
1421 }
1422 else
1423 return emit_add (buf, rd, operand.reg, immediate_operand (0));
1424 }
1425
1426 /* Write a MOVK instruction into *BUF.
1427
1428 MOVK rd, #imm, lsl #shift
1429
1430 RD is the destination register.
1431 IMM is the immediate.
1432 SHIFT is the logical shift left to apply to IMM. */
1433
1434 static int
1435 emit_movk (uint32_t *buf, struct aarch64_register rd, uint32_t imm,
1436 unsigned shift)
1437 {
1438 uint32_t size = ENCODE (rd.is64, 1, 31);
1439
1440 return aarch64_emit_insn (buf, MOVK | size | ENCODE (shift, 2, 21) |
1441 ENCODE (imm, 16, 5) | ENCODE (rd.num, 5, 0));
1442 }
1443
1444 /* Write instructions into *BUF in order to move ADDR into a register.
1445 ADDR can be a 64-bit value.
1446
1447 This function will emit a series of MOV and MOVK instructions, such as:
1448
1449 MOV xd, #(addr)
1450 MOVK xd, #(addr >> 16), lsl #16
1451 MOVK xd, #(addr >> 32), lsl #32
1452 MOVK xd, #(addr >> 48), lsl #48 */
1453
1454 static int
1455 emit_mov_addr (uint32_t *buf, struct aarch64_register rd, CORE_ADDR addr)
1456 {
1457 uint32_t *p = buf;
1458
1459 /* The MOV (wide immediate) instruction clears the top bits of the
1460 register. */
1461 p += emit_mov (p, rd, immediate_operand (addr & 0xffff));
1462
1463 if ((addr >> 16) != 0)
1464 p += emit_movk (p, rd, (addr >> 16) & 0xffff, 1);
1465 else
1466 return p - buf;
1467
1468 if ((addr >> 32) != 0)
1469 p += emit_movk (p, rd, (addr >> 32) & 0xffff, 2);
1470 else
1471 return p - buf;
1472
1473 if ((addr >> 48) != 0)
1474 p += emit_movk (p, rd, (addr >> 48) & 0xffff, 3);
1475
1476 return p - buf;
1477 }
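
/* For illustration, with ADDR == 0x0000007fb7ff0000 (an arbitrary
   example value) the emitted sequence is:

   MOV  xd, #0x0000
   MOVK xd, #0xb7ff, lsl #16
   MOVK xd, #0x007f, lsl #32

   and the final MOVK is skipped because bits 48..63 of ADDR are zero.  */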
1478
1479 /* Write a SUBS instruction into *BUF.
1480
1481 SUBS rd, rn, rm
1482
1483 This instruction updates the condition flags.
1484
1485 RD is the destination register.
1486 RN and RM are the source registers. */
1487
1488 static int
1489 emit_subs (uint32_t *buf, struct aarch64_register rd,
1490 struct aarch64_register rn, struct aarch64_operand operand)
1491 {
1492 return emit_data_processing (buf, SUBS, rd, rn, operand);
1493 }
1494
1495 /* Write a CMP instruction into *BUF.
1496
1497 CMP rn, rm
1498
1499 This instruction is an alias of SUBS xzr, rn, rm.
1500
1501 RN and RM are the registers to compare. */
1502
1503 static int
1504 emit_cmp (uint32_t *buf, struct aarch64_register rn,
1505 struct aarch64_operand operand)
1506 {
1507 return emit_subs (buf, xzr, rn, operand);
1508 }
1509
1510 /* Write a AND instruction into *BUF.
1511
1512 AND rd, rn, rm
1513
1514 RD is the destination register.
1515 RN and RM are the source registers. */
1516
1517 static int
1518 emit_and (uint32_t *buf, struct aarch64_register rd,
1519 struct aarch64_register rn, struct aarch64_register rm)
1520 {
1521 return emit_data_processing_reg (buf, AND, rd, rn, rm);
1522 }
1523
1524 /* Write a ORR instruction into *BUF.
1525
1526 ORR rd, rn, rm
1527
1528 RD is the destination register.
1529 RN and RM are the source registers. */
1530
1531 static int
1532 emit_orr (uint32_t *buf, struct aarch64_register rd,
1533 struct aarch64_register rn, struct aarch64_register rm)
1534 {
1535 return emit_data_processing_reg (buf, ORR, rd, rn, rm);
1536 }
1537
1538 /* Write a ORN instruction into *BUF.
1539
1540 ORN rd, rn, rm
1541
1542 RD is the destination register.
1543 RN and RM are the source registers. */
1544
1545 static int
1546 emit_orn (uint32_t *buf, struct aarch64_register rd,
1547 struct aarch64_register rn, struct aarch64_register rm)
1548 {
1549 return emit_data_processing_reg (buf, ORN, rd, rn, rm);
1550 }
1551
1552 /* Write a EOR instruction into *BUF.
1553
1554 EOR rd, rn, rm
1555
1556 RD is the destination register.
1557 RN and RM are the source registers. */
1558
1559 static int
1560 emit_eor (uint32_t *buf, struct aarch64_register rd,
1561 struct aarch64_register rn, struct aarch64_register rm)
1562 {
1563 return emit_data_processing_reg (buf, EOR, rd, rn, rm);
1564 }
1565
1566 /* Write a MVN instruction into *BUF.
1567
1568 MVN rd, rm
1569
1570 This is an alias for ORN rd, xzr, rm.
1571
1572 RD is the destination register.
1573 RM is the source register. */
1574
1575 static int
1576 emit_mvn (uint32_t *buf, struct aarch64_register rd,
1577 struct aarch64_register rm)
1578 {
1579 return emit_orn (buf, rd, xzr, rm);
1580 }
1581
1582 /* Write a LSLV instruction into *BUF.
1583
1584 LSLV rd, rn, rm
1585
1586 RD is the destination register.
1587 RN and RM are the source registers. */
1588
1589 static int
1590 emit_lslv (uint32_t *buf, struct aarch64_register rd,
1591 struct aarch64_register rn, struct aarch64_register rm)
1592 {
1593 return emit_data_processing_reg (buf, LSLV, rd, rn, rm);
1594 }
1595
1596 /* Write a LSRV instruction into *BUF.
1597
1598 LSRV rd, rn, rm
1599
1600 RD is the destination register.
1601 RN and RM are the source registers. */
1602
1603 static int
1604 emit_lsrv (uint32_t *buf, struct aarch64_register rd,
1605 struct aarch64_register rn, struct aarch64_register rm)
1606 {
1607 return emit_data_processing_reg (buf, LSRV, rd, rn, rm);
1608 }
1609
1610 /* Write a ASRV instruction into *BUF.
1611
1612 ASRV rd, rn, rm
1613
1614 RD is the destination register.
1615 RN and RM are the source registers. */
1616
1617 static int
1618 emit_asrv (uint32_t *buf, struct aarch64_register rd,
1619 struct aarch64_register rn, struct aarch64_register rm)
1620 {
1621 return emit_data_processing_reg (buf, ASRV, rd, rn, rm);
1622 }
1623
1624 /* Write a MUL instruction into *BUF.
1625
1626 MUL rd, rn, rm
1627
1628 RD is the destination register.
1629 RN and RM are the source registers. */
1630
1631 static int
1632 emit_mul (uint32_t *buf, struct aarch64_register rd,
1633 struct aarch64_register rn, struct aarch64_register rm)
1634 {
1635 return emit_data_processing_reg (buf, MUL, rd, rn, rm);
1636 }
1637
1638 /* Write a MRS instruction into *BUF. The register size is 64-bit.
1639
1640 MRS xt, system_reg
1641
1642 RT is the destination register.
1643 SYSTEM_REG is the special purpose register to read. */
1644
1645 static int
1646 emit_mrs (uint32_t *buf, struct aarch64_register rt,
1647 enum aarch64_system_control_registers system_reg)
1648 {
1649 return aarch64_emit_insn (buf, MRS | ENCODE (system_reg, 15, 5)
1650 | ENCODE (rt.num, 5, 0));
1651 }
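
/* For example, to read the software thread ID register into x0:

   p += emit_mrs (p, x0, TPIDR_EL0);  */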
1652
1653 /* Write a MSR instruction into *BUF. The register size is 64-bit.
1654
1655 MSR system_reg, xt
1656
1657 SYSTEM_REG is the special purpose register to write.
1658 RT is the input register. */
1659
1660 static int
1661 emit_msr (uint32_t *buf, enum aarch64_system_control_registers system_reg,
1662 struct aarch64_register rt)
1663 {
1664 return aarch64_emit_insn (buf, MSR | ENCODE (system_reg, 15, 5)
1665 | ENCODE (rt.num, 5, 0));
1666 }
1667
1668 /* Write a SEVL instruction into *BUF.
1669
1670 This is a hint instruction telling the hardware to trigger an event. */
1671
1672 static int
1673 emit_sevl (uint32_t *buf)
1674 {
1675 return aarch64_emit_insn (buf, SEVL);
1676 }
1677
1678 /* Write a WFE instruction into *BUF.
1679
1680 This is a hint instruction telling the hardware to wait for an event. */
1681
1682 static int
1683 emit_wfe (uint32_t *buf)
1684 {
1685 return aarch64_emit_insn (buf, WFE);
1686 }
1687
1688 /* Write a SBFM instruction into *BUF.
1689
1690 SBFM rd, rn, #immr, #imms
1691
1692 This instruction moves the bits from #immr to #imms into the
1693 destination, sign extending the result.
1694
1695 RD is the destination register.
1696 RN is the source register.
1697 IMMR is the bit number to start at (least significant bit).
1698 IMMS is the bit number to stop at (most significant bit). */
1699
1700 static int
1701 emit_sbfm (uint32_t *buf, struct aarch64_register rd,
1702 struct aarch64_register rn, uint32_t immr, uint32_t imms)
1703 {
1704 uint32_t size = ENCODE (rd.is64, 1, 31);
1705 uint32_t n = ENCODE (rd.is64, 1, 22);
1706
1707 return aarch64_emit_insn (buf, SBFM | size | n | ENCODE (immr, 6, 16)
1708 | ENCODE (imms, 6, 10) | ENCODE (rn.num, 5, 5)
1709 | ENCODE (rd.num, 5, 0));
1710 }
1711
1712 /* Write a SBFX instruction into *BUF.
1713
1714 SBFX rd, rn, #lsb, #width
1715
1716 This instruction moves #width bits from #lsb into the destination, sign
1717 extending the result. This is an alias for:
1718
1719 SBFM rd, rn, #lsb, #(lsb + width - 1)
1720
1721 RD is the destination register.
1722 RN is the source register.
1723 LSB is the bit number to start at (least significant bit).
1724 WIDTH is the number of bits to move. */
1725
1726 static int
1727 emit_sbfx (uint32_t *buf, struct aarch64_register rd,
1728 struct aarch64_register rn, uint32_t lsb, uint32_t width)
1729 {
1730 return emit_sbfm (buf, rd, rn, lsb, lsb + width - 1);
1731 }
1732
1733 /* Write a UBFM instruction into *BUF.
1734
1735 UBFM rd, rn, #immr, #imms
1736
1737 This instruction moves the bits from #immr to #imms into the
1738 destination, extending the result with zeros.
1739
1740 RD is the destination register.
1741 RN is the source register.
1742 IMMR is the bit number to start at (least significant bit).
1743 IMMS is the bit number to stop at (most significant bit). */
1744
1745 static int
1746 emit_ubfm (uint32_t *buf, struct aarch64_register rd,
1747 struct aarch64_register rn, uint32_t immr, uint32_t imms)
1748 {
1749 uint32_t size = ENCODE (rd.is64, 1, 31);
1750 uint32_t n = ENCODE (rd.is64, 1, 22);
1751
1752 return aarch64_emit_insn (buf, UBFM | size | n | ENCODE (immr, 6, 16)
1753 | ENCODE (imms, 6, 10) | ENCODE (rn.num, 5, 5)
1754 | ENCODE (rd.num, 5, 0));
1755 }
1756
1757 /* Write a UBFX instruction into *BUF.
1758
1759 UBFX rd, rn, #lsb, #width
1760
1761 This instruction moves #width bits from #lsb into the destination,
1762 extending the result with zeros. This is an alias for:
1763
1764 UBFM rd, rn, #lsb, #(lsb + width - 1)
1765
1766 RD is the destination register.
1767 RN is the source register.
1768 LSB is the bit number to start at (least significant bit).
1769 WIDTH is the number of bits to move. */
1770
1771 static int
1772 emit_ubfx (uint32_t *buf, struct aarch64_register rd,
1773 struct aarch64_register rn, uint32_t lsb, uint32_t width)
1774 {
1775 return emit_ubfm (buf, rd, rn, lsb, lsb + width - 1);
1776 }
1777
1778 /* Write a CSINC instruction into *BUF.
1779
1780 CSINC rd, rn, rm, cond
1781
1782 This instruction writes RN to RD if the condition is true, and RM
1783 incremented by one otherwise.
1784
1785 RD is the destination register.
1786 RN and RM are the source registers.
1787 COND is the encoded condition. */
1788
1789 static int
1790 emit_csinc (uint32_t *buf, struct aarch64_register rd,
1791 struct aarch64_register rn, struct aarch64_register rm,
1792 unsigned cond)
1793 {
1794 uint32_t size = ENCODE (rd.is64, 1, 31);
1795
1796 return aarch64_emit_insn (buf, CSINC | size | ENCODE (rm.num, 5, 16)
1797 | ENCODE (cond, 4, 12) | ENCODE (rn.num, 5, 5)
1798 | ENCODE (rd.num, 5, 0));
1799 }
1800
1801 /* Write a CSET instruction into *BUF.
1802
1803 CSET rd, cond
1804
1805 This instruction conditionally writes 1 or 0 to the destination register.
1806 1 is written if the condition is true. This is an alias for:
1807
1808 CSINC rd, xzr, xzr, !cond
1809
1810 Note that the condition needs to be inverted.
1811
1812 RD is the destination register.
1814 COND is the encoded condition. */
1815
1816 static int
1817 emit_cset (uint32_t *buf, struct aarch64_register rd, unsigned cond)
1818 {
1819 /* The least significant bit of the condition needs toggling in order to
1820 invert it. */
1821 return emit_csinc (buf, rd, xzr, xzr, cond ^ 0x1);
1822 }
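
/* For example, p += emit_cset (p, x0, EQ); emits CSINC x0, xzr, xzr, NE:
   toggling the low bit turns EQ (0x0) into NE (0x1), GE (0xa) into
   LT (0xb), and so on.  */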
1823
1824 /* Write LEN instructions from BUF into the inferior memory at *TO.
1825
1826 Note that instructions are always little endian on AArch64, even when data is big endian. */
1827
1828 static void
1829 append_insns (CORE_ADDR *to, size_t len, const uint32_t *buf)
1830 {
1831 size_t byte_len = len * sizeof (uint32_t);
1832 #if (__BYTE_ORDER == __BIG_ENDIAN)
1833 uint32_t *le_buf = (uint32_t *) xmalloc (byte_len);
1834 size_t i;
1835
1836 for (i = 0; i < len; i++)
1837 le_buf[i] = htole32 (buf[i]);
1838
1839 target_write_memory (*to, (const unsigned char *) le_buf, byte_len);
1840
1841 xfree (le_buf);
1842 #else
1843 target_write_memory (*to, (const unsigned char *) buf, byte_len);
1844 #endif
1845
1846 *to += byte_len;
1847 }
1848
1849 /* Sub-class of struct aarch64_insn_data, storing information about
1850 instruction relocation for fast tracepoints.  The visitor can
1851 relocate an instruction from BASE.INSN_ADDR to NEW_ADDR and save
1852 the relocated instructions in the buffer pointed to by INSN_PTR. */
1853
1854 struct aarch64_insn_relocation_data
1855 {
1856 struct aarch64_insn_data base;
1857
1858 /* The new address the instruction is relocated to. */
1859 CORE_ADDR new_addr;
1860 /* Pointer to the buffer of relocated instruction(s). */
1861 uint32_t *insn_ptr;
1862 };
1863
1864 /* Implementation of aarch64_insn_visitor method "b". */
1865
1866 static void
1867 aarch64_ftrace_insn_reloc_b (const int is_bl, const int32_t offset,
1868 struct aarch64_insn_data *data)
1869 {
1870 struct aarch64_insn_relocation_data *insn_reloc
1871 = (struct aarch64_insn_relocation_data *) data;
1872 int64_t new_offset
1873 = insn_reloc->base.insn_addr - insn_reloc->new_addr + offset;
1874
1875 if (can_encode_int32 (new_offset, 28))
1876 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, is_bl, new_offset);
1877 }
1878
1879 /* Implementation of aarch64_insn_visitor method "b_cond". */
1880
1881 static void
1882 aarch64_ftrace_insn_reloc_b_cond (const unsigned cond, const int32_t offset,
1883 struct aarch64_insn_data *data)
1884 {
1885 struct aarch64_insn_relocation_data *insn_reloc
1886 = (struct aarch64_insn_relocation_data *) data;
1887 int64_t new_offset
1888 = insn_reloc->base.insn_addr - insn_reloc->new_addr + offset;
1889
1890 if (can_encode_int32 (new_offset, 21))
1891 {
1892 insn_reloc->insn_ptr += emit_bcond (insn_reloc->insn_ptr, cond,
1893 new_offset);
1894 }
1895 else if (can_encode_int32 (new_offset, 28))
1896 {
1897 /* The offset is out of range for a conditional branch
1898 instruction but not for an unconditional branch.  We can use
1899 the following instructions instead:
1900
1901 B.COND TAKEN ; If cond is true, then jump to TAKEN.
1902 B NOT_TAKEN ; Else jump over TAKEN and continue.
1903 TAKEN:
1904 B #(offset - 8)
1905 NOT_TAKEN:
1906
1907 */
1908
1909 insn_reloc->insn_ptr += emit_bcond (insn_reloc->insn_ptr, cond, 8);
1910 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, 0, 8);
1911 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, 0, new_offset - 8);
1912 }
1913 }
1914
1915 /* Implementation of aarch64_insn_visitor method "cb". */
1916
1917 static void
1918 aarch64_ftrace_insn_reloc_cb (const int32_t offset, const int is_cbnz,
1919 const unsigned rn, int is64,
1920 struct aarch64_insn_data *data)
1921 {
1922 struct aarch64_insn_relocation_data *insn_reloc
1923 = (struct aarch64_insn_relocation_data *) data;
1924 int64_t new_offset
1925 = insn_reloc->base.insn_addr - insn_reloc->new_addr + offset;
1926
1927 if (can_encode_int32 (new_offset, 21))
1928 {
1929 insn_reloc->insn_ptr += emit_cb (insn_reloc->insn_ptr, is_cbnz,
1930 aarch64_register (rn, is64), new_offset);
1931 }
1932 else if (can_encode_int32 (new_offset, 28))
1933 {
1934 /* The offset is out of range for a compare and branch
1935 instruction but not for an unconditional branch. We can use
1936 the following instructions instead:
1937
1938 CBZ xn, TAKEN ; If xn == 0, then jump to TAKEN.
1939 B NOT_TAKEN ; Else jump over TAKEN and continue.
1940 TAKEN:
1941 B #(offset - 8)
1942 NOT_TAKEN:
1943
1944 */
1945 insn_reloc->insn_ptr += emit_cb (insn_reloc->insn_ptr, is_cbnz,
1946 aarch64_register (rn, is64), 8);
1947 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, 0, 8);
1948 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, 0, new_offset - 8);
1949 }
1950 }
1951
1952 /* Implementation of aarch64_insn_visitor method "tb". */
1953
1954 static void
1955 aarch64_ftrace_insn_reloc_tb (const int32_t offset, int is_tbnz,
1956 const unsigned rt, unsigned bit,
1957 struct aarch64_insn_data *data)
1958 {
1959 struct aarch64_insn_relocation_data *insn_reloc
1960 = (struct aarch64_insn_relocation_data *) data;
1961 int64_t new_offset
1962 = insn_reloc->base.insn_addr - insn_reloc->new_addr + offset;
1963
1964 if (can_encode_int32 (new_offset, 16))
1965 {
1966 insn_reloc->insn_ptr += emit_tb (insn_reloc->insn_ptr, is_tbnz, bit,
1967 aarch64_register (rt, 1), new_offset);
1968 }
1969 else if (can_encode_int32 (new_offset, 28))
1970 {
1971 /* The offset is out of range for a test bit and branch
1972 instruction but not for an unconditional branch. We can use
1973 the following instructions instead:
1974
1975 TBZ xn, #bit, TAKEN ; If xn[bit] == 0, then jump to TAKEN.
1976 B NOT_TAKEN ; Else jump over TAKEN and continue.
1977 TAKEN:
1978 B #(offset - 8)
1979 NOT_TAKEN:
1980
1981 */
1982 insn_reloc->insn_ptr += emit_tb (insn_reloc->insn_ptr, is_tbnz, bit,
1983 aarch64_register (rt, 1), 8);
1984 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, 0, 8);
1985 insn_reloc->insn_ptr += emit_b (insn_reloc->insn_ptr, 0,
1986 new_offset - 8);
1987 }
1988 }
1989
1990 /* Implementation of aarch64_insn_visitor method "adr". */
1991
1992 static void
1993 aarch64_ftrace_insn_reloc_adr (const int32_t offset, const unsigned rd,
1994 const int is_adrp,
1995 struct aarch64_insn_data *data)
1996 {
1997 struct aarch64_insn_relocation_data *insn_reloc
1998 = (struct aarch64_insn_relocation_data *) data;
1999 /* We know exactly the address the ADR{P,} instruction will compute.
2000 We can just write it to the destination register. */
2001 CORE_ADDR address = data->insn_addr + offset;
2002
2003 if (is_adrp)
2004 {
2005 /* Clear the lower 12 bits of the address to get its 4 KiB page. */
2006 insn_reloc->insn_ptr += emit_mov_addr (insn_reloc->insn_ptr,
2007 aarch64_register (rd, 1),
2008 address & ~0xfff);
2009 }
2010 else
2011 insn_reloc->insn_ptr += emit_mov_addr (insn_reloc->insn_ptr,
2012 aarch64_register (rd, 1), address);
2013 }
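
/* Illustrative sketch of the ADRP case above, assuming OFFSET is already
   scaled to a multiple of 4 KiB for ADRP: the instruction yields the
   4 KiB page containing (insn_addr + offset), i.e. that sum with its low
   12 bits cleared, and that value is what gets materialized into RD.  */
#if 0
#include <stdint.h>

static uint64_t
adrp_result (uint64_t insn_addr, int64_t page_scaled_offset)
{
  return (insn_addr + page_scaled_offset) & ~(uint64_t) 0xfff;
}
#endif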
2014
2015 /* Implementation of aarch64_insn_visitor method "ldr_literal". */
2016
2017 static void
2018 aarch64_ftrace_insn_reloc_ldr_literal (const int32_t offset, const int is_sw,
2019 const unsigned rt, const int is64,
2020 struct aarch64_insn_data *data)
2021 {
2022 struct aarch64_insn_relocation_data *insn_reloc
2023 = (struct aarch64_insn_relocation_data *) data;
2024 CORE_ADDR address = data->insn_addr + offset;
2025
2026 insn_reloc->insn_ptr += emit_mov_addr (insn_reloc->insn_ptr,
2027 aarch64_register (rt, 1), address);
2028
2029 /* We know exactly what address to load from, and what register we
2030 can use:
2031
2032 MOV xd, #(oldloc + offset)
2033 MOVK xd, #((oldloc + offset) >> 16), lsl #16
2034 ...
2035
2036 LDR xd, [xd] ; or LDRSW xd, [xd]
2037
2038 */
2039
2040 if (is_sw)
2041 insn_reloc->insn_ptr += emit_ldrsw (insn_reloc->insn_ptr,
2042 aarch64_register (rt, 1),
2043 aarch64_register (rt, 1),
2044 offset_memory_operand (0));
2045 else
2046 insn_reloc->insn_ptr += emit_ldr (insn_reloc->insn_ptr,
2047 aarch64_register (rt, is64),
2048 aarch64_register (rt, 1),
2049 offset_memory_operand (0));
2050 }
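
/* Sketch of how a 64-bit address is assumed to be materialized by
   emit_mov_addr (a MOVZ followed by up to three MOVKs, one 16-bit chunk
   per instruction):

     MOVZ xd, #(addr & 0xffff)
     MOVK xd, #((addr >> 16) & 0xffff), lsl #16
     MOVK xd, #((addr >> 32) & 0xffff), lsl #32
     MOVK xd, #((addr >> 48) & 0xffff), lsl #48  */
#if 0
#include <stdint.h>

static void
split_into_movk_chunks (uint64_t addr, uint16_t chunk[4])
{
  for (int i = 0; i < 4; i++)
    chunk[i] = (uint16_t) (addr >> (16 * i));
}
#endif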
2051
2052 /* Implementation of aarch64_insn_visitor method "others". */
2053
2054 static void
2055 aarch64_ftrace_insn_reloc_others (const uint32_t insn,
2056 struct aarch64_insn_data *data)
2057 {
2058 struct aarch64_insn_relocation_data *insn_reloc
2059 = (struct aarch64_insn_relocation_data *) data;
2060
2061 /* The instruction is not PC relative. Just re-emit it at the new
2062 location. */
2063 insn_reloc->insn_ptr += aarch64_emit_insn (insn_reloc->insn_ptr, insn);
2064 }
2065
2066 static const struct aarch64_insn_visitor visitor =
2067 {
2068 aarch64_ftrace_insn_reloc_b,
2069 aarch64_ftrace_insn_reloc_b_cond,
2070 aarch64_ftrace_insn_reloc_cb,
2071 aarch64_ftrace_insn_reloc_tb,
2072 aarch64_ftrace_insn_reloc_adr,
2073 aarch64_ftrace_insn_reloc_ldr_literal,
2074 aarch64_ftrace_insn_reloc_others,
2075 };
2076
2077 bool
2078 aarch64_target::supports_fast_tracepoints ()
2079 {
2080 return true;
2081 }
2082
2083 /* Implementation of target ops method
2084 "install_fast_tracepoint_jump_pad". */
2085
2086 int
2087 aarch64_target::install_fast_tracepoint_jump_pad
2088 (CORE_ADDR tpoint, CORE_ADDR tpaddr, CORE_ADDR collector,
2089 CORE_ADDR lockaddr, ULONGEST orig_size, CORE_ADDR *jump_entry,
2090 CORE_ADDR *trampoline, ULONGEST *trampoline_size,
2091 unsigned char *jjump_pad_insn, ULONGEST *jjump_pad_insn_size,
2092 CORE_ADDR *adjusted_insn_addr, CORE_ADDR *adjusted_insn_addr_end,
2093 char *err)
2094 {
2095 uint32_t buf[256];
2096 uint32_t *p = buf;
2097 int64_t offset;
2098 int i;
2099 uint32_t insn;
2100 CORE_ADDR buildaddr = *jump_entry;
2101 struct aarch64_insn_relocation_data insn_data;
2102
2103 /* We need to save the current state on the stack both to restore it
2104 later and to collect register values when the tracepoint is hit.
2105
2106 The saved registers are pushed in a layout that needs to be in sync
2107 with aarch64_ft_collect_regmap (see linux-aarch64-ipa.c). Later on
2108 the supply_fast_tracepoint_registers function will fill in the
2109 register cache from a pointer to saved registers on the stack we build
2110 here.
2111
2112 For simplicity, we set the size of each cell on the stack to 16 bytes.
2113 This way one cell can hold any register type, from system registers
2114 to the 128-bit SIMD&FP registers. Furthermore, the stack pointer
2115 has to be 16-byte aligned anyway.
2116
2117 Note that the CPSR register does not exist on AArch64. Instead we
2118 can access system bits describing the process state with the
2119 MRS/MSR instructions, namely the condition flags. We save them as
2120 if they are part of a CPSR register because that's how GDB
2121 interprets these system bits. At the moment, only the condition
2122 flags are saved in CPSR (NZCV).
2123
2124 Stack layout, each cell is 16 bytes (descending):
2125
2126 High *-------- SIMD&FP registers from 31 down to 0. --------*
2127 | q31 |
2128 . .
2129 . . 32 cells
2130 . .
2131 | q0 |
2132 *---- General purpose registers from 30 down to 0. ----*
2133 | x30 |
2134 . .
2135 . . 31 cells
2136 . .
2137 | x0 |
2138 *------------- Special purpose registers. -------------*
2139 | SP |
2140 | PC |
2141 | CPSR (NZCV) | 5 cells
2142 | FPSR |
2143 | FPCR | <- SP + 16
2144 *------------- collecting_t object --------------------*
2145 | TPIDR_EL0 | struct tracepoint * |
2146 Low *------------------------------------------------------*
2147
2148 After this stack is set up, we issue a call to the collector, passing
2149 it the saved registers at (SP + 16). */
2150
2151 /* Push SIMD&FP registers on the stack:
2152
2153 SUB sp, sp, #(32 * 16)
2154
2155 STP q30, q31, [sp, #(30 * 16)]
2156 ...
2157 STP q0, q1, [sp]
2158
2159 */
2160 p += emit_sub (p, sp, sp, immediate_operand (32 * 16));
2161 for (i = 30; i >= 0; i -= 2)
2162 p += emit_stp_q_offset (p, i, i + 1, sp, i * 16);
2163
2164 /* Push general purpose registers on the stack. Note that we do not need
2165 to push x31 as it represents the xzr register and not the stack
2166 pointer in a STR instruction.
2167
2168 SUB sp, sp, #(31 * 16)
2169
2170 STR x30, [sp, #(30 * 16)]
2171 ...
2172 STR x0, [sp]
2173
2174 */
2175 p += emit_sub (p, sp, sp, immediate_operand (31 * 16));
2176 for (i = 30; i >= 0; i -= 1)
2177 p += emit_str (p, aarch64_register (i, 1), sp,
2178 offset_memory_operand (i * 16));
2179
2180 /* Make space for 5 more cells.
2181
2182 SUB sp, sp, #(5 * 16)
2183
2184 */
2185 p += emit_sub (p, sp, sp, immediate_operand (5 * 16));
2186
2187
2188 /* Save SP:
2189
2190 ADD x4, sp, #((32 + 31 + 5) * 16)
2191 STR x4, [sp, #(4 * 16)]
2192
2193 */
2194 p += emit_add (p, x4, sp, immediate_operand ((32 + 31 + 5) * 16));
2195 p += emit_str (p, x4, sp, offset_memory_operand (4 * 16));
2196
2197 /* Save PC (tracepoint address):
2198
2199 MOV x3, #(tpaddr)
2200 ...
2201
2202 STR x3, [sp, #(3 * 16)]
2203
2204 */
2205
2206 p += emit_mov_addr (p, x3, tpaddr);
2207 p += emit_str (p, x3, sp, offset_memory_operand (3 * 16));
2208
2209 /* Save CPSR (NZCV), FPSR and FPCR:
2210
2211 MRS x2, nzcv
2212 MRS x1, fpsr
2213 MRS x0, fpcr
2214
2215 STR x2, [sp, #(2 * 16)]
2216 STR x1, [sp, #(1 * 16)]
2217 STR x0, [sp, #(0 * 16)]
2218
2219 */
2220 p += emit_mrs (p, x2, NZCV);
2221 p += emit_mrs (p, x1, FPSR);
2222 p += emit_mrs (p, x0, FPCR);
2223 p += emit_str (p, x2, sp, offset_memory_operand (2 * 16));
2224 p += emit_str (p, x1, sp, offset_memory_operand (1 * 16));
2225 p += emit_str (p, x0, sp, offset_memory_operand (0 * 16));
2226
2227 /* Push the collecting_t object. It consists of the address of the
2228 tracepoint and an ID for the current thread. We get the latter by
2229 reading the tpidr_el0 system register. It corresponds to the
2230 NT_ARM_TLS register accessible with ptrace.
2231
2232 MOV x0, #(tpoint)
2233 ...
2234
2235 MRS x1, tpidr_el0
2236
2237 STP x0, x1, [sp, #-16]!
2238
2239 */
2240
2241 p += emit_mov_addr (p, x0, tpoint);
2242 p += emit_mrs (p, x1, TPIDR_EL0);
2243 p += emit_stp (p, x0, x1, sp, preindex_memory_operand (-16));
2244
2245 /* Spin-lock:
2246
2247 The shared memory for the lock is at lockaddr. It will hold zero
2248 if no-one is holding the lock, otherwise it contains the address of
2249 the collecting_t object on the stack of the thread which acquired it.
2250
2251 At this stage, the stack pointer points to this thread's collecting_t
2252 object.
2253
2254 We use the following registers:
2255 - x0: Address of the lock.
2256 - x1: Pointer to collecting_t object.
2257 - x2: Scratch register.
2258
2259 MOV x0, #(lockaddr)
2260 ...
2261 MOV x1, sp
2262
2263 ; Trigger an event local to this core, so that the first WFE
2264 ; below returns immediately instead of waiting.
2265 SEVL
2266 again:
2267 ; Wait for an event. The event is triggered by either the SEVL
2268 ; or STLR instructions (store release).
2269 WFE
2270
2271 ; Atomically read at lockaddr. This marks the memory location as
2272 ; exclusive. This instruction also has memory constraints which
2273 ; make sure all previous data reads and writes are done before
2274 ; executing it.
2275 LDAXR x2, [x0]
2276
2277 ; Try again if another thread holds the lock.
2278 CBNZ x2, again
2279
2280 ; We can lock it! Write the address of the collecting_t object.
2281 ; This instruction will fail if the memory location is not marked
2282 ; as exclusive anymore. If it succeeds, it will remove the
2283 ; exclusive mark on the memory location. This way, if another
2284 ; thread executes this instruction before us, we will fail and try
2285 ; all over again.
2286 STXR w2, x1, [x0]
2287 CBNZ w2, again
2288
2289 */
2290
2291 p += emit_mov_addr (p, x0, lockaddr);
2292 p += emit_mov (p, x1, register_operand (sp));
2293
2294 p += emit_sevl (p);
2295 p += emit_wfe (p);
2296 p += emit_ldaxr (p, x2, x0);
2297 p += emit_cb (p, 1, w2, -2 * 4);
2298 p += emit_stxr (p, w2, x1, x0);
2299 p += emit_cb (p, 1, x2, -4 * 4);
2300
2301 /* Call collector (struct tracepoint *, unsigned char *):
2302
2303 MOV x0, #(tpoint)
2304 ...
2305
2306 ; Saved registers start after the collecting_t object.
2307 ADD x1, sp, #16
2308
2309 ; We use an intra-procedure-call scratch register.
2310 MOV ip0, #(collector)
2311 ...
2312
2313 ; And call back to C!
2314 BLR ip0
2315
2316 */
2317
2318 p += emit_mov_addr (p, x0, tpoint);
2319 p += emit_add (p, x1, sp, immediate_operand (16));
2320
2321 p += emit_mov_addr (p, ip0, collector);
2322 p += emit_blr (p, ip0);
2323
2324 /* Release the lock.
2325
2326 MOV x0, #(lockaddr)
2327 ...
2328
2329 ; This instruction is a normal store with memory ordering
2330 ; constraints. Thanks to this we do not have to put a data
2331 ; barrier instruction to make sure all data read and writes are done
2332 ; before this instruction is executed. Furthermore, this instruction
2333 ; will trigger an event, letting other threads know they can grab
2334 ; the lock.
2335 STLR xzr, [x0]
2336
2337 */
2338 p += emit_mov_addr (p, x0, lockaddr);
2339 p += emit_stlr (p, xzr, x0);
2340
2341 /* Free collecting_t object:
2342
2343 ADD sp, sp, #16
2344
2345 */
2346 p += emit_add (p, sp, sp, immediate_operand (16));
2347
2348 /* Restore CPSR (NZCV), FPSR and FPCR. And free all special purpose
2349 registers from the stack.
2350
2351 LDR x2, [sp, #(2 * 16)]
2352 LDR x1, [sp, #(1 * 16)]
2353 LDR x0, [sp, #(0 * 16)]
2354
2355 MSR NZCV, x2
2356 MSR FPSR, x1
2357 MSR FPCR, x0
2358
2359 ADD sp, sp, #(5 * 16)
2360
2361 */
2362 p += emit_ldr (p, x2, sp, offset_memory_operand (2 * 16));
2363 p += emit_ldr (p, x1, sp, offset_memory_operand (1 * 16));
2364 p += emit_ldr (p, x0, sp, offset_memory_operand (0 * 16));
2365 p += emit_msr (p, NZCV, x2);
2366 p += emit_msr (p, FPSR, x1);
2367 p += emit_msr (p, FPCR, x0);
2368
2369 p += emit_add (p, sp, sp, immediate_operand (5 * 16));
2370
2371 /* Pop general purpose registers:
2372
2373 LDR x0, [sp]
2374 ...
2375 LDR x30, [sp, #(30 * 16)]
2376
2377 ADD sp, sp, #(31 * 16)
2378
2379 */
2380 for (i = 0; i <= 30; i += 1)
2381 p += emit_ldr (p, aarch64_register (i, 1), sp,
2382 offset_memory_operand (i * 16));
2383 p += emit_add (p, sp, sp, immediate_operand (31 * 16));
2384
2385 /* Pop SIMD&FP registers:
2386
2387 LDP q0, q1, [sp]
2388 ...
2389 LDP q30, q31, [sp, #(30 * 16)]
2390
2391 ADD sp, sp, #(32 * 16)
2392
2393 */
2394 for (i = 0; i <= 30; i += 2)
2395 p += emit_ldp_q_offset (p, i, i + 1, sp, i * 16);
2396 p += emit_add (p, sp, sp, immediate_operand (32 * 16));
2397
2398 /* Write the code into the inferior memory. */
2399 append_insns (&buildaddr, p - buf, buf);
2400
2401 /* Now emit the relocated instruction. */
2402 *adjusted_insn_addr = buildaddr;
2403 target_read_uint32 (tpaddr, &insn);
2404
2405 insn_data.base.insn_addr = tpaddr;
2406 insn_data.new_addr = buildaddr;
2407 insn_data.insn_ptr = buf;
2408
2409 aarch64_relocate_instruction (insn, &visitor,
2410 (struct aarch64_insn_data *) &insn_data);
2411
2412 /* We may not have been able to relocate the instruction. */
2413 if (insn_data.insn_ptr == buf)
2414 {
2415 sprintf (err,
2416 "E.Could not relocate instruction from %s to %s.",
2417 core_addr_to_string_nz (tpaddr),
2418 core_addr_to_string_nz (buildaddr));
2419 return 1;
2420 }
2421 else
2422 append_insns (&buildaddr, insn_data.insn_ptr - buf, buf);
2423 *adjusted_insn_addr_end = buildaddr;
2424
2425 /* Go back to the start of the buffer. */
2426 p = buf;
2427
2428 /* Emit a branch back from the jump pad. */
2429 offset = (tpaddr + orig_size - buildaddr);
2430 if (!can_encode_int32 (offset, 28))
2431 {
2432 sprintf (err,
2433 "E.Jump back from jump pad too far from tracepoint "
2434 "(offset 0x%" PRIx64 " cannot be encoded in 28 bits).",
2435 offset);
2436 return 1;
2437 }
2438
2439 p += emit_b (p, 0, offset);
2440 append_insns (&buildaddr, p - buf, buf);
2441
2442 /* Give the caller a branch instruction into the jump pad. */
2443 offset = (*jump_entry - tpaddr);
2444 if (!can_encode_int32 (offset, 28))
2445 {
2446 sprintf (err,
2447 "E.Jump pad too far from tracepoint "
2448 "(offset 0x%" PRIx64 " cannot be encoded in 28 bits).",
2449 offset);
2450 return 1;
2451 }
2452
2453 emit_b ((uint32_t *) jjump_pad_insn, 0, offset);
2454 *jjump_pad_insn_size = 4;
2455
2456 /* Return the end address of our pad. */
2457 *jump_entry = buildaddr;
2458
2459 return 0;
2460 }
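
/* Host-side analogue of the jump pad's spin lock, written with C11
   atomics purely for illustration (the real sequence must be emitted as
   instructions since it runs in the inferior).  *LOCK holds 0 when free,
   otherwise the address of the owner's collecting_t object, exactly as
   described in the comments above; function and parameter names here are
   hypothetical.  */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static void
collecting_lock (_Atomic uintptr_t *lock, uintptr_t my_collecting_t)
{
  uintptr_t expected = 0;

  /* Mirrors the LDAXR / CBNZ / STXR / CBNZ loop: keep retrying until we
     atomically replace 0 with our collecting_t address, with acquire
     ordering like the load-acquire of LDAXR.  */
  while (!atomic_compare_exchange_weak_explicit (lock, &expected,
						 my_collecting_t,
						 memory_order_acquire,
						 memory_order_relaxed))
    expected = 0;
}

static void
collecting_unlock (_Atomic uintptr_t *lock)
{
  /* Mirrors "STLR xzr, [x0]": a store-release of 0 frees the lock.  */
  atomic_store_explicit (lock, (uintptr_t) 0, memory_order_release);
}
#endif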
2461
2462 /* Helper function writing LEN instructions from START into
2463 current_insn_ptr. */
2464
2465 static void
2466 emit_ops_insns (const uint32_t *start, int len)
2467 {
2468 CORE_ADDR buildaddr = current_insn_ptr;
2469
2470 threads_debug_printf ("Adding %d instructions at %s",
2471 len, paddress (buildaddr));
2472
2473 append_insns (&buildaddr, len, start);
2474 current_insn_ptr = buildaddr;
2475 }
2476
2477 /* Pop a register from the stack. */
2478
2479 static int
2480 emit_pop (uint32_t *buf, struct aarch64_register rt)
2481 {
2482 return emit_ldr (buf, rt, sp, postindex_memory_operand (1 * 16));
2483 }
2484
2485 /* Push a register on the stack. */
2486
2487 static int
2488 emit_push (uint32_t *buf, struct aarch64_register rt)
2489 {
2490 return emit_str (buf, rt, sp, preindex_memory_operand (-1 * 16));
2491 }
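
/* For reference, the expression stack used by the emit_* helpers keeps SP
   16-byte aligned by giving every cell 16 bytes, so the push/pop pair
   above expands to:

     STR x, [sp, #-16]!      ; push: pre-indexed store, SP -= 16
     LDR x, [sp], #16        ; pop:  post-indexed load,  SP += 16  */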
2492
2493 /* Implementation of emit_ops method "emit_prologue". */
2494
2495 static void
2496 aarch64_emit_prologue (void)
2497 {
2498 uint32_t buf[16];
2499 uint32_t *p = buf;
2500
2501 /* This function emits a prologue for the following function prototype:
2502
2503 enum eval_result_type f (unsigned char *regs,
2504 ULONGEST *value);
2505
2506 The first argument is a buffer of raw registers.
2507 The second argument is the result of evaluating the
2508 expression, which will be set to whatever is on top of
2509 the stack at the end.
2510
2511 The stack set up by the prologue is as such:
2512
2513 High *------------------------------------------------------*
2514 | LR |
2515 | FP | <- FP
2516 | x1 (ULONGEST *value) |
2517 | x0 (unsigned char *regs) |
2518 Low *------------------------------------------------------*
2519
2520 As we are implementing a stack machine, each opcode can expand the
2521 stack so we never know how far we are from the data saved by this
2522 prologue. In order to be able to refer to value and regs later, we save
2523 the current stack pointer in the frame pointer. This way, it is not
2524 clobbered when calling C functions.
2525
2526 Finally, throughout every operation, we are using register x0 as the
2527 top of the stack, and x1 as a scratch register. */
2528
2529 p += emit_stp (p, x0, x1, sp, preindex_memory_operand (-2 * 16));
2530 p += emit_str (p, lr, sp, offset_memory_operand (3 * 8));
2531 p += emit_str (p, fp, sp, offset_memory_operand (2 * 8));
2532
2533 p += emit_add (p, fp, sp, immediate_operand (2 * 8));
2534
2535
2536 emit_ops_insns (buf, p - buf);
2537 }
2538
2539 /* Implementation of emit_ops method "emit_epilogue". */
2540
2541 static void
2542 aarch64_emit_epilogue (void)
2543 {
2544 uint32_t buf[16];
2545 uint32_t *p = buf;
2546
2547 /* Store the result of the expression (x0) in *value. */
2548 p += emit_sub (p, x1, fp, immediate_operand (1 * 8));
2549 p += emit_ldr (p, x1, x1, offset_memory_operand (0));
2550 p += emit_str (p, x0, x1, offset_memory_operand (0));
2551
2552 /* Restore the previous state. */
2553 p += emit_add (p, sp, fp, immediate_operand (2 * 8));
2554 p += emit_ldp (p, fp, lr, fp, offset_memory_operand (0));
2555
2556 /* Return expr_eval_no_error. */
2557 p += emit_mov (p, x0, immediate_operand (expr_eval_no_error));
2558 p += emit_ret (p, lr);
2559
2560 emit_ops_insns (buf, p - buf);
2561 }
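
/* Rough, purely illustrative C equivalent of the function that the
   prologue/epilogue pair above wraps around the compiled expression
   (names here are hypothetical; the real body is whatever sequence of
   emit_* opcodes the bytecode compiler places in between, with x0
   caching the top of the stack).  */
#if 0
#include <stdint.h>

static int
compiled_expression_shape (unsigned char *regs, uint64_t *value)
{
  uint64_t top = 0;

  /* ... emitted opcodes compute the expression, updating TOP ...  */

  *value = top;		/* the epilogue stores x0 through *value */
  return 0;		/* expr_eval_no_error */
}
#endif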
2562
2563 /* Implementation of emit_ops method "emit_add". */
2564
2565 static void
2566 aarch64_emit_add (void)
2567 {
2568 uint32_t buf[16];
2569 uint32_t *p = buf;
2570
2571 p += emit_pop (p, x1);
2572 p += emit_add (p, x0, x1, register_operand (x0));
2573
2574 emit_ops_insns (buf, p - buf);
2575 }
2576
2577 /* Implementation of emit_ops method "emit_sub". */
2578
2579 static void
2580 aarch64_emit_sub (void)
2581 {
2582 uint32_t buf[16];
2583 uint32_t *p = buf;
2584
2585 p += emit_pop (p, x1);
2586 p += emit_sub (p, x0, x1, register_operand (x0));
2587
2588 emit_ops_insns (buf, p - buf);
2589 }
2590
2591 /* Implementation of emit_ops method "emit_mul". */
2592
2593 static void
2594 aarch64_emit_mul (void)
2595 {
2596 uint32_t buf[16];
2597 uint32_t *p = buf;
2598
2599 p += emit_pop (p, x1);
2600 p += emit_mul (p, x0, x1, x0);
2601
2602 emit_ops_insns (buf, p - buf);
2603 }
2604
2605 /* Implementation of emit_ops method "emit_lsh". */
2606
2607 static void
2608 aarch64_emit_lsh (void)
2609 {
2610 uint32_t buf[16];
2611 uint32_t *p = buf;
2612
2613 p += emit_pop (p, x1);
2614 p += emit_lslv (p, x0, x1, x0);
2615
2616 emit_ops_insns (buf, p - buf);
2617 }
2618
2619 /* Implementation of emit_ops method "emit_rsh_signed". */
2620
2621 static void
2622 aarch64_emit_rsh_signed (void)
2623 {
2624 uint32_t buf[16];
2625 uint32_t *p = buf;
2626
2627 p += emit_pop (p, x1);
2628 p += emit_asrv (p, x0, x1, x0);
2629
2630 emit_ops_insns (buf, p - buf);
2631 }
2632
2633 /* Implementation of emit_ops method "emit_rsh_unsigned". */
2634
2635 static void
2636 aarch64_emit_rsh_unsigned (void)
2637 {
2638 uint32_t buf[16];
2639 uint32_t *p = buf;
2640
2641 p += emit_pop (p, x1);
2642 p += emit_lsrv (p, x0, x1, x0);
2643
2644 emit_ops_insns (buf, p - buf);
2645 }
2646
2647 /* Implementation of emit_ops method "emit_ext". */
2648
2649 static void
2650 aarch64_emit_ext (int arg)
2651 {
2652 uint32_t buf[16];
2653 uint32_t *p = buf;
2654
2655 p += emit_sbfx (p, x0, x0, 0, arg);
2656
2657 emit_ops_insns (buf, p - buf);
2658 }
2659
2660 /* Implementation of emit_ops method "emit_log_not". */
2661
2662 static void
2663 aarch64_emit_log_not (void)
2664 {
2665 uint32_t buf[16];
2666 uint32_t *p = buf;
2667
2668 /* If the top of the stack is 0, replace it with 1. Else replace it with
2669 0. */
2670
2671 p += emit_cmp (p, x0, immediate_operand (0));
2672 p += emit_cset (p, x0, EQ);
2673
2674 emit_ops_insns (buf, p - buf);
2675 }
2676
2677 /* Implementation of emit_ops method "emit_bit_and". */
2678
2679 static void
2680 aarch64_emit_bit_and (void)
2681 {
2682 uint32_t buf[16];
2683 uint32_t *p = buf;
2684
2685 p += emit_pop (p, x1);
2686 p += emit_and (p, x0, x0, x1);
2687
2688 emit_ops_insns (buf, p - buf);
2689 }
2690
2691 /* Implementation of emit_ops method "emit_bit_or". */
2692
2693 static void
2694 aarch64_emit_bit_or (void)
2695 {
2696 uint32_t buf[16];
2697 uint32_t *p = buf;
2698
2699 p += emit_pop (p, x1);
2700 p += emit_orr (p, x0, x0, x1);
2701
2702 emit_ops_insns (buf, p - buf);
2703 }
2704
2705 /* Implementation of emit_ops method "emit_bit_xor". */
2706
2707 static void
2708 aarch64_emit_bit_xor (void)
2709 {
2710 uint32_t buf[16];
2711 uint32_t *p = buf;
2712
2713 p += emit_pop (p, x1);
2714 p += emit_eor (p, x0, x0, x1);
2715
2716 emit_ops_insns (buf, p - buf);
2717 }
2718
2719 /* Implementation of emit_ops method "emit_bit_not". */
2720
2721 static void
2722 aarch64_emit_bit_not (void)
2723 {
2724 uint32_t buf[16];
2725 uint32_t *p = buf;
2726
2727 p += emit_mvn (p, x0, x0);
2728
2729 emit_ops_insns (buf, p - buf);
2730 }
2731
2732 /* Implementation of emit_ops method "emit_equal". */
2733
2734 static void
2735 aarch64_emit_equal (void)
2736 {
2737 uint32_t buf[16];
2738 uint32_t *p = buf;
2739
2740 p += emit_pop (p, x1);
2741 p += emit_cmp (p, x0, register_operand (x1));
2742 p += emit_cset (p, x0, EQ);
2743
2744 emit_ops_insns (buf, p - buf);
2745 }
2746
2747 /* Implementation of emit_ops method "emit_less_signed". */
2748
2749 static void
2750 aarch64_emit_less_signed (void)
2751 {
2752 uint32_t buf[16];
2753 uint32_t *p = buf;
2754
2755 p += emit_pop (p, x1);
2756 p += emit_cmp (p, x1, register_operand (x0));
2757 p += emit_cset (p, x0, LT);
2758
2759 emit_ops_insns (buf, p - buf);
2760 }
2761
2762 /* Implementation of emit_ops method "emit_less_unsigned". */
2763
2764 static void
2765 aarch64_emit_less_unsigned (void)
2766 {
2767 uint32_t buf[16];
2768 uint32_t *p = buf;
2769
2770 p += emit_pop (p, x1);
2771 p += emit_cmp (p, x1, register_operand (x0));
2772 p += emit_cset (p, x0, LO);
2773
2774 emit_ops_insns (buf, p - buf);
2775 }
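
/* The comparison emitters above all share one shape.  In the agent
   expression stack machine x0 caches the top of stack (the second
   operand B) and the POP fetches the first operand A into x1, so the
   emitted "CMP x1, x0; CSET x0, cond" computes A <cond> B.  Purely
   illustrative C equivalent:  */
#if 0
#include <stdint.h>

static uint64_t
less_signed_shape (int64_t a /* x1: popped, first operand */,
		   int64_t b /* x0: old top, second operand */)
{
  return a < b;		/* CMP x1, x0 ; CSET x0, LT */
}

static uint64_t
less_unsigned_shape (uint64_t a, uint64_t b)
{
  return a < b;		/* CMP x1, x0 ; CSET x0, LO */
}
#endif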
2776
2777 /* Implementation of emit_ops method "emit_ref". */
2778
2779 static void
2780 aarch64_emit_ref (int size)
2781 {
2782 uint32_t buf[16];
2783 uint32_t *p = buf;
2784
2785 switch (size)
2786 {
2787 case 1:
2788 p += emit_ldrb (p, w0, x0, offset_memory_operand (0));
2789 break;
2790 case 2:
2791 p += emit_ldrh (p, w0, x0, offset_memory_operand (0));
2792 break;
2793 case 4:
2794 p += emit_ldr (p, w0, x0, offset_memory_operand (0));
2795 break;
2796 case 8:
2797 p += emit_ldr (p, x0, x0, offset_memory_operand (0));
2798 break;
2799 default:
2800 /* Unknown size, bail on compilation. */
2801 emit_error = 1;
2802 break;
2803 }
2804
2805 emit_ops_insns (buf, p - buf);
2806 }
2807
2808 /* Implementation of emit_ops method "emit_if_goto". */
2809
2810 static void
2811 aarch64_emit_if_goto (int *offset_p, int *size_p)
2812 {
2813 uint32_t buf[16];
2814 uint32_t *p = buf;
2815
2816 /* The Z flag is set or cleared here. */
2817 p += emit_cmp (p, x0, immediate_operand (0));
2818 /* This instruction must not change the Z flag. */
2819 p += emit_pop (p, x0);
2820 /* Branch over the next instruction if x0 == 0. */
2821 p += emit_bcond (p, EQ, 8);
2822
2823 /* The NOP instruction will be patched with an unconditional branch. */
2824 if (offset_p)
2825 *offset_p = (p - buf) * 4;
2826 if (size_p)
2827 *size_p = 4;
2828 p += emit_nop (p);
2829
2830 emit_ops_insns (buf, p - buf);
2831 }
2832
2833 /* Implementation of emit_ops method "emit_goto". */
2834
2835 static void
2836 aarch64_emit_goto (int *offset_p, int *size_p)
2837 {
2838 uint32_t buf[16];
2839 uint32_t *p = buf;
2840
2841 /* The NOP instruction will be patched with an unconditional branch. */
2842 if (offset_p)
2843 *offset_p = 0;
2844 if (size_p)
2845 *size_p = 4;
2846 p += emit_nop (p);
2847
2848 emit_ops_insns (buf, p - buf);
2849 }
2850
2851 /* Implementation of emit_ops method "write_goto_address". */
2852
2853 static void
2854 aarch64_write_goto_address (CORE_ADDR from, CORE_ADDR to, int size)
2855 {
2856 uint32_t insn;
2857
2858 emit_b (&insn, 0, to - from);
2859 append_insns (&from, 1, &insn);
2860 }
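
/* Illustrative use of the forward-goto protocol (the caller and variable
   names here are hypothetical): emit_goto/emit_if_goto report where their
   placeholder NOP landed (byte OFFSET into the instructions they emitted,
   SIZE bytes long), and once the destination is known the caller patches
   that slot via write_goto_address.  */
#if 0
static void
patch_pending_goto (CORE_ADDR goto_emitted_at, int offset, int size,
		    CORE_ADDR destination)
{
  /* On AArch64 SIZE is always 4: a single B instruction overwrites
     the NOP.  */
  aarch64_write_goto_address (goto_emitted_at + offset, destination, size);
}
#endif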
2861
2862 /* Implementation of emit_ops method "emit_const". */
2863
2864 static void
2865 aarch64_emit_const (LONGEST num)
2866 {
2867 uint32_t buf[16];
2868 uint32_t *p = buf;
2869
2870 p += emit_mov_addr (p, x0, num);
2871
2872 emit_ops_insns (buf, p - buf);
2873 }
2874
2875 /* Implementation of emit_ops method "emit_call". */
2876
2877 static void
2878 aarch64_emit_call (CORE_ADDR fn)
2879 {
2880 uint32_t buf[16];
2881 uint32_t *p = buf;
2882
2883 p += emit_mov_addr (p, ip0, fn);
2884 p += emit_blr (p, ip0);
2885
2886 emit_ops_insns (buf, p - buf);
2887 }
2888
2889 /* Implementation of emit_ops method "emit_reg". */
2890
2891 static void
2892 aarch64_emit_reg (int reg)
2893 {
2894 uint32_t buf[16];
2895 uint32_t *p = buf;
2896
2897 /* Set x0 to unsigned char *regs. */
2898 p += emit_sub (p, x0, fp, immediate_operand (2 * 8));
2899 p += emit_ldr (p, x0, x0, offset_memory_operand (0));
2900 p += emit_mov (p, x1, immediate_operand (reg));
2901
2902 emit_ops_insns (buf, p - buf);
2903
2904 aarch64_emit_call (get_raw_reg_func_addr ());
2905 }
2906
2907 /* Implementation of emit_ops method "emit_pop". */
2908
2909 static void
2910 aarch64_emit_pop (void)
2911 {
2912 uint32_t buf[16];
2913 uint32_t *p = buf;
2914
2915 p += emit_pop (p, x0);
2916
2917 emit_ops_insns (buf, p - buf);
2918 }
2919
2920 /* Implementation of emit_ops method "emit_stack_flush". */
2921
2922 static void
2923 aarch64_emit_stack_flush (void)
2924 {
2925 uint32_t buf[16];
2926 uint32_t *p = buf;
2927
2928 p += emit_push (p, x0);
2929
2930 emit_ops_insns (buf, p - buf);
2931 }
2932
2933 /* Implementation of emit_ops method "emit_zero_ext". */
2934
2935 static void
2936 aarch64_emit_zero_ext (int arg)
2937 {
2938 uint32_t buf[16];
2939 uint32_t *p = buf;
2940
2941 p += emit_ubfx (p, x0, x0, 0, arg);
2942
2943 emit_ops_insns (buf, p - buf);
2944 }
2945
2946 /* Implementation of emit_ops method "emit_swap". */
2947
2948 static void
2949 aarch64_emit_swap (void)
2950 {
2951 uint32_t buf[16];
2952 uint32_t *p = buf;
2953
2954 p += emit_ldr (p, x1, sp, offset_memory_operand (0 * 16));
2955 p += emit_str (p, x0, sp, offset_memory_operand (0 * 16));
2956 p += emit_mov (p, x0, register_operand (x1));
2957
2958 emit_ops_insns (buf, p - buf);
2959 }
2960
2961 /* Implementation of emit_ops method "emit_stack_adjust". */
2962
2963 static void
2964 aarch64_emit_stack_adjust (int n)
2965 {
2966 /* Drop N 16-byte cells from the expression stack. */
2967 uint32_t buf[16];
2968 uint32_t *p = buf;
2969
2970 p += emit_add (p, sp, sp, immediate_operand (n * 16));
2971
2972 emit_ops_insns (buf, p - buf);
2973 }
2974
2975 /* Implementation of emit_ops method "emit_int_call_1". */
2976
2977 static void
2978 aarch64_emit_int_call_1 (CORE_ADDR fn, int arg1)
2979 {
2980 uint32_t buf[16];
2981 uint32_t *p = buf;
2982
2983 p += emit_mov (p, x0, immediate_operand (arg1));
2984
2985 emit_ops_insns (buf, p - buf);
2986
2987 aarch64_emit_call (fn);
2988 }
2989
2990 /* Implementation of emit_ops method "emit_void_call_2". */
2991
2992 static void
2993 aarch64_emit_void_call_2 (CORE_ADDR fn, int arg1)
2994 {
2995 uint32_t buf[16];
2996 uint32_t *p = buf;
2997
2998 /* Push x0 on the stack. */
2999 aarch64_emit_stack_flush ();
3000
3001 /* Setup arguments for the function call:
3002
3003 x0: arg1
3004 x1: top of the stack
3005
3006 MOV x1, x0
3007 MOV x0, #arg1 */
3008
3009 p += emit_mov (p, x1, register_operand (x0));
3010 p += emit_mov (p, x0, immediate_operand (arg1));
3011
3012 emit_ops_insns (buf, p - buf);
3013
3014 aarch64_emit_call (fn);
3015
3016 /* Restore x0. */
3017 aarch64_emit_pop ();
3018 }
3019
3020 /* Implementation of emit_ops method "emit_eq_goto". */
3021
3022 static void
3023 aarch64_emit_eq_goto (int *offset_p, int *size_p)
3024 {
3025 uint32_t buf[16];
3026 uint32_t *p = buf;
3027
3028 p += emit_pop (p, x1);
3029 p += emit_cmp (p, x1, register_operand (x0));
3030 /* Branch over the next instruction if x0 != x1. */
3031 p += emit_bcond (p, NE, 8);
3032 /* The NOP instruction will be patched with an unconditional branch. */
3033 if (offset_p)
3034 *offset_p = (p - buf) * 4;
3035 if (size_p)
3036 *size_p = 4;
3037 p += emit_nop (p);
3038
3039 emit_ops_insns (buf, p - buf);
3040 }
3041
3042 /* Implementation of emit_ops method "emit_ne_goto". */
3043
3044 static void
3045 aarch64_emit_ne_goto (int *offset_p, int *size_p)
3046 {
3047 uint32_t buf[16];
3048 uint32_t *p = buf;
3049
3050 p += emit_pop (p, x1);
3051 p += emit_cmp (p, x1, register_operand (x0));
3052 /* Branch over the next instruction if x0 == x1. */
3053 p += emit_bcond (p, EQ, 8);
3054 /* The NOP instruction will be patched with an unconditional branch. */
3055 if (offset_p)
3056 *offset_p = (p - buf) * 4;
3057 if (size_p)
3058 *size_p = 4;
3059 p += emit_nop (p);
3060
3061 emit_ops_insns (buf, p - buf);
3062 }
3063
3064 /* Implementation of emit_ops method "emit_lt_goto". */
3065
3066 static void
3067 aarch64_emit_lt_goto (int *offset_p, int *size_p)
3068 {
3069 uint32_t buf[16];
3070 uint32_t *p = buf;
3071
3072 p += emit_pop (p, x1);
3073 p += emit_cmp (p, x1, register_operand (x0));
3074 /* Branch over the next instruction if x1 >= x0. */
3075 p += emit_bcond (p, GE, 8);
3076 /* The NOP instruction will be patched with an unconditional branch. */
3077 if (offset_p)
3078 *offset_p = (p - buf) * 4;
3079 if (size_p)
3080 *size_p = 4;
3081 p += emit_nop (p);
3082
3083 emit_ops_insns (buf, p - buf);
3084 }
3085
3086 /* Implementation of emit_ops method "emit_le_goto". */
3087
3088 static void
3089 aarch64_emit_le_goto (int *offset_p, int *size_p)
3090 {
3091 uint32_t buf[16];
3092 uint32_t *p = buf;
3093
3094 p += emit_pop (p, x1);
3095 p += emit_cmp (p, x1, register_operand (x0));
3096 /* Branch over the next instruction if x0 > x1. */
3097 p += emit_bcond (p, GT, 8);
3098 /* The NOP instruction will be patched with an unconditional branch. */
3099 if (offset_p)
3100 *offset_p = (p - buf) * 4;
3101 if (size_p)
3102 *size_p = 4;
3103 p += emit_nop (p);
3104
3105 emit_ops_insns (buf, p - buf);
3106 }
3107
3108 /* Implementation of emit_ops method "emit_gt_goto". */
3109
3110 static void
3111 aarch64_emit_gt_goto (int *offset_p, int *size_p)
3112 {
3113 uint32_t buf[16];
3114 uint32_t *p = buf;
3115
3116 p += emit_pop (p, x1);
3117 p += emit_cmp (p, x1, register_operand (x0));
3118 /* Branch over the next instruction if x0 <= x1. */
3119 p += emit_bcond (p, LE, 8);
3120 /* The NOP instruction will be patched with an unconditional branch. */
3121 if (offset_p)
3122 *offset_p = (p - buf) * 4;
3123 if (size_p)
3124 *size_p = 4;
3125 p += emit_nop (p);
3126
3127 emit_ops_insns (buf, p - buf);
3128 }
3129
3130 /* Implementation of emit_ops method "emit_ge_goto". */
3131
3132 static void
3133 aarch64_emit_ge_got (int *offset_p, int *size_p)
3134 {
3135 uint32_t buf[16];
3136 uint32_t *p = buf;
3137
3138 p += emit_pop (p, x1);
3139 p += emit_cmp (p, x1, register_operand (x0));
3140 /* Branch over the next instruction if x1 < x0. */
3141 p += emit_bcond (p, LT, 8);
3142 /* The NOP instruction will be patched with an unconditional branch. */
3143 if (offset_p)
3144 *offset_p = (p - buf) * 4;
3145 if (size_p)
3146 *size_p = 4;
3147 p += emit_nop (p);
3148
3149 emit_ops_insns (buf, p - buf);
3150 }
3151
3152 static struct emit_ops aarch64_emit_ops_impl =
3153 {
3154 aarch64_emit_prologue,
3155 aarch64_emit_epilogue,
3156 aarch64_emit_add,
3157 aarch64_emit_sub,
3158 aarch64_emit_mul,
3159 aarch64_emit_lsh,
3160 aarch64_emit_rsh_signed,
3161 aarch64_emit_rsh_unsigned,
3162 aarch64_emit_ext,
3163 aarch64_emit_log_not,
3164 aarch64_emit_bit_and,
3165 aarch64_emit_bit_or,
3166 aarch64_emit_bit_xor,
3167 aarch64_emit_bit_not,
3168 aarch64_emit_equal,
3169 aarch64_emit_less_signed,
3170 aarch64_emit_less_unsigned,
3171 aarch64_emit_ref,
3172 aarch64_emit_if_goto,
3173 aarch64_emit_goto,
3174 aarch64_write_goto_address,
3175 aarch64_emit_const,
3176 aarch64_emit_call,
3177 aarch64_emit_reg,
3178 aarch64_emit_pop,
3179 aarch64_emit_stack_flush,
3180 aarch64_emit_zero_ext,
3181 aarch64_emit_swap,
3182 aarch64_emit_stack_adjust,
3183 aarch64_emit_int_call_1,
3184 aarch64_emit_void_call_2,
3185 aarch64_emit_eq_goto,
3186 aarch64_emit_ne_goto,
3187 aarch64_emit_lt_goto,
3188 aarch64_emit_le_goto,
3189 aarch64_emit_gt_goto,
3190 aarch64_emit_ge_got,
3191 };
3192
3193 /* Implementation of target ops method "emit_ops". */
3194
3195 emit_ops *
3196 aarch64_target::emit_ops ()
3197 {
3198 return &aarch64_emit_ops_impl;
3199 }
3200
3201 /* Implementation of target ops method
3202 "get_min_fast_tracepoint_insn_len". */
3203
3204 int
3205 aarch64_target::get_min_fast_tracepoint_insn_len ()
3206 {
3207 return 4;
3208 }
3209
3210 /* Implementation of linux target ops method "low_supports_range_stepping". */
3211
3212 bool
3213 aarch64_target::low_supports_range_stepping ()
3214 {
3215 return true;
3216 }
3217
3218 /* Implementation of target ops method "sw_breakpoint_from_kind". */
3219
3220 const gdb_byte *
3221 aarch64_target::sw_breakpoint_from_kind (int kind, int *size)
3222 {
3223 if (is_64bit_tdesc ())
3224 {
3225 *size = aarch64_breakpoint_len;
3226 return aarch64_breakpoint;
3227 }
3228 else
3229 return arm_sw_breakpoint_from_kind (kind, size);
3230 }
3231
3232 /* Implementation of target ops method "breakpoint_kind_from_pc". */
3233
3234 int
3235 aarch64_target::breakpoint_kind_from_pc (CORE_ADDR *pcptr)
3236 {
3237 if (is_64bit_tdesc ())
3238 return aarch64_breakpoint_len;
3239 else
3240 return arm_breakpoint_kind_from_pc (pcptr);
3241 }
3242
3243 /* Implementation of the target ops method
3244 "breakpoint_kind_from_current_state". */
3245
3246 int
3247 aarch64_target::breakpoint_kind_from_current_state (CORE_ADDR *pcptr)
3248 {
3249 if (is_64bit_tdesc ())
3250 return aarch64_breakpoint_len;
3251 else
3252 return arm_breakpoint_kind_from_current_state (pcptr);
3253 }
3254
3255 /* Returns true if memory tagging is supported. */
3256 bool
3257 aarch64_target::supports_memory_tagging ()
3258 {
3259 if (current_thread == NULL)
3260 {
3261 /* We don't have any processes running, so don't attempt to
3262 use linux_get_hwcap2 as it will try to fetch the current
3263 thread id. Instead, just fetch the auxv from the self
3264 PID. */
3265 #ifdef HAVE_GETAUXVAL
3266 return (getauxval (AT_HWCAP2) & HWCAP2_MTE) != 0;
3267 #else
3268 return true;
3269 #endif
3270 }
3271
3272 return (linux_get_hwcap2 (8) & HWCAP2_MTE) != 0;
3273 }
3274
3275 bool
3276 aarch64_target::fetch_memtags (CORE_ADDR address, size_t len,
3277 gdb::byte_vector &tags, int type)
3278 {
3279 /* Allocation tags are per-process, so any tid is fine. */
3280 int tid = lwpid_of (current_thread);
3281
3282 /* Allocation tag? */
3283 if (type == static_cast <int> (aarch64_memtag_type::mte_allocation))
3284 return aarch64_mte_fetch_memtags (tid, address, len, tags);
3285
3286 return false;
3287 }
3288
3289 bool
3290 aarch64_target::store_memtags (CORE_ADDR address, size_t len,
3291 const gdb::byte_vector &tags, int type)
3292 {
3293 /* Allocation tags are per-process, so any tid is fine. */
3294 int tid = lwpid_of (current_thread);
3295
3296 /* Allocation tag? */
3297 if (type == static_cast <int> (aarch64_memtag_type::mte_allocation))
3298 return aarch64_mte_store_memtags (tid, address, len, tags);
3299
3300 return false;
3301 }
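
/* Background sketch for the two methods above: MTE allocation tags are
   4 bits each and cover 16-byte granules, so a request for LEN bytes at
   ADDRESS corresponds to the granules spanned by that range (the actual
   ptrace transfer is done by the aarch64_mte_* helpers).  Illustrative
   only.  */
#if 0
#include <stddef.h>
#include <stdint.h>

#define MTE_GRANULE_SIZE 16

static size_t
granules_in_range (uint64_t address, size_t len)
{
  uint64_t start = address & ~(uint64_t) (MTE_GRANULE_SIZE - 1);
  uint64_t end = address + len;

  return (size_t) ((end - start + MTE_GRANULE_SIZE - 1) / MTE_GRANULE_SIZE);
}
#endif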
3302
3303 /* The linux target ops object. */
3304
3305 linux_process_target *the_linux_target = &the_aarch64_target;
3306
3307 void
3308 initialize_low_arch (void)
3309 {
3310 initialize_low_arch_aarch32 ();
3311
3312 initialize_regsets_info (&aarch64_regsets_info);
3313 }