Add support for fast tracepoints

author Pierre Langlois <pierre.langlois@arm.com>

Mon, 21 Sep 2015 14:01:04 +0000 (15:01 +0100)

committer Yao Qi <yao.qi@linaro.org>

Mon, 21 Sep 2015 14:01:04 +0000 (15:01 +0100)
author Pierre Langlois <pierre.langlois@arm.com>
Mon, 21 Sep 2015 14:01:04 +0000 (15:01 +0100)
committer Yao Qi <yao.qi@linaro.org>
Mon, 21 Sep 2015 14:01:04 +0000 (15:01 +0100)
diff --git a/gdb/gdbserver/ChangeLog b/gdb/gdbserver/ChangeLog

index 83a7848f83371a28205ddd97afaad9363013f048..d9ad6bb3567f5506d45ffcd55047e42a16fc97b4 100644 (file)
--- a/gdb/gdbserver/ChangeLog
+++ b/gdb/gdbserver/ChangeLog
@@ -1,3 +1,70 @@
+2015-09-21  Pierre Langlois  <pierre.langlois@arm.com>
+
+       * Makefile.in (linux-aarch64-ipa.o, aarch64-ipa.o): New rules.
+       * configure.srv (aarch64*-*-linux*): Add linux-aarch64-ipa.o and
+       aarch64-ipa.o.
+       * linux-aarch64-ipa.c: New file.
+       * linux-aarch64-low.c: Include arch/aarch64-insn.h, inttypes.h
+       and endian.h.
+       (aarch64_get_thread_area): New target method.
+       (extract_signed_bitfield): New helper function.
+       (aarch64_decode_ldr_literal): New function.
+       (enum aarch64_opcodes): New enum.
+       (struct aarch64_register): New struct.
+       (struct aarch64_operand): New struct.
+       (x0): New static global.
+       (x1): Likewise.
+       (x2): Likewise.
+       (x3): Likewise.
+       (x4): Likewise.
+       (w2): Likewise.
+       (ip0): Likewise.
+       (sp): Likewise.
+       (xzr): Likewise.
+       (aarch64_register): New helper function.
+       (register_operand): Likewise.
+       (immediate_operand): Likewise.
+       (struct aarch64_memory_operand): New struct.
+       (offset_memory_operand): New helper function.
+       (preindex_memory_operand): Likewise.
+       (enum aarch64_system_control_registers): New enum.
+       (ENCODE): New macro.
+       (emit_insn): New helper function.
+       (emit_b): New function.
+       (emit_bcond): Likewise.
+       (emit_cb): Likewise.
+       (emit_tb): Likewise.
+       (emit_blr): Likewise.
+       (emit_stp): Likewise.
+       (emit_ldp_q_offset): Likewise.
+       (emit_stp_q_offset): Likewise.
+       (emit_load_store): Likewise.
+       (emit_ldr): Likewise.
+       (emit_ldrsw): Likewise.
+       (emit_str): Likewise.
+       (emit_ldaxr): Likewise.
+       (emit_stxr): Likewise.
+       (emit_stlr): Likewise.
+       (emit_data_processing_reg): Likewise.
+       (emit_data_processing): Likewise.
+       (emit_add): Likewise.
+       (emit_sub): Likewise.
+       (emit_mov): Likewise.
+       (emit_movk): Likewise.
+       (emit_mov_addr): Likewise.
+       (emit_mrs): Likewise.
+       (emit_msr): Likewise.
+       (emit_sevl): Likewise.
+       (emit_wfe): Likewise.
+       (append_insns): Likewise.
+       (can_encode_int32_in): New helper function.
+       (aarch64_relocate_instruction): New function.
+       (aarch64_install_fast_tracepoint_jump_pad): Likewise.
+       (aarch64_get_min_fast_tracepoint_insn_len): Likewise.
+       (struct linux_target_ops): Install aarch64_get_thread_area,
+       aarch64_install_fast_tracepoint_jump_pad and
+       aarch64_get_min_fast_tracepoint_insn_len.
+
  2015-09-21  Pierre Langlois  <pierre.langlois@arm.com>
  
         * Makefile.in (aarch64-insn.o): New rule.
diff --git a/gdb/gdbserver/Makefile.in b/gdb/gdbserver/Makefile.in

index d096663645a8589d7b074be5292f6640bcf9ff69..cd146f4abd2066424541f19ed3456ab4f931ff70 100644 (file)
--- a/gdb/gdbserver/Makefile.in
+++ b/gdb/gdbserver/Makefile.in
@@ -499,6 +499,12 @@ linux-amd64-ipa.o: linux-amd64-ipa.c
  amd64-linux-ipa.o: amd64-linux.c
         $(IPAGENT_COMPILE) $<
         $(POSTCOMPILE)
+linux-aarch64-ipa.o: linux-aarch64-ipa.c
+       $(IPAGENT_COMPILE) $<
+       $(POSTCOMPILE)
+aarch64-ipa.o: aarch64.c
+       $(IPAGENT_COMPILE) $<
+       $(POSTCOMPILE)
  tdesc-ipa.o: tdesc.c
         $(IPAGENT_COMPILE) $<
         $(POSTCOMPILE)
diff --git a/gdb/gdbserver/configure.srv b/gdb/gdbserver/configure.srv

index a62df83d29afda73bca2279303b6f568e57bdfce..f187c9de0e65471b8629fb71425b75a0dc2623fb 100644 (file)
--- a/gdb/gdbserver/configure.srv
+++ b/gdb/gdbserver/configure.srv
@@ -62,6 +62,7 @@ case "${target}" in
                         srv_xmlfiles="${srv_xmlfiles} arm-with-neon.xml"
                         srv_linux_regsets=yes
                         srv_linux_thread_db=yes
+                       ipa_obj="linux-aarch64-ipa.o aarch64-ipa.o"
                         ;;
    arm*-*-linux*)       srv_regobj="reg-arm.o arm-with-iwmmxt.o"
                         srv_regobj="${srv_regobj} arm-with-vfpv2.o"
diff --git a/gdb/gdbserver/linux-aarch64-ipa.c b/gdb/gdbserver/linux-aarch64-ipa.c

new file mode 100644 (file)

index 0000000..1aafc5f
--- /dev/null
+++ b/gdb/gdbserver/linux-aarch64-ipa.c
@@ -0,0 +1,151 @@
+/* GNU/Linux/AArch64 specific low level interface, for the in-process
+   agent library for GDB.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "server.h"
+#include "tracepoint.h"
+
+/* Defined in auto-generated file aarch64.c.  */
+void init_registers_aarch64 (void);
+extern const struct target_desc *tdesc_aarch64;
+
+/* Each register saved by the jump pad is in a 16 byte cell.  */
+#define FT_CR_SIZE 16
+
+#define FT_CR_FPCR     0
+#define FT_CR_FPSR     1
+#define FT_CR_CPSR     2
+#define FT_CR_PC       3
+#define FT_CR_SP       4
+#define FT_CR_X0       5
+#define FT_CR_GPR(n)   (FT_CR_X0 + (n))
+#define FT_CR_FPR(n)   (FT_CR_GPR (31) + (n))
+
+/* Mapping between registers collected by the jump pad and GDB's register
+   array layout used by regcache.
+
+   See linux-aarch64-low.c (aarch64_install_fast_tracepoint_jump_pad) for
+   more details.  */
+
+static const int aarch64_ft_collect_regmap[] = {
+  FT_CR_GPR (0),
+  FT_CR_GPR (1),
+  FT_CR_GPR (2),
+  FT_CR_GPR (3),
+  FT_CR_GPR (4),
+  FT_CR_GPR (5),
+  FT_CR_GPR (6),
+  FT_CR_GPR (7),
+  FT_CR_GPR (8),
+  FT_CR_GPR (9),
+  FT_CR_GPR (10),
+  FT_CR_GPR (11),
+  FT_CR_GPR (12),
+  FT_CR_GPR (13),
+  FT_CR_GPR (14),
+  FT_CR_GPR (15),
+  FT_CR_GPR (16),
+  FT_CR_GPR (17),
+  FT_CR_GPR (18),
+  FT_CR_GPR (19),
+  FT_CR_GPR (20),
+  FT_CR_GPR (21),
+  FT_CR_GPR (22),
+  FT_CR_GPR (23),
+  FT_CR_GPR (24),
+  FT_CR_GPR (25),
+  FT_CR_GPR (26),
+  FT_CR_GPR (27),
+  FT_CR_GPR (28),
+  /* FP */
+  FT_CR_GPR (29),
+  /* LR */
+  FT_CR_GPR (30),
+  FT_CR_SP,
+  FT_CR_PC,
+  FT_CR_CPSR,
+  FT_CR_FPR (0),
+  FT_CR_FPR (1),
+  FT_CR_FPR (2),
+  FT_CR_FPR (3),
+  FT_CR_FPR (4),
+  FT_CR_FPR (5),
+  FT_CR_FPR (6),
+  FT_CR_FPR (7),
+  FT_CR_FPR (8),
+  FT_CR_FPR (9),
+  FT_CR_FPR (10),
+  FT_CR_FPR (11),
+  FT_CR_FPR (12),
+  FT_CR_FPR (13),
+  FT_CR_FPR (14),
+  FT_CR_FPR (15),
+  FT_CR_FPR (16),
+  FT_CR_FPR (17),
+  FT_CR_FPR (18),
+  FT_CR_FPR (19),
+  FT_CR_FPR (20),
+  FT_CR_FPR (21),
+  FT_CR_FPR (22),
+  FT_CR_FPR (23),
+  FT_CR_FPR (24),
+  FT_CR_FPR (25),
+  FT_CR_FPR (26),
+  FT_CR_FPR (27),
+  FT_CR_FPR (28),
+  FT_CR_FPR (29),
+  FT_CR_FPR (30),
+  FT_CR_FPR (31),
+  FT_CR_FPSR,
+  FT_CR_FPCR
+};
+
+#define AARCH64_NUM_FT_COLLECT_GREGS \
+  (sizeof (aarch64_ft_collect_regmap) / sizeof(aarch64_ft_collect_regmap[0]))
+
+/* Fill in REGCACHE with registers saved by the jump pad in BUF.  */
+
+void
+supply_fast_tracepoint_registers (struct regcache *regcache,
+                                 const unsigned char *buf)
+{
+  int i;
+
+  for (i = 0; i < AARCH64_NUM_FT_COLLECT_GREGS; i++)
+    supply_register (regcache, i,
+                    ((char *) buf)
+                    + (aarch64_ft_collect_regmap[i] * FT_CR_SIZE));
+}
+
+IP_AGENT_EXPORT_FUNC ULONGEST
+gdb_agent_get_raw_reg (const unsigned char *raw_regs, int regnum)
+{
+  if (regnum >= AARCH64_NUM_FT_COLLECT_GREGS)
+    return 0;
+
+  return *(ULONGEST *) (raw_regs
+                       + aarch64_ft_collect_regmap[regnum] * FT_CR_SIZE);
+}
+
+void
+initialize_low_tracepoint (void)
+{
+  init_registers_aarch64 ();
+  ipa_tdesc = tdesc_aarch64;
+}
diff --git a/gdb/gdbserver/linux-aarch64-low.c b/gdb/gdbserver/linux-aarch64-low.c

index 0ba58ddcb8498ed63b2bab42f12df95cd681252d..8e007052e957694fe5cc6455416905eafa16f3c7 100644 (file)
--- a/gdb/gdbserver/linux-aarch64-low.c
+++ b/gdb/gdbserver/linux-aarch64-low.c
@@ -23,6 +23,7 @@
  #include "linux-low.h"
  #include "nat/aarch64-linux.h"
  #include "nat/aarch64-linux-hw-point.h"
+#include "arch/aarch64-insn.h"
  #include "linux-aarch32-low.h"
  #include "elf/common.h"
  
@@ -30,6 +31,9 @@
  #include <sys/user.h>
  #include "nat/gdb_ptrace.h"
  #include <asm/ptrace.h>
+#include <inttypes.h>
+#include <endian.h>
+#include <sys/uio.h>
  
  #include "gdb_proc_service.h"
  
@@ -559,6 +563,1463 @@ aarch64_supports_tracepoints (void)
      }
  }
  
+/* Implementation of linux_target_ops method "get_thread_area".  */
+
+static int
+aarch64_get_thread_area (int lwpid, CORE_ADDR *addrp)
+{
+  struct iovec iovec;
+  uint64_t reg;
+
+  iovec.iov_base = &reg;
+  iovec.iov_len = sizeof (reg);
+
+  if (ptrace (PTRACE_GETREGSET, lwpid, NT_ARM_TLS, &iovec) != 0)
+    return -1;
+
+  *addrp = reg;
+
+  return 0;
+}
+
+/* Extract a signed value from a bit field within an instruction
+   encoding.
+
+   INSN is the instruction opcode.
+
+   WIDTH specifies the width of the bit field to extract (in bits).
+
+   OFFSET specifies the least significant bit of the field where bits
+   are numbered zero counting from least to most significant.  */
+
+static int32_t
+extract_signed_bitfield (uint32_t insn, unsigned width, unsigned offset)
+{
+  unsigned shift_l = sizeof (int32_t) * 8 - (offset + width);
+  unsigned shift_r = sizeof (int32_t) * 8 - width;
+
+  return ((int32_t) insn << shift_l) >> shift_r;
+}
+
+/* Decode an opcode if it represents an LDR or LDRSW instruction taking a
+   literal offset from the current PC.
+
+   ADDR specifies the address of the opcode.
+   INSN specifies the opcode to test.
+   IS_W is set if the instruction is LDRSW.
+   IS64 receives size field from the decoded instruction.
+   RT receives the 'rt' field from the decoded instruction.
+   OFFSET receives the 'imm' field from the decoded instruction.
+
+   Return 1 if the opcodes matches and is decoded, otherwise 0.  */
+
+int
+aarch64_decode_ldr_literal (CORE_ADDR addr, uint32_t insn, int *is_w,
+                           int *is64, unsigned *rt, int32_t *offset)
+{
+  /* LDR    0T01 1000 iiii iiii iiii iiii iiir rrrr */
+  /* LDRSW  1001 1000 iiii iiii iiii iiii iiir rrrr */
+  if ((insn & 0x3f000000) == 0x18000000)
+    {
+      *is_w = (insn >> 31) & 0x1;
+
+      if (*is_w)
+       {
+         /* LDRSW always takes a 64-bit destination registers.  */
+         *is64 = 1;
+       }
+      else
+       *is64 = (insn >> 30) & 0x1;
+
+      *rt = (insn >> 0) & 0x1f;
+      *offset = extract_signed_bitfield (insn, 19, 5) << 2;
+
+      if (aarch64_debug)
+       debug_printf ("decode: %s 0x%x %s %s%u, #?\n",
+                     core_addr_to_string_nz (addr), insn,
+                     *is_w ? "ldrsw" : "ldr",
+                     *is64 ? "x" : "w", *rt);
+
+      return 1;
+    }
+
+  return 0;
+}
+
+/* List of opcodes that we need for building the jump pad and relocating
+   an instruction.  */
+
+enum aarch64_opcodes
+{
+  /* B              0001 01ii iiii iiii iiii iiii iiii iiii */
+  /* BL             1001 01ii iiii iiii iiii iiii iiii iiii */
+  /* B.COND         0101 0100 iiii iiii iiii iiii iii0 cccc */
+  /* CBZ            s011 0100 iiii iiii iiii iiii iiir rrrr */
+  /* CBNZ           s011 0101 iiii iiii iiii iiii iiir rrrr */
+  /* TBZ            b011 0110 bbbb biii iiii iiii iiir rrrr */
+  /* TBNZ           b011 0111 bbbb biii iiii iiii iiir rrrr */
+  B               = 0x14000000,
+  BL              = 0x80000000 | B,
+  BCOND           = 0x40000000 | B,
+  CBZ             = 0x20000000 | B,
+  CBNZ            = 0x21000000 | B,
+  TBZ             = 0x36000000 | B,
+  TBNZ            = 0x37000000 | B,
+  /* BLR            1101 0110 0011 1111 0000 00rr rrr0 0000 */
+  BLR             = 0xd63f0000,
+  /* STP            s010 100o o0ii iiii irrr rrrr rrrr rrrr */
+  /* LDP            s010 100o o1ii iiii irrr rrrr rrrr rrrr */
+  /* STP (SIMD&VFP) ss10 110o o0ii iiii irrr rrrr rrrr rrrr */
+  /* LDP (SIMD&VFP) ss10 110o o1ii iiii irrr rrrr rrrr rrrr */
+  STP             = 0x28000000,
+  LDP             = 0x28400000,
+  STP_SIMD_VFP    = 0x04000000 | STP,
+  LDP_SIMD_VFP    = 0x04000000 | LDP,
+  /* STR            ss11 100o 00xi iiii iiii xxrr rrrr rrrr */
+  /* LDR            ss11 100o 01xi iiii iiii xxrr rrrr rrrr */
+  /* LDRSW          1011 100o 10xi iiii iiii xxrr rrrr rrrr */
+  STR             = 0x38000000,
+  LDR             = 0x00400000 | STR,
+  LDRSW           = 0x80800000 | STR,
+  /* LDAXR          ss00 1000 0101 1111 1111 11rr rrrr rrrr */
+  LDAXR           = 0x085ffc00,
+  /* STXR           ss00 1000 000r rrrr 0111 11rr rrrr rrrr */
+  STXR            = 0x08007c00,
+  /* STLR           ss00 1000 1001 1111 1111 11rr rrrr rrrr */
+  STLR            = 0x089ffc00,
+  /* MOV            s101 0010 1xxi iiii iiii iiii iiir rrrr */
+  /* MOVK           s111 0010 1xxi iiii iiii iiii iiir rrrr */
+  MOV             = 0x52800000,
+  MOVK            = 0x20000000 | MOV,
+  /* ADD            s00o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
+  /* SUB            s10o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
+  /* SUBS           s11o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
+  ADD             = 0x01000000,
+  SUB             = 0x40000000 | ADD,
+  /* MSR (register) 1101 0101 0001 oooo oooo oooo ooor rrrr */
+  /* MRS            1101 0101 0011 oooo oooo oooo ooor rrrr */
+  MSR             = 0xd5100000,
+  MRS             = 0x00200000 | MSR,
+  /* HINT           1101 0101 0000 0011 0010 oooo ooo1 1111 */
+  HINT            = 0xd503201f,
+  SEVL            = (5 << 5) | HINT,
+  WFE             = (2 << 5) | HINT,
+};
+
+/* Representation of a general purpose register of the form xN or wN.
+
+   This type is used by emitting functions that take registers as operands.  */
+
+struct aarch64_register
+{
+  unsigned num;
+  int is64;
+};
+
+/* Representation of an operand.  At this time, it only supports register
+   and immediate types.  */
+
+struct aarch64_operand
+{
+  /* Type of the operand.  */
+  enum
+    {
+      OPERAND_IMMEDIATE,
+      OPERAND_REGISTER,
+    } type;
+  /* Value of the operand according to the type.  */
+  union
+    {
+      uint32_t imm;
+      struct aarch64_register reg;
+    };
+};
+
+/* List of registers that we are currently using, we can add more here as
+   we need to use them.  */
+
+/* General purpose scratch registers (64 bit).  */
+static const struct aarch64_register x0 = { 0, 1 };
+static const struct aarch64_register x1 = { 1, 1 };
+static const struct aarch64_register x2 = { 2, 1 };
+static const struct aarch64_register x3 = { 3, 1 };
+static const struct aarch64_register x4 = { 4, 1 };
+
+/* General purpose scratch registers (32 bit).  */
+static const struct aarch64_register w2 = { 2, 0 };
+
+/* Intra-procedure scratch registers.  */
+static const struct aarch64_register ip0 = { 16, 1 };
+
+/* Special purpose registers.  */
+static const struct aarch64_register sp = { 31, 1 };
+static const struct aarch64_register xzr = { 31, 1 };
+
+/* Dynamically allocate a new register.  If we know the register
+   statically, we should make it a global as above instead of using this
+   helper function.  */
+
+static struct aarch64_register
+aarch64_register (unsigned num, int is64)
+{
+  return (struct aarch64_register) { num, is64 };
+}
+
+/* Helper function to create a register operand, for instructions with
+   different types of operands.
+
+   For example:
+   p += emit_mov (p, x0, register_operand (x1));  */
+
+static struct aarch64_operand
+register_operand (struct aarch64_register reg)
+{
+  struct aarch64_operand operand;
+
+  operand.type = OPERAND_REGISTER;
+  operand.reg = reg;
+
+  return operand;
+}
+
+/* Helper function to create an immediate operand, for instructions with
+   different types of operands.
+
+   For example:
+   p += emit_mov (p, x0, immediate_operand (12));  */
+
+static struct aarch64_operand
+immediate_operand (uint32_t imm)
+{
+  struct aarch64_operand operand;
+
+  operand.type = OPERAND_IMMEDIATE;
+  operand.imm = imm;
+
+  return operand;
+}
+
+/* Representation of a memory operand, used for load and store
+   instructions.
+
+   The types correspond to the following variants:
+
+   MEMORY_OPERAND_OFFSET:   LDR rt, [rn, #offset]
+   MEMORY_OPERAND_PREINDEX: LDR rt, [rn, #index]!  */
+
+struct aarch64_memory_operand
+{
+  /* Type of the operand.  */
+  enum
+    {
+      MEMORY_OPERAND_OFFSET,
+      MEMORY_OPERAND_PREINDEX,
+    } type;
+  /* Index from the base register.  */
+  int32_t index;
+};
+
+/* Helper function to create an offset memory operand.
+
+   For example:
+   p += emit_ldr (p, x0, sp, offset_memory_operand (16));  */
+
+static struct aarch64_memory_operand
+offset_memory_operand (int32_t offset)
+{
+  return (struct aarch64_memory_operand) { MEMORY_OPERAND_OFFSET, offset };
+}
+
+/* Helper function to create a pre-index memory operand.
+
+   For example:
+   p += emit_ldr (p, x0, sp, preindex_memory_operand (16));  */
+
+static struct aarch64_memory_operand
+preindex_memory_operand (int32_t index)
+{
+  return (struct aarch64_memory_operand) { MEMORY_OPERAND_PREINDEX, index };
+}
+
+/* System control registers.  These special registers can be written and
+   read with the MRS and MSR instructions.
+
+   - NZCV: Condition flags.  GDB refers to this register under the CPSR
+          name.
+   - FPSR: Floating-point status register.
+   - FPCR: Floating-point control registers.
+   - TPIDR_EL0: Software thread ID register.  */
+
+enum aarch64_system_control_registers
+{
+  /*          op0           op1           crn          crm          op2  */
+  NZCV =      (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x2 << 3) | 0x0,
+  FPSR =      (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x1,
+  FPCR =      (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x0,
+  TPIDR_EL0 = (0x1 << 14) | (0x3 << 11) | (0xd << 7) | (0x0 << 3) | 0x2
+};
+
+/* Helper macro to mask and shift a value into a bitfield.  */
+
+#define ENCODE(val, size, offset) \
+  ((uint32_t) ((val & ((1ULL << size) - 1)) << offset))
+
+/* Write a 32-bit unsigned integer INSN info *BUF.  Return the number of
+   instructions written (aka. 1).  */
+
+static int
+emit_insn (uint32_t *buf, uint32_t insn)
+{
+  *buf = insn;
+  return 1;
+}
+
+/* Write a B or BL instruction into *BUF.
+
+     B  #offset
+     BL #offset
+
+   IS_BL specifies if the link register should be updated.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 128MB (26 bits << 2).  */
+
+static int
+emit_b (uint32_t *buf, int is_bl, int32_t offset)
+{
+  uint32_t imm26 = ENCODE (offset >> 2, 26, 0);
+
+  if (is_bl)
+    return emit_insn (buf, BL | imm26);
+  else
+    return emit_insn (buf, B | imm26);
+}
+
+/* Write a BCOND instruction into *BUF.
+
+     B.COND #offset
+
+   COND specifies the condition field.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 1MB (19 bits << 2).  */
+
+static int
+emit_bcond (uint32_t *buf, unsigned cond, int32_t offset)
+{
+  return emit_insn (buf, BCOND | ENCODE (offset >> 2, 19, 5)
+                   | ENCODE (cond, 4, 0));
+}
+
+/* Write a CBZ or CBNZ instruction into *BUF.
+
+     CBZ  rt, #offset
+     CBNZ rt, #offset
+
+   IS_CBNZ distinguishes between CBZ and CBNZ instructions.
+   RN is the register to test.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 1MB (19 bits << 2).  */
+
+static int
+emit_cb (uint32_t *buf, int is_cbnz, struct aarch64_register rt,
+        int32_t offset)
+{
+  uint32_t imm19 = ENCODE (offset >> 2, 19, 5);
+  uint32_t sf = ENCODE (rt.is64, 1, 31);
+
+  if (is_cbnz)
+    return emit_insn (buf, CBNZ | sf | imm19 | ENCODE (rt.num, 5, 0));
+  else
+    return emit_insn (buf, CBZ | sf | imm19 | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a TBZ or TBNZ instruction into *BUF.
+
+     TBZ  rt, #bit, #offset
+     TBNZ rt, #bit, #offset
+
+   IS_TBNZ distinguishes between TBZ and TBNZ instructions.
+   RT is the register to test.
+   BIT is the index of the bit to test in register RT.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 32KB (14 bits << 2).  */
+
+static int
+emit_tb (uint32_t *buf, int is_tbnz, unsigned bit,
+        struct aarch64_register rt, int32_t offset)
+{
+  uint32_t imm14 = ENCODE (offset >> 2, 14, 5);
+  uint32_t b40 = ENCODE (bit, 5, 19);
+  uint32_t b5 = ENCODE (bit >> 5, 1, 31);
+
+  if (is_tbnz)
+    return emit_insn (buf, TBNZ | b5 | b40 | imm14 | ENCODE (rt.num, 5, 0));
+  else
+    return emit_insn (buf, TBZ | b5 | b40 | imm14 | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a BLR instruction into *BUF.
+
+     BLR rn
+
+   RN is the register to branch to.  */
+
+static int
+emit_blr (uint32_t *buf, struct aarch64_register rn)
+{
+  return emit_insn (buf, BLR | ENCODE (rn.num, 5, 5));
+}
+
+/* Write a STP instruction into *BUF.
+
+     STP rt, rt2, [rn, #offset]
+     STP rt, rt2, [rn, #index]!
+
+   RT and RT2 are the registers to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to a
+   -512 .. 504 range (7 bits << 3).  */
+
+static int
+emit_stp (uint32_t *buf, struct aarch64_register rt,
+         struct aarch64_register rt2, struct aarch64_register rn,
+         struct aarch64_memory_operand operand)
+{
+  uint32_t opc;
+  uint32_t pre_index;
+  uint32_t write_back;
+
+  if (rt.is64)
+    opc = ENCODE (2, 2, 30);
+  else
+    opc = ENCODE (0, 2, 30);
+
+  switch (operand.type)
+    {
+    case MEMORY_OPERAND_OFFSET:
+      {
+       pre_index = ENCODE (1, 1, 24);
+       write_back = ENCODE (0, 1, 23);
+       break;
+      }
+    case MEMORY_OPERAND_PREINDEX:
+      {
+       pre_index = ENCODE (1, 1, 24);
+       write_back = ENCODE (1, 1, 23);
+       break;
+      }
+    default:
+      return 0;
+    }
+
+  return emit_insn (buf, STP | opc | pre_index | write_back
+                   | ENCODE (operand.index >> 3, 7, 15) | ENCODE (rt2.num, 5, 10)
+                   | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a LDP (SIMD&VFP) instruction using Q registers into *BUF.
+
+     LDP qt, qt2, [rn, #offset]
+
+   RT and RT2 are the Q registers to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   -1024 .. 1008 range (7 bits << 4).  */
+
+static int
+emit_ldp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
+                  struct aarch64_register rn, int32_t offset)
+{
+  uint32_t opc = ENCODE (2, 2, 30);
+  uint32_t pre_index = ENCODE (1, 1, 24);
+
+  return emit_insn (buf, LDP_SIMD_VFP | opc | pre_index
+                   | ENCODE (offset >> 4, 7, 15) | ENCODE (rt2, 5, 10)
+                   | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
+}
+
+/* Write a STP (SIMD&VFP) instruction using Q registers into *BUF.
+
+     STP qt, qt2, [rn, #offset]
+
+   RT and RT2 are the Q registers to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   -1024 .. 1008 range (7 bits << 4).  */
+
+static int
+emit_stp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
+                  struct aarch64_register rn, int32_t offset)
+{
+  uint32_t opc = ENCODE (2, 2, 30);
+  uint32_t pre_index = ENCODE (1, 1, 24);
+
+  return emit_insn (buf, STP_SIMD_VFP | opc | pre_index
+                   | ENCODE (offset >> 4, 7, 15) | ENCODE (rt2, 5, 10)
+                   | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
+}
+
+/* Helper function emitting a load or store instruction.  */
+
+static int
+emit_load_store (uint32_t *buf, uint32_t size, enum aarch64_opcodes opcode,
+                struct aarch64_register rt, struct aarch64_register rn,
+                struct aarch64_memory_operand operand)
+{
+  uint32_t op;
+
+  switch (operand.type)
+    {
+    case MEMORY_OPERAND_OFFSET:
+      {
+       op = ENCODE (1, 1, 24);
+
+       return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
+                         | ENCODE (operand.index >> 3, 12, 10)
+                         | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+      }
+    case MEMORY_OPERAND_PREINDEX:
+      {
+       uint32_t pre_index = ENCODE (3, 2, 10);
+
+       op = ENCODE (0, 1, 24);
+
+       return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
+                         | pre_index | ENCODE (operand.index, 9, 12)
+                         | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+      }
+    default:
+      return 0;
+    }
+}
+
+/* Write a LDR instruction into *BUF.
+
+     LDR rt, [rn, #offset]
+     LDR rt, [rn, #index]!
+
+   RT is the register to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   0 .. 32760 range (12 bits << 3).  */
+
+static int
+emit_ldr (uint32_t *buf, struct aarch64_register rt,
+         struct aarch64_register rn, struct aarch64_memory_operand operand)
+{
+  return emit_load_store (buf, rt.is64 ? 3 : 2, LDR, rt, rn, operand);
+}
+
+/* Write a LDRSW instruction into *BUF.  The register size is 64-bit.
+
+     LDRSW xt, [rn, #offset]
+     LDRSW xt, [rn, #index]!
+
+   RT is the register to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   0 .. 16380 range (12 bits << 2).  */
+
+static int
+emit_ldrsw (uint32_t *buf, struct aarch64_register rt,
+                  struct aarch64_register rn,
+                  struct aarch64_memory_operand operand)
+{
+  return emit_load_store (buf, 3, LDRSW, rt, rn, operand);
+}
+
+/* Write a STR instruction into *BUF.
+
+     STR rt, [rn, #offset]
+     STR rt, [rn, #index]!
+
+   RT is the register to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   0 .. 32760 range (12 bits << 3).  */
+
+static int
+emit_str (uint32_t *buf, struct aarch64_register rt,
+         struct aarch64_register rn,
+         struct aarch64_memory_operand operand)
+{
+  return emit_load_store (buf, rt.is64 ? 3 : 2, STR, rt, rn, operand);
+}
+
+/* Helper function emitting an exclusive load or store instruction.  */
+
+static int
+emit_load_store_exclusive (uint32_t *buf, uint32_t size,
+                          enum aarch64_opcodes opcode,
+                          struct aarch64_register rs,
+                          struct aarch64_register rt,
+                          struct aarch64_register rt2,
+                          struct aarch64_register rn)
+{
+  return emit_insn (buf, opcode | ENCODE (size, 2, 30)
+                   | ENCODE (rs.num, 5, 16) | ENCODE (rt2.num, 5, 10)
+                   | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a LAXR instruction into *BUF.
+
+     LDAXR rt, [xn]
+
+   RT is the destination register.
+   RN is the base address register.  */
+
+static int
+emit_ldaxr (uint32_t *buf, struct aarch64_register rt,
+           struct aarch64_register rn)
+{
+  return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, LDAXR, xzr, rt,
+                                   xzr, rn);
+}
+
+/* Write a STXR instruction into *BUF.
+
+     STXR ws, rt, [xn]
+
+   RS is the result register, it indicates if the store succeeded or not.
+   RT is the destination register.
+   RN is the base address register.  */
+
+static int
+emit_stxr (uint32_t *buf, struct aarch64_register rs,
+          struct aarch64_register rt, struct aarch64_register rn)
+{
+  return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STXR, rs, rt,
+                                   xzr, rn);
+}
+
+/* Write a STLR instruction into *BUF.
+
+     STLR rt, [xn]
+
+   RT is the register to store.
+   RN is the base address register.  */
+
+static int
+emit_stlr (uint32_t *buf, struct aarch64_register rt,
+          struct aarch64_register rn)
+{
+  return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STLR, xzr, rt,
+                                   xzr, rn);
+}
+
+/* Helper function for data processing instructions with register sources.  */
+
+static int
+emit_data_processing_reg (uint32_t *buf, enum aarch64_opcodes opcode,
+                         struct aarch64_register rd,
+                         struct aarch64_register rn,
+                         struct aarch64_register rm)
+{
+  uint32_t size = ENCODE (rd.is64, 1, 31);
+
+  return emit_insn (buf, opcode | size | ENCODE (rm.num, 5, 16)
+                   | ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
+}
+
+/* Helper function for data processing instructions taking either a register
+   or an immediate.  */
+
+static int
+emit_data_processing (uint32_t *buf, enum aarch64_opcodes opcode,
+                     struct aarch64_register rd,
+                     struct aarch64_register rn,
+                     struct aarch64_operand operand)
+{
+  uint32_t size = ENCODE (rd.is64, 1, 31);
+  /* The opcode is different for register and immediate source operands.  */
+  uint32_t operand_opcode;
+
+  if (operand.type == OPERAND_IMMEDIATE)
+    {
+      /* xxx1 000x xxxx xxxx xxxx xxxx xxxx xxxx */
+      operand_opcode = ENCODE (8, 4, 25);
+
+      return emit_insn (buf, opcode | operand_opcode | size
+                       | ENCODE (operand.imm, 12, 10)
+                       | ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
+    }
+  else
+    {
+      /* xxx0 101x xxxx xxxx xxxx xxxx xxxx xxxx */
+      operand_opcode = ENCODE (5, 4, 25);
+
+      return emit_data_processing_reg (buf, opcode | operand_opcode, rd,
+                                      rn, operand.reg);
+    }
+}
+
+/* Write an ADD instruction into *BUF.
+
+     ADD rd, rn, #imm
+     ADD rd, rn, rm
+
+   This function handles both an immediate and register add.
+
+   RD is the destination register.
+   RN is the input register.
+   OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
+   OPERAND_REGISTER.  */
+
+static int
+emit_add (uint32_t *buf, struct aarch64_register rd,
+         struct aarch64_register rn, struct aarch64_operand operand)
+{
+  return emit_data_processing (buf, ADD, rd, rn, operand);
+}
+
+/* Write a SUB instruction into *BUF.
+
+     SUB rd, rn, #imm
+     SUB rd, rn, rm
+
+   This function handles both an immediate and register sub.
+
+   RD is the destination register.
+   RN is the input register.
+   IMM is the immediate to substract to RN.  */
+
+static int
+emit_sub (uint32_t *buf, struct aarch64_register rd,
+         struct aarch64_register rn, struct aarch64_operand operand)
+{
+  return emit_data_processing (buf, SUB, rd, rn, operand);
+}
+
+/* Write a MOV instruction into *BUF.
+
+     MOV rd, #imm
+     MOV rd, rm
+
+   This function handles both a wide immediate move and a register move,
+   with the condition that the source register is not xzr.  xzr and the
+   stack pointer share the same encoding and this function only supports
+   the stack pointer.
+
+   RD is the destination register.
+   OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
+   OPERAND_REGISTER.  */
+
+static int
+emit_mov (uint32_t *buf, struct aarch64_register rd,
+         struct aarch64_operand operand)
+{
+  if (operand.type == OPERAND_IMMEDIATE)
+    {
+      uint32_t size = ENCODE (rd.is64, 1, 31);
+      /* Do not shift the immediate.  */
+      uint32_t shift = ENCODE (0, 2, 21);
+
+      return emit_insn (buf, MOV | size | shift
+                       | ENCODE (operand.imm, 16, 5)
+                       | ENCODE (rd.num, 5, 0));
+    }
+  else
+    return emit_add (buf, rd, operand.reg, immediate_operand (0));
+}
+
+/* Write a MOVK instruction into *BUF.
+
+     MOVK rd, #imm, lsl #shift
+
+   RD is the destination register.
+   IMM is the immediate.
+   SHIFT is the logical shift left to apply to IMM.   */
+
+static int
+emit_movk (uint32_t *buf, struct aarch64_register rd, uint32_t imm, unsigned shift)
+{
+  uint32_t size = ENCODE (rd.is64, 1, 31);
+
+  return emit_insn (buf, MOVK | size | ENCODE (shift, 2, 21) |
+                   ENCODE (imm, 16, 5) | ENCODE (rd.num, 5, 0));
+}
+
+/* Write instructions into *BUF in order to move ADDR into a register.
+   ADDR can be a 64-bit value.
+
+   This function will emit a series of MOV and MOVK instructions, such as:
+
+     MOV  xd, #(addr)
+     MOVK xd, #(addr >> 16), lsl #16
+     MOVK xd, #(addr >> 32), lsl #32
+     MOVK xd, #(addr >> 48), lsl #48  */
+
+static int
+emit_mov_addr (uint32_t *buf, struct aarch64_register rd, CORE_ADDR addr)
+{
+  uint32_t *p = buf;
+
+  /* The MOV (wide immediate) instruction clears to top bits of the
+     register.  */
+  p += emit_mov (p, rd, immediate_operand (addr & 0xffff));
+
+  if ((addr >> 16) != 0)
+    p += emit_movk (p, rd, (addr >> 16) & 0xffff, 1);
+  else
+    return p - buf;
+
+  if ((addr >> 32) != 0)
+    p += emit_movk (p, rd, (addr >> 32) & 0xffff, 2);
+  else
+    return p - buf;
+
+  if ((addr >> 48) != 0)
+    p += emit_movk (p, rd, (addr >> 48) & 0xffff, 3);
+
+  return p - buf;
+}
+
+/* Write a MRS instruction into *BUF.  The register size is 64-bit.
+
+     MRS xt, system_reg
+
+   RT is the destination register.
+   SYSTEM_REG is special purpose register to read.  */
+
+static int
+emit_mrs (uint32_t *buf, struct aarch64_register rt,
+         enum aarch64_system_control_registers system_reg)
+{
+  return emit_insn (buf, MRS | ENCODE (system_reg, 15, 5)
+                   | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a MSR instruction into *BUF.  The register size is 64-bit.
+
+     MSR system_reg, xt
+
+   SYSTEM_REG is special purpose register to write.
+   RT is the input register.  */
+
+static int
+emit_msr (uint32_t *buf, enum aarch64_system_control_registers system_reg,
+         struct aarch64_register rt)
+{
+  return emit_insn (buf, MSR | ENCODE (system_reg, 15, 5)
+                   | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a SEVL instruction into *BUF.
+
+   This is a hint instruction telling the hardware to trigger an event.  */
+
+static int
+emit_sevl (uint32_t *buf)
+{
+  return emit_insn (buf, SEVL);
+}
+
+/* Write a WFE instruction into *BUF.
+
+   This is a hint instruction telling the hardware to wait for an event.  */
+
+static int
+emit_wfe (uint32_t *buf)
+{
+  return emit_insn (buf, WFE);
+}
+
+/* Write LEN instructions from BUF into the inferior memory at *TO.
+
+   Note instructions are always little endian on AArch64, unlike data.  */
+
+static void
+append_insns (CORE_ADDR *to, size_t len, const uint32_t *buf)
+{
+  size_t byte_len = len * sizeof (uint32_t);
+#if (__BYTE_ORDER == __BIG_ENDIAN)
+  uint32_t *le_buf = xmalloc (byte_len);
+  size_t i;
+
+  for (i = 0; i < len; i++)
+    le_buf[i] = htole32 (buf[i]);
+
+  write_inferior_memory (*to, (const unsigned char *) le_buf, byte_len);
+
+  xfree (le_buf);
+#else
+  write_inferior_memory (*to, (const unsigned char *) buf, byte_len);
+#endif
+
+  *to += byte_len;
+}
+
+/* Helper function.  Return 1 if VAL can be encoded in BITS bits.  */
+
+static int
+can_encode_int32 (int32_t val, unsigned bits)
+{
+  /* This must be an arithemic shift.  */
+  int32_t rest = val >> bits;
+
+  return rest == 0 || rest == -1;
+}
+
+/* Relocate an instruction from OLDLOC to *TO.  This function will also
+   increment TO by the number of bytes the new instruction(s) take(s).
+
+   PC relative instructions need to be handled specifically:
+
+   - B/BL
+   - B.COND
+   - CBZ/CBNZ
+   - TBZ/TBNZ
+   - ADR/ADRP
+   - LDR/LDRSW (literal)  */
+
+static void
+aarch64_relocate_instruction (CORE_ADDR *to, CORE_ADDR oldloc)
+{
+  uint32_t buf[32];
+  uint32_t *p = buf;
+  uint32_t insn;
+
+  int is_bl;
+  int is64;
+  int is_sw;
+  int is_cbnz;
+  int is_tbnz;
+  int is_adrp;
+  unsigned rn;
+  unsigned rt;
+  unsigned rd;
+  unsigned cond;
+  unsigned bit;
+  int32_t offset;
+
+  target_read_uint32 (oldloc, &insn);
+
+  if (aarch64_decode_b (oldloc, insn, &is_bl, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 28))
+       p += emit_b (p, is_bl, offset);
+      else
+       return;
+    }
+  else if (aarch64_decode_bcond (oldloc, insn, &cond, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 21))
+       p += emit_bcond (p, cond, offset);
+      else if (can_encode_int32 (offset, 28))
+       {
+         /* The offset is out of range for a conditional branch
+            instruction but not for a unconditional branch.  We can use
+            the following instructions instead:
+
+              B.COND TAKEN    ; If cond is true, then jump to TAKEN.
+              B NOT_TAKEN     ; Else jump over TAKEN and continue.
+            TAKEN:
+              B #(offset - 8)
+            NOT_TAKEN:
+
+            */
+
+         p += emit_bcond (p, cond, 8);
+         p += emit_b (p, 0, 8);
+         p += emit_b (p, 0, offset - 8);
+       }
+      else
+       return;
+    }
+  else if (aarch64_decode_cb (oldloc, insn, &is64, &is_cbnz, &rn, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 21))
+       p += emit_cb (p, is_cbnz, aarch64_register (rn, is64), offset);
+      else if (can_encode_int32 (offset, 28))
+       {
+         /* The offset is out of range for a compare and branch
+            instruction but not for a unconditional branch.  We can use
+            the following instructions instead:
+
+              CBZ xn, TAKEN   ; xn == 0, then jump to TAKEN.
+              B NOT_TAKEN     ; Else jump over TAKEN and continue.
+            TAKEN:
+              B #(offset - 8)
+            NOT_TAKEN:
+
+            */
+         p += emit_cb (p, is_cbnz, aarch64_register (rn, is64), 8);
+         p += emit_b (p, 0, 8);
+         p += emit_b (p, 0, offset - 8);
+       }
+      else
+       return;
+    }
+  else if (aarch64_decode_tb (oldloc, insn, &is_tbnz, &bit, &rt, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 16))
+       p += emit_tb (p, is_tbnz, bit, aarch64_register (rt, 1), offset);
+      else if (can_encode_int32 (offset, 28))
+       {
+         /* The offset is out of range for a test bit and branch
+            instruction but not for a unconditional branch.  We can use
+            the following instructions instead:
+
+              TBZ xn, #bit, TAKEN ; xn[bit] == 0, then jump to TAKEN.
+              B NOT_TAKEN         ; Else jump over TAKEN and continue.
+            TAKEN:
+              B #(offset - 8)
+            NOT_TAKEN:
+
+            */
+         p += emit_tb (p, is_tbnz, bit, aarch64_register (rt, 1), 8);
+         p += emit_b (p, 0, 8);
+         p += emit_b (p, 0, offset - 8);
+       }
+      else
+       return;
+    }
+  else if (aarch64_decode_adr (oldloc, insn, &is_adrp, &rd, &offset))
+    {
+
+      /* We know exactly the address the ADR{P,} instruction will compute.
+        We can just write it to the destination register.  */
+      CORE_ADDR address = oldloc + offset;
+
+      if (is_adrp)
+       {
+         /* Clear the lower 12 bits of the offset to get the 4K page.  */
+         p += emit_mov_addr (p, aarch64_register (rd, 1),
+                             address & ~0xfff);
+       }
+      else
+       p += emit_mov_addr (p, aarch64_register (rd, 1), address);
+    }
+  else if (aarch64_decode_ldr_literal (oldloc, insn, &is_sw, &is64, &rt,
+                                      &offset))
+    {
+      /* We know exactly what address to load from, and what register we
+        can use:
+
+          MOV xd, #(oldloc + offset)
+          MOVK xd, #((oldloc + offset) >> 16), lsl #16
+          ...
+
+          LDR xd, [xd] ; or LDRSW xd, [xd]
+
+        */
+      CORE_ADDR address = oldloc + offset;
+
+      p += emit_mov_addr (p, aarch64_register (rt, 1), address);
+
+      if (is_sw)
+       p += emit_ldrsw (p, aarch64_register (rt, 1),
+                        aarch64_register (rt, 1),
+                        offset_memory_operand (0));
+      else
+       p += emit_ldr (p, aarch64_register (rt, is64),
+                      aarch64_register (rt, 1),
+                      offset_memory_operand (0));
+    }
+  else
+    {
+      /* The instruction is not PC relative.  Just re-emit it at the new
+        location.  */
+      p += emit_insn (p, insn);
+    }
+
+  append_insns (to, p - buf, buf);
+}
+
+/* Implementation of linux_target_ops method
+   "install_fast_tracepoint_jump_pad".  */
+
+static int
+aarch64_install_fast_tracepoint_jump_pad (CORE_ADDR tpoint,
+                                         CORE_ADDR tpaddr,
+                                         CORE_ADDR collector,
+                                         CORE_ADDR lockaddr,
+                                         ULONGEST orig_size,
+                                         CORE_ADDR *jump_entry,
+                                         CORE_ADDR *trampoline,
+                                         ULONGEST *trampoline_size,
+                                         unsigned char *jjump_pad_insn,
+                                         ULONGEST *jjump_pad_insn_size,
+                                         CORE_ADDR *adjusted_insn_addr,
+                                         CORE_ADDR *adjusted_insn_addr_end,
+                                         char *err)
+{
+  uint32_t buf[256];
+  uint32_t *p = buf;
+  int32_t offset;
+  int i;
+  CORE_ADDR buildaddr = *jump_entry;
+
+  /* We need to save the current state on the stack both to restore it
+     later and to collect register values when the tracepoint is hit.
+
+     The saved registers are pushed in a layout that needs to be in sync
+     with aarch64_ft_collect_regmap (see linux-aarch64-ipa.c).  Later on
+     the supply_fast_tracepoint_registers function will fill in the
+     register cache from a pointer to saved registers on the stack we build
+     here.
+
+     For simplicity, we set the size of each cell on the stack to 16 bytes.
+     This way one cell can hold any register type, from system registers
+     to the 128 bit SIMD&FP registers.  Furthermore, the stack pointer
+     has to be 16 bytes aligned anyway.
+
+     Note that the CPSR register does not exist on AArch64.  Instead we
+     can access system bits describing the process state with the
+     MRS/MSR instructions, namely the condition flags.  We save them as
+     if they are part of a CPSR register because that's how GDB
+     interprets these system bits.  At the moment, only the condition
+     flags are saved in CPSR (NZCV).
+
+     Stack layout, each cell is 16 bytes (descending):
+
+     High *-------- SIMD&FP registers from 31 down to 0. --------*
+         | q31                                                  |
+         .                                                      .
+         .                                                      . 32 cells
+         .                                                      .
+         | q0                                                   |
+         *---- General purpose registers from 30 down to 0. ----*
+         | x30                                                  |
+         .                                                      .
+         .                                                      . 31 cells
+         .                                                      .
+         | x0                                                   |
+         *------------- Special purpose registers. -------------*
+         | SP                                                   |
+         | PC                                                   |
+         | CPSR (NZCV)                                          | 5 cells
+         | FPSR                                                 |
+         | FPCR                                                 | <- SP + 16
+         *------------- collecting_t object --------------------*
+         | TPIDR_EL0               | struct tracepoint *        |
+     Low  *------------------------------------------------------*
+
+     After this stack is set up, we issue a call to the collector, passing
+     it the saved registers at (SP + 16).  */
+
+  /* Push SIMD&FP registers on the stack:
+
+       SUB sp, sp, #(32 * 16)
+
+       STP q30, q31, [sp, #(30 * 16)]
+       ...
+       STP q0, q1, [sp]
+
+     */
+  p += emit_sub (p, sp, sp, immediate_operand (32 * 16));
+  for (i = 30; i >= 0; i -= 2)
+    p += emit_stp_q_offset (p, i, i + 1, sp, i * 16);
+
+  /* Push general puspose registers on the stack.  Note that we do not need
+     to push x31 as it represents the xzr register and not the stack
+     pointer in a STR instruction.
+
+       SUB sp, sp, #(31 * 16)
+
+       STR x30, [sp, #(30 * 16)]
+       ...
+       STR x0, [sp]
+
+     */
+  p += emit_sub (p, sp, sp, immediate_operand (31 * 16));
+  for (i = 30; i >= 0; i -= 1)
+    p += emit_str (p, aarch64_register (i, 1), sp,
+                  offset_memory_operand (i * 16));
+
+  /* Make space for 5 more cells.
+
+       SUB sp, sp, #(5 * 16)
+
+     */
+  p += emit_sub (p, sp, sp, immediate_operand (5 * 16));
+
+
+  /* Save SP:
+
+       ADD x4, sp, #((32 + 31 + 5) * 16)
+       STR x4, [sp, #(4 * 16)]
+
+     */
+  p += emit_add (p, x4, sp, immediate_operand ((32 + 31 + 5) * 16));
+  p += emit_str (p, x4, sp, offset_memory_operand (4 * 16));
+
+  /* Save PC (tracepoint address):
+
+       MOV  x3, #(tpaddr)
+       ...
+
+       STR x3, [sp, #(3 * 16)]
+
+     */
+
+  p += emit_mov_addr (p, x3, tpaddr);
+  p += emit_str (p, x3, sp, offset_memory_operand (3 * 16));
+
+  /* Save CPSR (NZCV), FPSR and FPCR:
+
+       MRS x2, nzcv
+       MRS x1, fpsr
+       MRS x0, fpcr
+
+       STR x2, [sp, #(2 * 16)]
+       STR x1, [sp, #(1 * 16)]
+       STR x0, [sp, #(0 * 16)]
+
+     */
+  p += emit_mrs (p, x2, NZCV);
+  p += emit_mrs (p, x1, FPSR);
+  p += emit_mrs (p, x0, FPCR);
+  p += emit_str (p, x2, sp, offset_memory_operand (2 * 16));
+  p += emit_str (p, x1, sp, offset_memory_operand (1 * 16));
+  p += emit_str (p, x0, sp, offset_memory_operand (0 * 16));
+
+  /* Push the collecting_t object.  It consist of the address of the
+     tracepoint and an ID for the current thread.  We get the latter by
+     reading the tpidr_el0 system register.  It corresponds to the
+     NT_ARM_TLS register accessible with ptrace.
+
+       MOV x0, #(tpoint)
+       ...
+
+       MRS x1, tpidr_el0
+
+       STP x0, x1, [sp, #-16]!
+
+     */
+
+  p += emit_mov_addr (p, x0, tpoint);
+  p += emit_mrs (p, x1, TPIDR_EL0);
+  p += emit_stp (p, x0, x1, sp, preindex_memory_operand (-16));
+
+  /* Spin-lock:
+
+     The shared memory for the lock is at lockaddr.  It will hold zero
+     if no-one is holding the lock, otherwise it contains the address of
+     the collecting_t object on the stack of the thread which acquired it.
+
+     At this stage, the stack pointer points to this thread's collecting_t
+     object.
+
+     We use the following registers:
+     - x0: Address of the lock.
+     - x1: Pointer to collecting_t object.
+     - x2: Scratch register.
+
+       MOV x0, #(lockaddr)
+       ...
+       MOV x1, sp
+
+       ; Trigger an event local to this core.  So the following WFE
+       ; instruction is ignored.
+       SEVL
+     again:
+       ; Wait for an event.  The event is triggered by either the SEVL
+       ; or STLR instructions (store release).
+       WFE
+
+       ; Atomically read at lockaddr.  This marks the memory location as
+       ; exclusive.  This instruction also has memory constraints which
+       ; make sure all previous data reads and writes are done before
+       ; executing it.
+       LDAXR x2, [x0]
+
+       ; Try again if another thread holds the lock.
+       CBNZ x2, again
+
+       ; We can lock it!  Write the address of the collecting_t object.
+       ; This instruction will fail if the memory location is not marked
+       ; as exclusive anymore.  If it succeeds, it will remove the
+       ; exclusive mark on the memory location.  This way, if another
+       ; thread executes this instruction before us, we will fail and try
+       ; all over again.
+       STXR w2, x1, [x0]
+       CBNZ w2, again
+
+     */
+
+  p += emit_mov_addr (p, x0, lockaddr);
+  p += emit_mov (p, x1, register_operand (sp));
+
+  p += emit_sevl (p);
+  p += emit_wfe (p);
+  p += emit_ldaxr (p, x2, x0);
+  p += emit_cb (p, 1, w2, -2 * 4);
+  p += emit_stxr (p, w2, x1, x0);
+  p += emit_cb (p, 1, x2, -4 * 4);
+
+  /* Call collector (struct tracepoint *, unsigned char *):
+
+       MOV x0, #(tpoint)
+       ...
+
+       ; Saved registers start after the collecting_t object.
+       ADD x1, sp, #16
+
+       ; We use an intra-procedure-call scratch register.
+       MOV ip0, #(collector)
+       ...
+
+       ; And call back to C!
+       BLR ip0
+
+     */
+
+  p += emit_mov_addr (p, x0, tpoint);
+  p += emit_add (p, x1, sp, immediate_operand (16));
+
+  p += emit_mov_addr (p, ip0, collector);
+  p += emit_blr (p, ip0);
+
+  /* Release the lock.
+
+       MOV x0, #(lockaddr)
+       ...
+
+       ; This instruction is a normal store with memory ordering
+       ; constraints.  Thanks to this we do not have to put a data
+       ; barrier instruction to make sure all data read and writes are done
+       ; before this instruction is executed.  Furthermore, this instrucion
+       ; will trigger an event, letting other threads know they can grab
+       ; the lock.
+       STLR xzr, [x0]
+
+     */
+  p += emit_mov_addr (p, x0, lockaddr);
+  p += emit_stlr (p, xzr, x0);
+
+  /* Free collecting_t object:
+
+       ADD sp, sp, #16
+
+     */
+  p += emit_add (p, sp, sp, immediate_operand (16));
+
+  /* Restore CPSR (NZCV), FPSR and FPCR.  And free all special purpose
+     registers from the stack.
+
+       LDR x2, [sp, #(2 * 16)]
+       LDR x1, [sp, #(1 * 16)]
+       LDR x0, [sp, #(0 * 16)]
+
+       MSR NZCV, x2
+       MSR FPSR, x1
+       MSR FPCR, x0
+
+       ADD sp, sp #(5 * 16)
+
+     */
+  p += emit_ldr (p, x2, sp, offset_memory_operand (2 * 16));
+  p += emit_ldr (p, x1, sp, offset_memory_operand (1 * 16));
+  p += emit_ldr (p, x0, sp, offset_memory_operand (0 * 16));
+  p += emit_msr (p, NZCV, x2);
+  p += emit_msr (p, FPSR, x1);
+  p += emit_msr (p, FPCR, x0);
+
+  p += emit_add (p, sp, sp, immediate_operand (5 * 16));
+
+  /* Pop general purpose registers:
+
+       LDR x0, [sp]
+       ...
+       LDR x30, [sp, #(30 * 16)]
+
+       ADD sp, sp, #(31 * 16)
+
+     */
+  for (i = 0; i <= 30; i += 1)
+    p += emit_ldr (p, aarch64_register (i, 1), sp,
+                  offset_memory_operand (i * 16));
+  p += emit_add (p, sp, sp, immediate_operand (31 * 16));
+
+  /* Pop SIMD&FP registers:
+
+       LDP q0, q1, [sp]
+       ...
+       LDP q30, q31, [sp, #(30 * 16)]
+
+       ADD sp, sp, #(32 * 16)
+
+     */
+  for (i = 0; i <= 30; i += 2)
+    p += emit_ldp_q_offset (p, i, i + 1, sp, i * 16);
+  p += emit_add (p, sp, sp, immediate_operand (32 * 16));
+
+  /* Write the code into the inferior memory.  */
+  append_insns (&buildaddr, p - buf, buf);
+
+  /* Now emit the relocated instruction.  */
+  *adjusted_insn_addr = buildaddr;
+  aarch64_relocate_instruction (&buildaddr, tpaddr);
+  *adjusted_insn_addr_end = buildaddr;
+
+  /* We may not have been able to relocate the instruction.  */
+  if (*adjusted_insn_addr == *adjusted_insn_addr_end)
+    {
+      sprintf (err,
+              "E.Could not relocate instruction from %s to %s.",
+              core_addr_to_string_nz (tpaddr),
+              core_addr_to_string_nz (buildaddr));
+      return 1;
+    }
+
+  /* Go back to the start of the buffer.  */
+  p = buf;
+
+  /* Emit a branch back from the jump pad.  */
+  offset = (tpaddr + orig_size - buildaddr);
+  if (!can_encode_int32 (offset, 28))
+    {
+      sprintf (err,
+              "E.Jump back from jump pad too far from tracepoint "
+              "(offset 0x%" PRIx32 " cannot be encoded in 28 bits).",
+              offset);
+      return 1;
+    }
+
+  p += emit_b (p, 0, offset);
+  append_insns (&buildaddr, p - buf, buf);
+
+  /* Give the caller a branch instruction into the jump pad.  */
+  offset = (*jump_entry - tpaddr);
+  if (!can_encode_int32 (offset, 28))
+    {
+      sprintf (err,
+              "E.Jump pad too far from tracepoint "
+              "(offset 0x%" PRIx32 " cannot be encoded in 28 bits).",
+              offset);
+      return 1;
+    }
+
+  emit_b ((uint32_t *) jjump_pad_insn, 0, offset);
+  *jjump_pad_insn_size = 4;
+
+  /* Return the end address of our pad.  */
+  *jump_entry = buildaddr;
+
+  return 0;
+}
+
+/* Implementation of linux_target_ops method
+   "get_min_fast_tracepoint_insn_len".  */
+
+static int
+aarch64_get_min_fast_tracepoint_insn_len (void)
+{
+  return 4;
+}
+
  /* Implementation of linux_target_ops method "supports_range_stepping".  */
  
  static int
@@ -595,10 +2056,10 @@ struct linux_target_ops the_low_target =
    aarch64_linux_prepare_to_resume,
    NULL, /* process_qsupported */
    aarch64_supports_tracepoints,
-  NULL, /* get_thread_area */
-  NULL, /* install_fast_tracepoint_jump_pad */
+  aarch64_get_thread_area,
+  aarch64_install_fast_tracepoint_jump_pad,
    NULL, /* emit_ops */
-  NULL, /* get_min_fast_tracepoint_insn_len */
+  aarch64_get_min_fast_tracepoint_insn_len,
    aarch64_supports_range_stepping,
  };
author	Pierre Langlois <pierre.langlois@arm.com>
	Mon, 21 Sep 2015 14:01:04 +0000 (15:01 +0100)
committer	Yao Qi <yao.qi@linaro.org>
	Mon, 21 Sep 2015 14:01:04 +0000 (15:01 +0100)
gdb/gdbserver/ChangeLog		patch \| blob \| history
gdb/gdbserver/Makefile.in		patch \| blob \| history
gdb/gdbserver/configure.srv		patch \| blob \| history
gdb/gdbserver/linux-aarch64-ipa.c	[new file with mode: 0644]	patch \| blob
gdb/gdbserver/linux-aarch64-low.c		patch \| blob \| history