From: Tom Tromey <tromey@adacore.com>
Date: Wed, 4 May 2022 19:08:11 +0000 (-0600)
Subject: Implement lazy FPU initialization for ravenscar
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=965b71a7f739a747c6b427a96b1fa9dd26e38956;p=binutils-gdb.git

Implement lazy FPU initialization for ravenscar

Some ravenscar runtimes implement lazy FPU handling.  On these
runtimes, the FPU is only initialized when a task tries to use it.
Furthermore, the FP registers aren't automatically saved on a task
switch -- instead, the save is deferred until the new task tries to
use the FPU.  In addition, each task's context area has a flag
indicating whether the FPU has been initialized for this task.

This patch teaches GDB to understand this implementation.  When
fetching or storing registers, GDB now checks to see whether the live
FP registers should be used.  If not, the task's saved FP registers
will be used if the task has caused FPU initialization.

Currently only AArch64 uses this code.  bb-runtimes implements this
for ARM as well, but GDB doesn't yet have an arm-ravenscar-thread.c.
---

diff --git a/gdb/aarch64-ravenscar-thread.c b/gdb/aarch64-ravenscar-thread.c
index 045d022fc23..eb7bda88d5d 100644
--- a/gdb/aarch64-ravenscar-thread.c
+++ b/gdb/aarch64-ravenscar-thread.c
@@ -61,10 +61,17 @@ static const int aarch64_context_offsets[] =
   112,       116,
 };
 
+#define V_INIT_OFFSET 640
+
 /* The ravenscar_arch_ops vector for most Aarch64 targets.  */
 
 static struct ravenscar_arch_ops aarch64_ravenscar_ops
-     (aarch64_context_offsets);
+     (aarch64_context_offsets,
+      -1, -1,
+      V_INIT_OFFSET,
+      /* The FPU context buffer starts with the FPSR register.  */
+      aarch64_context_offsets[AARCH64_FPSR_REGNUM],
+      AARCH64_V0_REGNUM, AARCH64_FPCR_REGNUM);
 
 /* Register aarch64_ravenscar_ops in GDBARCH.  */
 
diff --git a/gdb/ravenscar-thread.c b/gdb/ravenscar-thread.c
index e300095b53f..1718c367ff6 100644
--- a/gdb/ravenscar-thread.c
+++ b/gdb/ravenscar-thread.c
@@ -164,6 +164,32 @@ private:
     switch_to_thread (find_thread_ptid (proc_target, underlying));
   }
 
+  /* Some targets use lazy FPU initialization.  On these, the FP
+     registers for a given task might be uninitialized, or stored in
+     the per-task context, or simply be the live registers on the CPU.
+     This enum is used to encode this information.  */
+  enum fpu_state
+  {
+    /* Nothing special is needed for FP registers -- either the target
+       has no lazy FP scheme, or the task's FP registers are saved in
+       its context; they are treated identically to non-FP registers.  */
+    NOTHING_SPECIAL,
+    /* This target uses the lazy FP scheme, and the FP registers are
+       taken from the CPU.  This can happen for any task, because if a
+       task switch occurs, the registers aren't immediately written to
+       the per-task context -- this is deferred until the current task
+       causes an FPU trap.  */
+    LIVE_FP_REGISTERS,
+    /* This target uses the lazy FP scheme, and the FP registers are
+       not available.  Maybe this task never initialized the FPU, or
+       maybe GDB couldn't find the required symbol.  */
+    NO_FP_REGISTERS
+  };
+
+  /* Return the FPU state.  */
+  fpu_state get_fpu_state (struct regcache *regcache,
+			   const ravenscar_arch_ops *arch_ops);
+
   /* This maps a TID to the CPU on which it was running.  This is
      needed because sometimes the runtime will report an active task
      that hasn't yet been put on the list of tasks that is read by
@@ -508,9 +534,11 @@ ravenscar_arch_ops::supply_one_register (struct regcache *regcache,
 }
 
 void
-ravenscar_arch_ops::fetch_registers (struct regcache *regcache,
-				     int regnum) const
+ravenscar_arch_ops::fetch_register (struct regcache *regcache,
+				    int regnum) const
 {
+  gdb_assert (regnum != -1);
+
   struct gdbarch *gdbarch = regcache->arch ();
   /* The tid is the thread_id field, which is a pointer to the thread.  */
   CORE_ADDR thread_descriptor_address
@@ -518,26 +546,17 @@ ravenscar_arch_ops::fetch_registers (struct regcache *regcache,
 
   int sp_regno = -1;
   CORE_ADDR stack_address = 0;
-  if (regnum == -1
-      || (regnum >= first_stack_register && regnum <= last_stack_register))
+  if (regnum >= first_stack_register && regnum <= last_stack_register)
     {
       /* We must supply SP for get_stack_base, so recurse.  */
       sp_regno = gdbarch_sp_regnum (gdbarch);
       gdb_assert (!(sp_regno >= first_stack_register
 		    && sp_regno <= last_stack_register));
-      fetch_registers (regcache, sp_regno);
+      fetch_register (regcache, sp_regno);
       stack_address = get_stack_base (regcache);
     }
 
-  if (regnum == -1)
-    {
-      /* Fetch all registers.  */
-      for (int reg = 0; reg < offsets.size (); ++reg)
-	if (reg != sp_regno && offsets[reg] != -1)
-	  supply_one_register (regcache, reg, thread_descriptor_address,
-			       stack_address);
-    }
-  else if (regnum < offsets.size () && offsets[regnum] != -1)
+  if (regnum < offsets.size () && offsets[regnum] != -1)
     supply_one_register (regcache, regnum, thread_descriptor_address,
 			 stack_address);
 }
@@ -562,27 +581,20 @@ ravenscar_arch_ops::store_one_register (struct regcache *regcache, int regnum,
 }
 
 void
-ravenscar_arch_ops::store_registers (struct regcache *regcache,
-				     int regnum) const
+ravenscar_arch_ops::store_register (struct regcache *regcache,
+				    int regnum) const
 {
+  gdb_assert (regnum != -1);
+
   /* The tid is the thread_id field, which is a pointer to the thread.  */
   CORE_ADDR thread_descriptor_address
     = (CORE_ADDR) regcache->ptid ().tid ();
 
   CORE_ADDR stack_address = 0;
-  if (regnum == -1
-      || (regnum >= first_stack_register && regnum <= last_stack_register))
+  if (regnum >= first_stack_register && regnum <= last_stack_register)
     stack_address = get_stack_base (regcache);
 
-  if (regnum == -1)
-    {
-      /* Store all registers.  */
-      for (int reg = 0; reg < offsets.size (); ++reg)
-	if (offsets[reg] != -1)
-	  store_one_register (regcache, reg, thread_descriptor_address,
-			      stack_address);
-    }
-  else if (regnum < offsets.size () && offsets[regnum] != -1)
+  if (regnum < offsets.size () && offsets[regnum] != -1)
     store_one_register (regcache, regnum, thread_descriptor_address,
 			stack_address);
 }
@@ -615,6 +627,48 @@ private:
   ptid_t m_save_ptid;
 };
 
+ravenscar_thread_target::fpu_state
+ravenscar_thread_target::get_fpu_state (struct regcache *regcache,
+					const ravenscar_arch_ops *arch_ops)
+{
+  /* Determine whether special FP register handling is needed.  If
+     this target doesn't have lazy FP, then no special treatment is
+     ever needed.  */
+  if (!arch_ops->on_demand_fp ())
+    return NOTHING_SPECIAL;
+
+  bound_minimal_symbol fpu_context
+    = lookup_minimal_symbol ("system__bb__cpu_primitives__current_fpu_context",
+			     nullptr, nullptr);
+  /* If the symbol can't be found, report the FP registers as unavailable.  */
+  if (fpu_context.minsym == nullptr)
+    return NO_FP_REGISTERS;
+
+  struct type *ptr_type = builtin_type (target_gdbarch ())->builtin_data_ptr;
+  ptr_type = lookup_pointer_type (ptr_type);
+  value *val = value_from_pointer (ptr_type, fpu_context.value_address ());
+
+  int cpu = get_thread_base_cpu (regcache->ptid ());
+  /* The array index type has a lower bound of 1 -- it is Ada code --
+     so subtract 1 here.  */
+  val = value_ptradd (val, cpu - 1);
+
+  val = value_ind (val);
+  CORE_ADDR fpu_task = value_as_long (val);
+
+  /* The tid is the thread_id field, which is a pointer to the thread.  */
+  CORE_ADDR thread_descriptor_address
+    = (CORE_ADDR) regcache->ptid ().tid ();
+  if (fpu_task == (thread_descriptor_address
+		   + arch_ops->get_fpu_context_offset ()))
+    return LIVE_FP_REGISTERS;
+
+  int v_init_offset = arch_ops->get_v_init_offset ();
+  gdb_byte init = 0;
+  read_memory (thread_descriptor_address + v_init_offset, &init, 1);
+  return init ? NOTHING_SPECIAL : NO_FP_REGISTERS;
+}
+
 void
 ravenscar_thread_target::fetch_registers (struct regcache *regcache,
 					  int regnum)
@@ -623,19 +677,38 @@ ravenscar_thread_target::fetch_registers (struct regcache *regcache,
 
   if (runtime_initialized () && is_ravenscar_task (ptid))
     {
-      if (task_is_currently_active (ptid))
-	{
-	  ptid_t base = get_base_thread_from_ravenscar_task (ptid);
-	  temporarily_change_regcache_ptid changer (regcache, base);
-	  beneath ()->fetch_registers (regcache, regnum);
-	}
-      else
-	{
-	  struct gdbarch *gdbarch = regcache->arch ();
-	  struct ravenscar_arch_ops *arch_ops
-	    = gdbarch_ravenscar_ops (gdbarch);
+      struct gdbarch *gdbarch = regcache->arch ();
+      bool is_active = task_is_currently_active (ptid);
+      struct ravenscar_arch_ops *arch_ops = gdbarch_ravenscar_ops (gdbarch);
+      gdb::optional<fpu_state> fp_state;
+
+      int low_reg = regnum == -1 ? 0 : regnum;
+      int high_reg = regnum == -1 ? gdbarch_num_regs (gdbarch) : regnum + 1;
 
-	  arch_ops->fetch_registers (regcache, regnum);
+      ptid_t base = get_base_thread_from_ravenscar_task (ptid);
+      for (int i = low_reg; i < high_reg; ++i)
+	{
+	  bool use_beneath = false;
+	  if (arch_ops->is_fp_register (i))
+	    {
+	      if (!fp_state.has_value ())
+		fp_state = get_fpu_state (regcache, arch_ops);
+	      if (*fp_state == NO_FP_REGISTERS)
+		continue;
+	      if (*fp_state == LIVE_FP_REGISTERS
+		  || (is_active && *fp_state == NOTHING_SPECIAL))
+		use_beneath = true;
+	    }
+	  else
+	    use_beneath = is_active;
+
+	  if (use_beneath)
+	    {
+	      temporarily_change_regcache_ptid changer (regcache, base);
+	      beneath ()->fetch_registers (regcache, i);
+	    }
+	  else
+	    arch_ops->fetch_register (regcache, i);
 	}
     }
   else
@@ -650,19 +723,38 @@ ravenscar_thread_target::store_registers (struct regcache *regcache,
 
   if (runtime_initialized () && is_ravenscar_task (ptid))
     {
-      if (task_is_currently_active (ptid))
-	{
-	  ptid_t base = get_base_thread_from_ravenscar_task (ptid);
-	  temporarily_change_regcache_ptid changer (regcache, base);
-	  beneath ()->store_registers (regcache, regnum);
-	}
-      else
-	{
-	  struct gdbarch *gdbarch = regcache->arch ();
-	  struct ravenscar_arch_ops *arch_ops
-	    = gdbarch_ravenscar_ops (gdbarch);
+      struct gdbarch *gdbarch = regcache->arch ();
+      bool is_active = task_is_currently_active (ptid);
+      struct ravenscar_arch_ops *arch_ops = gdbarch_ravenscar_ops (gdbarch);
+      gdb::optional<fpu_state> fp_state;
 
-	  arch_ops->store_registers (regcache, regnum);
+      int low_reg = regnum == -1 ? 0 : regnum;
+      int high_reg = regnum == -1 ? gdbarch_num_regs (gdbarch) : regnum + 1;
+
+      ptid_t base = get_base_thread_from_ravenscar_task (ptid);
+      for (int i = low_reg; i < high_reg; ++i)
+	{
+	  bool use_beneath = false;
+	  if (arch_ops->is_fp_register (i))
+	    {
+	      if (!fp_state.has_value ())
+		fp_state = get_fpu_state (regcache, arch_ops);
+	      if (*fp_state == NO_FP_REGISTERS)
+		continue;
+	      if (*fp_state == LIVE_FP_REGISTERS
+		  || (is_active && *fp_state == NOTHING_SPECIAL))
+		use_beneath = true;
+	    }
+	  else
+	    use_beneath = is_active;
+
+	  if (use_beneath)
+	    {
+	      temporarily_change_regcache_ptid changer (regcache, base);
+	      beneath ()->store_registers (regcache, i);
+	    }
+	  else
+	    arch_ops->store_register (regcache, i);
 	}
     }
   else
diff --git a/gdb/ravenscar-thread.h b/gdb/ravenscar-thread.h
index 5d5661f48df..eda7ab1026d 100644
--- a/gdb/ravenscar-thread.h
+++ b/gdb/ravenscar-thread.h
@@ -26,19 +26,63 @@ struct ravenscar_arch_ops
 {
   ravenscar_arch_ops (gdb::array_view<const int> offsets_,
 		      int first_stack = -1,
-		      int last_stack = -1)
+		      int last_stack = -1,
+		      int v_init = -1,
+		      int fpu_offset = -1,
+		      int first_fp = -1,
+		      int last_fp = -1)
     : offsets (offsets_),
       first_stack_register (first_stack),
-      last_stack_register (last_stack)
+      last_stack_register (last_stack),
+      v_init_offset (v_init),
+      fpu_context_offset (fpu_offset),
+      first_fp_register (first_fp),
+      last_fp_register (last_fp)
   {
     /* These must either both be -1 or both be valid.  */
     gdb_assert ((first_stack_register == -1) == (last_stack_register == -1));
     /* They must also be ordered.  */
     gdb_assert (last_stack_register >= first_stack_register);
+    /* These must either all be -1 or all be valid.  */
+    gdb_assert ((v_init_offset == -1) == (fpu_context_offset == -1)
+		&& (fpu_context_offset == -1) == (first_fp_register == -1)
+		&& (first_fp_register == -1) == (last_fp_register == -1));
   }
 
-  void fetch_registers (struct regcache *, int) const;
-  void store_registers (struct regcache *, int) const;
+  /* Return true if this architecture implements on-demand floating
+     point.  */
+  bool on_demand_fp () const
+  { return v_init_offset != -1; }
+
+  /* Return true if REGNUM is a floating-point register for this
+     target.  If this target does not use the on-demand FP scheme,
+     this will always return false.  */
+  bool is_fp_register (int regnum) const
+  {
+    return regnum >= first_fp_register && regnum <= last_fp_register;
+  }
+
+  /* Return the offset, in the current task context, of the byte
+     indicating whether the FPU has been initialized for the task.
+     This can only be called when the architecture implements
+     on-demand floating-point.  */
+  int get_v_init_offset () const
+  {
+    gdb_assert (on_demand_fp ());
+    return v_init_offset;
+  }
+
+  /* Return the offset, in the current task context, of the FPU
+     context.  This can only be called when the architecture
+     implements on-demand floating-point.  */
+  int get_fpu_context_offset () const
+  {
+    gdb_assert (on_demand_fp ());
+    return fpu_context_offset;
+  }
+
+  void fetch_register (struct regcache *recache, int regnum) const;
+  void store_register (struct regcache *recache, int regnum) const;
 
 private:
 
@@ -54,6 +98,24 @@ private:
   const int first_stack_register;
   const int last_stack_register;
 
+  /* If these are -1, there is no special treatment for floating-point
+     registers -- they are handled, or not, just like all other
+     registers.
+
+     Otherwise, none of them may be -1, and the target is one that
+     uses on-demand FP initialization.  V_INIT_OFFSET is the offset of
+     a boolean field in the context that indicates whether the FP
+     registers have been initialized for this task.
+     FPU_CONTEXT_OFFSET is the offset of the FPU context from the task
+     context.  (This is needed to check whether the FPU registers have
+     been saved.)  FIRST_FP_REGISTER and LAST_FP_REGISTER are the
+     register numbers of the first and last (inclusive) floating point
+     registers.  */
+  const int v_init_offset;
+  const int fpu_context_offset;
+  const int first_fp_register;
+  const int last_fp_register;
+
   /* Helper function to supply one register.  */
   void supply_one_register (struct regcache *regcache, int regnum,
 			    CORE_ADDR descriptor,