gdb/python: improve formatting of help text for user defined commands

author Andrew Burgess <aburgess@redhat.com>

Mon, 16 May 2022 18:26:54 +0000 (19:26 +0100)

committer Andrew Burgess <aburgess@redhat.com>

Sat, 28 May 2022 09:36:50 +0000 (10:36 +0100)
author Andrew Burgess <aburgess@redhat.com>
Mon, 16 May 2022 18:26:54 +0000 (19:26 +0100)
committer Andrew Burgess <aburgess@redhat.com>
Sat, 28 May 2022 09:36:50 +0000 (10:36 +0100)
diff --git a/gdb/NEWS b/gdb/NEWS

index 52ffdc4c83aaf04811c0cccdda48f908b1083276..dac6dabfa42f37066acf3bd96e00b980a7d17c26 100644 (file)
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -27,6 +27,13 @@
    emit to indicate where a breakpoint should be placed to break in a function
    past its prologue.
  
+* Python API
+
+  ** GDB will now reformat the doc string for gdb.Command and
+     gdb.Parameter sub-classes to remove unnecessary leading
+     whitespace from each line before using the string as the help
+     output.
+
  * New commands
  
  maintenance set ignore-prologue-end-flag on|off
diff --git a/gdb/python/py-cmd.c b/gdb/python/py-cmd.c

index bc48c2ea9f9cb3df8a96bacc232750e74f60a9a8..f80318374d394993857b241e92fdca1bec8fbf59 100644 (file)
--- a/gdb/python/py-cmd.c
+++ b/gdb/python/py-cmd.c
@@ -493,6 +493,7 @@ cmdpy_init (PyObject *self, PyObject *args, PyObject *kw)
           docstring = python_string_to_host_string (ds_obj.get ());
           if (docstring == nullptr)
             return -1;
+         docstring = gdbpy_fix_doc_string_indentation (std::move (docstring));
         }
      }
    if (docstring == nullptr)
diff --git a/gdb/python/py-param.c b/gdb/python/py-param.c

index cac9bd209a4f0a990a4494f8585f2df20e097802..5d509ba465829df1a2adbf5dddf5e74180ba1df9 100644 (file)
--- a/gdb/python/py-param.c
+++ b/gdb/python/py-param.c
@@ -385,6 +385,8 @@ get_doc_string (PyObject *object, enum doc_string_type doc_type,
           result = python_string_to_host_string (ds_obj.get ());
           if (result == NULL)
             gdbpy_print_stack ();
+         else if (doc_type == doc_string_description)
+           result = gdbpy_fix_doc_string_indentation (std::move (result));
         }
      }
  
diff --git a/gdb/python/py-utils.c b/gdb/python/py-utils.c

index 63eb4e8078ca02932c0ffa994119140787d65f8a..1bd7b477ecbebca25e101e2b7e768f4199e78490 100644 (file)
--- a/gdb/python/py-utils.c
+++ b/gdb/python/py-utils.c
@@ -400,3 +400,197 @@ gdbpy_handle_exception ()
    else
      error ("%s", msg.get ());
  }
+
+/* See python-internal.h.  */
+
+gdb::unique_xmalloc_ptr<char>
+gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr<char> doc)
+{
+  /* A structure used to track the white-space information on each line of
+     DOC.  */
+  struct line_whitespace
+  {
+    /* Constructor.  OFFSET is the offset from the start of DOC, WS_COUNT
+       is the number of whitespace characters starting at OFFSET.  */
+    line_whitespace (size_t offset, int ws_count)
+      : m_offset (offset),
+       m_ws_count (ws_count)
+    { /* Nothing.  */ }
+
+    /* The offset from the start of DOC.  */
+    size_t offset () const
+    { return m_offset; }
+
+    /* The number of white-space characters at the start of this line.  */
+    int ws () const
+    { return m_ws_count; }
+
+  private:
+    /* The offset from the start of DOC to the first character of this
+       line.  */
+    size_t m_offset;
+
+    /* White space count on this line, the first character of this
+       whitespace is at OFFSET.  */
+    int m_ws_count;
+  };
+
+  /* Count the number of white-space character starting at TXT.  We
+     currently only count true single space characters, things like tabs,
+     newlines, etc are not counted.  */
+  auto count_whitespace = [] (const char *txt) -> int
+  {
+    int count = 0;
+
+    while (*txt == ' ')
+      {
+       ++txt;
+       ++count;
+      }
+
+    return count;
+  };
+
+  /* In MIN_WHITESPACE we track the smallest number of whitespace
+     characters seen at the start of a line (that has actual content), this
+     is the number of characters that we can delete off all lines without
+     altering the relative indentation of all lines in DOC.
+
+     The first line often has no indentation, but instead starts immediates
+     after the 3-quotes marker within the Python doc string, so, if the
+     first line has zero white-space then we just ignore it, and don't set
+     MIN_WHITESPACE to zero.
+
+     Lines without any content should (ideally) have no white-space at
+     all, but if they do then they might have an artificially low number
+     (user left a single stray space at the start of an otherwise blank
+     line), we don't consider lines without content when updating the
+     MIN_WHITESPACE value.  */
+  gdb::optional<int> min_whitespace;
+
+  /* The index into WS_INFO at which the processing of DOC can be
+     considered "all done", that is, after this point there are no further
+     lines with useful content and we should just stop.  */
+  gdb::optional<size_t> all_done_idx;
+
+  /* White-space information for each line in DOC.  */
+  std::vector<line_whitespace> ws_info;
+
+  /* Now look through DOC and collect the required information.  */
+  const char *tmp = doc.get ();
+  while (*tmp != '\0')
+    {
+      /* Add an entry for the offset to the start of this line, and how
+        much white-space there is at the start of this line.  */
+      size_t offset = tmp - doc.get ();
+      int ws_count = count_whitespace (tmp);
+      ws_info.emplace_back (offset, ws_count);
+
+      /* Skip over the white-space.  */
+      tmp += ws_count;
+
+      /* Remember where the content of this line starts, and skip forward
+        to either the end of this line (newline) or the end of the DOC
+        string (null character), whichever comes first.  */
+      const char *content_start = tmp;
+      while (*tmp != '\0' && *tmp != '\n')
+       ++tmp;
+
+      /* If this is not the first line, and if this line has some content,
+        then update MIN_WHITESPACE, this reflects the smallest number of
+        whitespace characters we can delete from all lines without
+        impacting the relative indentation of all the lines of DOC.  */
+      if (offset > 0 && tmp > content_start)
+       {
+         if (!min_whitespace.has_value ())
+           min_whitespace = ws_count;
+         else
+           min_whitespace = std::min (*min_whitespace, ws_count);
+       }
+
+      /* Each time we encounter a line that has some content we update
+        ALL_DONE_IDX to be the index of the next line.  If the last lines
+        of DOC don't contain any content then ALL_DONE_IDX will be left
+        pointing at an earlier line.  When we rewrite DOC, when we reach
+        ALL_DONE_IDX then we can stop, the allows us to trim any blank
+        lines from the end of DOC.  */
+      if (tmp > content_start)
+       all_done_idx = ws_info.size ();
+
+      /* If we reached a newline then skip forward to the start of the next
+        line.  The other possibility at this point is that we're at the
+        very end of the DOC string (null terminator).  */
+      if (*tmp == '\n')
+       ++tmp;
+    }
+
+  /* We found no lines with content, fail safe by just returning the
+     original documentation string.  */
+  if (!all_done_idx.has_value () || !min_whitespace.has_value ())
+    return doc;
+
+  /* Setup DST and SRC, both pointing into the DOC string.  We're going to
+     rewrite DOC in-place, as we only ever make DOC shorter (by removing
+     white-space), thus we know this will not overflow.  */
+  char *dst = doc.get ();
+  char *src = doc.get ();
+
+  /* Array indices used with DST, SRC, and WS_INFO respectively.  */
+  size_t dst_offset = 0;
+  size_t src_offset = 0;
+  size_t ws_info_offset = 0;
+
+  /* Now, walk over the source string, this is the original DOC.  */
+  while (src[src_offset] != '\0')
+    {
+      /* If we are at the start of the next line (in WS_INFO), then we may
+        need to skip some white-space characters.  */
+      if (src_offset == ws_info[ws_info_offset].offset ())
+       {
+         /* If a line has leading white-space then we need to skip over
+            some number of characters now.  */
+         if (ws_info[ws_info_offset].ws () > 0)
+           {
+             /* If the line is entirely white-space then we skip all of
+                the white-space, the next character to copy will be the
+                newline or null character.  Otherwise, we skip the just
+                some portion of the leading white-space.  */
+             if (src[src_offset + ws_info[ws_info_offset].ws ()] == '\n'
+                 || src[src_offset + ws_info[ws_info_offset].ws ()] == '\0')
+               src_offset += ws_info[ws_info_offset].ws ();
+             else
+               src_offset += std::min (*min_whitespace,
+                                       ws_info[ws_info_offset].ws ());
+
+             /* If we skipped white-space, and are now at the end of the
+                input, then we're done.  */
+             if (src[src_offset] == '\0')
+               break;
+           }
+         if (ws_info_offset < (ws_info.size () - 1))
+           ++ws_info_offset;
+         if (ws_info_offset > *all_done_idx)
+           break;
+       }
+
+      /* Don't copy a newline to the start of the DST string, this would
+        result in a leading blank line.  But in all other cases, copy the
+        next character into the destination string.  */
+      if ((dst_offset > 0 || src[src_offset] != '\n'))
+       {
+         dst[dst_offset] = src[src_offset];
+         ++dst_offset;
+       }
+
+      /* Move to the next source character.  */
+      ++src_offset;
+    }
+
+  /* Remove the trailing newline character(s), and ensure we have a null
+     terminator in place.  */
+  while (dst_offset > 1 && dst[dst_offset - 1] == '\n')
+    --dst_offset;
+  dst[dst_offset] = '\0';
+
+  return doc;
+}
diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h

index d947b96033b5b180543b904e413c0d9baf782a49..217bc15bb280820c7aa98fc509ba4f104699c267 100644 (file)
--- a/gdb/python/python-internal.h
+++ b/gdb/python/python-internal.h
@@ -822,4 +822,25 @@ extern bool gdbpy_is_architecture (PyObject *obj);
  
  extern bool gdbpy_is_progspace (PyObject *obj);
  
+/* Take DOC, the documentation string for a GDB command defined in Python,
+   and return an (possibly) modified version of that same string.
+
+   When a command is defined in Python, the documentation string will
+   usually be indented based on the indentation of the surrounding Python
+   code.  However, the documentation string is a literal string, all the
+   white-space added for indentation is included within the documentation
+   string.
+
+   This indentation is then included in the help text that GDB displays,
+   which looks odd out of the context of the original Python source code.
+
+   This function analyses DOC and tries to figure out what white-space
+   within DOC was added as part of the indentation, and then removes that
+   white-space from the copy that is returned.
+
+   If the analysis of DOC fails then DOC will be returned unmodified.  */
+
+extern gdb::unique_xmalloc_ptr<char> gdbpy_fix_doc_string_indentation
+  (gdb::unique_xmalloc_ptr<char> doc);
+
  #endif /* PYTHON_PYTHON_INTERNAL_H */
diff --git a/gdb/testsuite/gdb.python/py-doc-reformat.exp b/gdb/testsuite/gdb.python/py-doc-reformat.exp

new file mode 100644 (file)

index 0000000..aae3939
--- /dev/null
+++ b/gdb/testsuite/gdb.python/py-doc-reformat.exp
@@ -0,0 +1,282 @@
+# Copyright 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test how GDB reformats the documentation string of commands
+# implemented in Python for use as the help text of those same
+# commands.
+
+load_lib gdb-python.exp
+
+# A global counter used to number the tests.
+set idx 0
+
+# Run a test, create both a command and a parameter with INPUT_DOCS as
+# the Python documentation string, load the command and parameter into
+# GDB, and then ask for the help text of the command, and the show and
+# set help for the parameter.  The output from GDB should match
+# EXPECTED_OUTPUT in each case.
+#
+# The INPUT_DOCS must start with 2 space characters, followed by the
+# 3-quote characters to start the doc string, and must end with the
+# final 3-quote characters.  If this is not true then an error is
+# thrown, and this function immediately returns.
+proc test { input_docs expected_output } {
+    global idx
+
+    set local_idx $idx
+    incr idx
+
+    with_test_prefix "test ${local_idx}" {
+
+       # The tests in this script don't have descriptive names,
+       # rather they use the global IDX to generate a unique name for
+       # each test.  To make it easier to track down a failing test,
+       # we print out the test name and the input string.
+       verbose -log "Start of: test_cmd ${local_idx}"
+       verbose -log "Input:\n${input_docs}"
+
+       if { [string range $input_docs 0 4] != "  \"\"\"" } {
+           perror "Input string does not start with the required pattern"
+           return
+       }
+
+       if { [string range $input_docs end-2 end] != "\"\"\"" } {
+           perror "Input string does not end with the required pattern"
+           return
+       }
+
+       set python_filename [standard_output_file "${::gdb_test_file_name}-${local_idx}.py"]
+
+       set fd [open $python_filename w]
+
+       puts $fd "class test_cmd (gdb.Command):"
+       puts $fd $input_docs
+       puts $fd ""
+       puts $fd "  def __init__ (self):"
+       puts $fd "    super ().__init__ (\"test-cmd\", gdb.COMMAND_OBSCURE)"
+       puts $fd ""
+       puts $fd "  def invoke (self, arg, from_tty):"
+       puts $fd "    print (\"In test-cmd\")"
+       puts $fd ""
+
+       puts $fd "class test_param (gdb.Parameter):"
+       puts $fd $input_docs
+       puts $fd ""
+       puts $fd "  show_doc = \"This is the show doc line\""
+       puts $fd "  set_doc = \"This is the set doc line\""
+       puts $fd ""
+       puts $fd "  def __init__ (self):"
+       puts $fd "    super ().__init__ (\"test-param\", gdb.COMMAND_OBSCURE, gdb.PARAM_BOOLEAN)"
+       puts $fd ""
+       puts $fd "  def get_show_string (self, value):"
+       puts $fd "    return \"The state is '\" + value + \"'\""
+
+       puts $fd ""
+       puts $fd "test_param ()"
+       puts $fd "test_cmd ()"
+
+       puts $fd ""
+       puts $fd "print(\"Loaded OK.\")"
+
+       close $fd
+
+       set remote_python_file \
+           [gdb_remote_download host $python_filename]
+
+       clean_restart
+
+       gdb_test "source ${remote_python_file}" \
+           "Loaded OK\\." \
+           "source python command file"
+
+       # Use gdb_test_multiple here as plain gdb_test will allow
+       # excess blank lines between the expected_output pattern and
+       # the gdb_prompt - a core part of this test is that we are
+       # checking that such blank lines are removed - so we can't use
+       # gdb_test here.
+       gdb_test_multiple "help test-cmd" "" {
+           -re "^help test-cmd\r\n" {
+               exp_continue
+           }
+           -re "^$expected_output\r\n$::gdb_prompt $" {
+               pass $gdb_test_name
+           }
+       }
+
+       gdb_test_multiple "help set test-param" "" {
+           -re "^help set test-param\r\n" {
+               exp_continue
+           }
+           -re "^This is the set doc line\r\n" {
+               exp_continue
+           }
+           -re "^$expected_output\r\n$::gdb_prompt $" {
+               pass $gdb_test_name
+           }
+       }
+
+       gdb_test_multiple "help show test-param" "" {
+           -re "^help show test-param\r\n" {
+               exp_continue
+           }
+           -re "^This is the show doc line\r\n" {
+               exp_continue
+           }
+           -re "^$expected_output\r\n$::gdb_prompt $" {
+               pass $gdb_test_name
+           }
+       }
+    }
+}
+
+# The first test is pretty vanilla. First line starts immediately
+# after the opening quotes, and closing quotes are immediately after
+# the last line.
+test \
+    [multi_line_input \
+        "  \"\"\"This is the first line." \
+        "" \
+        "  This is the third line.\"\"\""] \
+    [multi_line \
+        "This is the first line\\." \
+        "" \
+        "This is the third line\\."]
+
+# In this test the first line starts on the line after the opening
+# quotes.  The blank line in the middle is still completely empty.
+test \
+    [multi_line_input \
+        "  \"\"\"" \
+        "  This is the first line." \
+        "" \
+        "  This is the third line.\"\"\""] \
+    [multi_line \
+        "This is the first line\\." \
+        "" \
+        "This is the third line\\."]
+
+# This test adds an indented line in the middle, we expect the
+# relative indentation to be retained.
+test\
+    [multi_line_input \
+        "  \"\"\"" \
+        "  This is the first line." \
+        "    Indented second line." \
+        "  This is the third line.\"\"\""] \
+    [multi_line \
+        "This is the first line\\." \
+        "  Indented second line\\." \
+        "This is the third line\\."]
+
+# The indentation moves to the first line.
+test\
+    [multi_line_input \
+        "  \"\"\"" \
+        "    Indented first line." \
+        "  This is the second line." \
+        "  This is the third line.\"\"\""] \
+    [multi_line \
+        "  Indented first line\\." \
+        "This is the second line\\." \
+        "This is the third line\\."]
+
+# The indentation moves to the last line.
+test\
+    [multi_line_input \
+        "  \"\"\"" \
+        "  This is the first line." \
+        "  This is the second line." \
+        "    Indented third line.\"\"\""] \
+    [multi_line \
+        "This is the first line\\." \
+        "This is the second line\\." \
+        "  Indented third line\\."]
+
+# Tests using different amounts of white-space on a line of its own.
+# We test with the white-space at the beginning, end, and in the
+# middle of the doc string.
+foreach line [list "" " " "  " "   " "    "] {
+    # Blank lines at the beginning should be removed.
+    test \
+       [multi_line_input \
+            "  \"\"\"" \
+            $line \
+            "  This is the first line." \
+            "    Indented second line." \
+            "  This is the third line.\"\"\""] \
+       [multi_line \
+            "This is the first line\\." \
+            "  Indented second line\\." \
+            "This is the third line\\."]
+
+    # As should blank lines at the end.
+    test \
+       [multi_line_input \
+            "  \"\"\"" \
+            "  This is the first line." \
+            "    Indented second line." \
+            "  This is the third line." \
+            $line \
+            "  \"\"\""] \
+       [multi_line \
+            "This is the first line\\." \
+            "  Indented second line\\." \
+            "This is the third line\\."]
+
+    # While blank lines in the middle should have all white-space
+    # removed.
+    test \
+       [multi_line_input \
+            "  \"\"\"" \
+            "  This is the first line." \
+            $line \
+            "  This is the third line." \
+            $line \
+            "  \"\"\""] \
+       [multi_line \
+            "This is the first line\\." \
+            "" \
+            "This is the third line\\."]
+}
+
+# Check for correct handling of closing quotes being on a line of
+# their own.
+test\
+    [multi_line_input \
+        "  \"\"\"  " \
+        "  This is the first line." \
+        "    Indented second line." \
+        "  This is the third line." \
+        "  \"\"\""] \
+    [multi_line \
+        "This is the first line\\." \
+        "  Indented second line\\." \
+        "This is the third line\\." ]
+
+
+# Check with a single 'x' character before the closing quotes.  Make
+# sure we don't drop this character.
+test\
+    [multi_line_input \
+        "  \"\"\"  " \
+        "  This is the first line." \
+        "    Indented second line." \
+        "  This is the third line." \
+        "  x\"\"\""] \
+    [multi_line \
+        "This is the first line\\." \
+        "  Indented second line\\." \
+        "This is the third line\\." \
+        "x"]
author	Andrew Burgess <aburgess@redhat.com>
	Mon, 16 May 2022 18:26:54 +0000 (19:26 +0100)
committer	Andrew Burgess <aburgess@redhat.com>
	Sat, 28 May 2022 09:36:50 +0000 (10:36 +0100)
gdb/NEWS		patch \| blob \| history
gdb/python/py-cmd.c		patch \| blob \| history
gdb/python/py-param.c		patch \| blob \| history
gdb/python/py-utils.c		patch \| blob \| history
gdb/python/python-internal.h		patch \| blob \| history
gdb/testsuite/gdb.python/py-doc-reformat.exp	[new file with mode: 0644]	patch \| blob