TEMP: add rst-conversion scripts
author Erik Faye-Lund <erik.faye-lund@collabora.com>
Mon, 10 Jun 2019 17:08:35 +0000 (19:08 +0200)
committer Marge Bot <eric+marge@anholt.net>
Sat, 13 Jun 2020 10:42:00 +0000 (10:42 +0000)
This is just a temporary commit, adding the scripts that perform the
automated conversion of the docs. The next commit contains the results
of the conversion, and the commit following that removes these scripts
again.

To redo the conversion in the next commit, rebase interactively to edit
this commit and delete the next one, and run './update-docs.sh' from the
root directory. Then continue the rebasing, and resolve any conflicts
that might have occurred in the manual fixes on top. Finally, build the
documentation to ensure no further fixups are needed.

Reviewed-by: Eric Engestrom <eric@engestrom.ch>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4630>

convert-sphinx.py [new file with mode: 0644]
update-docs.sh [new file with mode: 0755]

diff --git a/convert-sphinx.py b/convert-sphinx.py
new file mode 100644 (file)
index 0000000..a260c20
--- /dev/null
@@ -0,0 +1,79 @@
+import os, glob
+from bs4 import BeautifulSoup
+from subprocess import run, PIPE
+from urllib.parse import urlparse
+import dashtable
+
+def html_to_rst(input):
+    return run(['pandoc', '-f', 'html', '-t', 'rst'],
+               input=input, stdout=PIPE, universal_newlines=True).stdout
+
+def convert_toc(filename):
+    with open(filename, encoding='utf8') as input:
+        soup = BeautifulSoup(input, 'html5lib')
+        body = soup.find('body')
+        with open('./docs/contents.rst', 'w', encoding='utf-8') as output:
+            for elm in body.contents:
+                if elm.name == 'h2':
+                    output.write(""".. toctree::
+   :maxdepth: 1
+   :caption: {0}
+   :hidden:\n""".format(elm.get_text()))
+                elif elm.name == 'ul':
+                    output.write('\n')
+                    for li in elm.contents:
+                        if li.name == 'li':
+                            a = li.find('a')
+                            url = a['href']
+                            if url == 'index.html':
+                                output.write('   self\n')
+                            elif bool(urlparse(url).netloc):
+                                output.write('   {0} <{1}>\n'.format(a.get_text(), url))
+                            else:
+                                output.write('   {0}\n'.format(url[:-5]))
+                    output.write('\n')
+                elif elm.name == 'dl':
+                    a = elm.find('a')
+                    output.write('\n   {0} <{1}>\n'.format(a.get_text(), url))
+                elif hasattr(elm, 'contents'):
+                    print('**** UNKNOWN: ' + str(elm))
+                    exit(1)
+    print("SUCCESS: " + filename)
+
+def convert_article(filename):
+    with open(filename, encoding='utf8') as input:
+        soup = BeautifulSoup(input, 'html5lib')
+
+        table = None
+        if filename == './docs/release-calendar.html':
+            table = dashtable.html2rst(str(soup.table.extract()))
+
+        content = soup.find('div', 'content')
+        content = ''.join(map(str, content.contents))
+        content = html_to_rst(str(content))
+
+        if table:
+            content = '\n'.join([content, table, ''])
+
+        with open(os.path.splitext(filename)[0]+'.rst', 'w', encoding='utf-8') as output:
+            output.write(str(content))
+            if filename == './docs/relnotes.html':
+                output.write("""\n.. toctree::
+   :maxdepth: 1
+   :hidden:\n""")
+                output.write('\n')
+                for li in soup.findAll('li'):
+                    a = li.find('a')
+                    url = a['href']
+                    split = os.path.splitext(url)
+                    if split[1] == '.html':
+                        output.write('   {0}\n'.format(split[0]))
+                output.write('   Older Versions <versions>\n')
+
+    print("SUCCESS: " + filename)
+
+for filename in glob.iglob('./docs/**/*.html', recursive=True):
+    if filename == './docs/contents.html':
+        convert_toc(filename)
+    else:
+        convert_article(filename)
diff --git a/update-docs.sh b/update-docs.sh
new file mode 100755 (executable)
index 0000000..7b16cec
--- /dev/null
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+# Regenerate the reStructuredText docs and record the result as one commit:
+#   1. run the converter over the HTML pages,
+#   2. stage the generated .rst files and remove the .html sources,
+#   3. commit with a fixed message.
+# The steps are chained with '&&' so a failure stops before committing.
+# The pathspecs are quoted so that git, not the shell, expands them.
+python3 ./convert-sphinx.py &&
+git add "docs/**.rst" && git rm "docs/**.html" &&
+git commit -am "docs: convert articles to reStructuredText
+
+This uses the previously added scripts to convert the documentation to
+reStructuredText, which is both easier to read offline, and can be used
+to generate modern HTML for online documentation.
+
+No modifications to the generated results have been made.
+
+Acked-by: Eric Engestrom <eric@engestrom.ch>"