From 1df5dbf51659000f74ca9511a211a7b026bfa936 Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund
Date: Mon, 10 Jun 2019 19:08:35 +0200
Subject: [PATCH] TEMP: add rst-conversion scripts

This is just a temporary commit, adding the scripts that perform the
automated conversion of the docs. The next commit contains the results
of the conversion, and the commit following that removes these scripts
again.

To redo the conversion in the next commit, rebase interactively to edit
this commit and delete the next one, and run './update-docs.sh' from the
root directory. Then continue the rebasing, and resolve any conflicts
that might have occurred in the manual fixes on top. Finally, build the
documentation to ensure no further fixups are needed.

Reviewed-by: Eric Engestrom
Part-of:
---
Review notes: convert_toc now takes the <dl> link target from the link
itself, html_to_rst fails when pandoc fails, and the blob ids on the
"index" lines below predate these changes.

 convert-sphinx.py | 110 ++++++++++++++++++++++++++++++++++++++++++++++
 update-docs.sh    |  15 ++++++
 2 files changed, 125 insertions(+)
 create mode 100644 convert-sphinx.py
 create mode 100755 update-docs.sh

diff --git a/convert-sphinx.py b/convert-sphinx.py
new file mode 100644
index 00000000000..a260c200081
--- /dev/null
+++ b/convert-sphinx.py
@@ -0,0 +1,110 @@
+import os, glob
+from bs4 import BeautifulSoup
+from subprocess import run, PIPE
+from urllib.parse import urlparse
+import dashtable
+
+def html_to_rst(input):
+    """Convert an HTML string to reStructuredText by piping it through pandoc.
+
+    Raises subprocess.CalledProcessError if pandoc exits with a non-zero
+    status, so a failed conversion cannot be mistaken for empty output.
+    """
+    return run(['pandoc', '-f', 'html', '-t', 'rst'],
+               input=input, stdout=PIPE, universal_newlines=True,
+               check=True).stdout
+
+def convert_toc(filename):
+    """Translate the HTML table of contents into ./docs/contents.rst.
+
+    Each <h2> directly under <body> starts a hidden toctree captioned with
+    the heading text. The <li> links of a <ul> and the first link of a <dl>
+    become entries. Any other tag directly under <body> is reported and
+    ends the script with exit status 1.
+    """
+    with open(filename, encoding='utf8') as input:
+        soup = BeautifulSoup(input, 'html5lib')
+        body = soup.find('body')
+        with open('./docs/contents.rst', 'w', encoding='utf-8') as output:
+            for elm in body.contents:
+                if elm.name == 'h2':
+                    output.write(""".. toctree::
+ :maxdepth: 1
+ :caption: {0}
+ :hidden:\n""".format(elm.get_text()))
+                elif elm.name == 'ul':
+                    output.write('\n')
+                    for li in elm.contents:
+                        if li.name == 'li':
+                            a = li.find('a')
+                            url = a['href']
+                            if url == 'index.html':
+                                output.write(' self\n')
+                            elif bool(urlparse(url).netloc):
+                                # Has a host part: keep as a titled external link.
+                                output.write(' {0} <{1}>\n'.format(a.get_text(), url))
+                            else:
+                                # Local page: drop the last five characters
+                                # (presumably the '.html' suffix).
+                                output.write(' {0}\n'.format(url[:-5]))
+                    output.write('\n')
+                elif elm.name == 'dl':
+                    a = elm.find('a')
+                    # Use this link's own href: 'url' would be stale here,
+                    # left over from the last <ul> entry (or never bound).
+                    output.write('\n {0} <{1}>\n'.format(a.get_text(), a['href']))
+                elif hasattr(elm, 'contents'):
+                    print('**** UNKNOWN: ' + str(elm))
+                    raise SystemExit(1)
+        print("SUCCESS: " + filename)
+
+def convert_article(filename):
+    """Convert one HTML article into an .rst file with the same base name.
+
+    The children of the 'content' <div> are converted with html_to_rst().
+    For release-calendar.html the first <table> is converted with dashtable
+    and appended after the text. For relnotes.html a hidden toctree is
+    appended that lists every <li> link whose href ends in '.html'.
+    """
+    with open(filename, encoding='utf8') as input:
+        soup = BeautifulSoup(input, 'html5lib')
+
+        table = None
+        if filename == './docs/release-calendar.html':
+            # extract() detaches the table from the tree, so it is no longer
+            # part of the soup when the content is read below.
+            table = dashtable.html2rst(str(soup.table.extract()))
+
+        content = soup.find('div', 'content')
+        content = ''.join(map(str, content.contents))
+        content = html_to_rst(str(content))
+
+        if table:
+            content = '\n'.join([content, table, ''])
+
+        with open(os.path.splitext(filename)[0]+'.rst', 'w', encoding='utf-8') as output:
+            output.write(str(content))
+            if filename == './docs/relnotes.html':
+                output.write("""\n.. toctree::
+ :maxdepth: 1
+ :hidden:\n""")
+                output.write('\n')
+                for li in soup.findAll('li'):
+                    a = li.find('a')
+                    url = a['href']
+                    split = os.path.splitext(url)
+                    if split[1] == '.html':
+                        output.write(' {0}\n'.format(split[0]))
+                # NOTE(review): no '<target>' follows the title here; confirm
+                # that a link target was not lost from this string.
+                output.write(' Older Versions \n')
+
+        print("SUCCESS: " + filename)
+
+# Convert every HTML page under ./docs: contents.html becomes the toctree
+# file, every other page is converted as an article.
+for filename in glob.iglob('./docs/**/*.html', recursive=True):
+    if filename == './docs/contents.html':
+        convert_toc(filename)
+    else:
+        convert_article(filename)
diff --git a/update-docs.sh b/update-docs.sh
new file mode 100755
index 00000000000..7b16cecdb35
--- /dev/null
+++ b/update-docs.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Convert the docs to reStructuredText and commit the result in one step.
+# The && chain stops before staging or committing if an earlier step fails.
+
+python3 ./convert-sphinx.py &&
+git add "docs/**.rst" && git rm "docs/**.html" &&
+git commit -am "docs: convert articles to reStructuredText
+
+This uses the previously added scripts to convert the documentation to
+reStructuredText, which is both easier to read offline, and can be used
+to generate modern HTML for online documentation.
+
+No modifications to the generated results have been made.
+
+Acked-by: Eric Engestrom "
-- 
2.30.2