+++ /dev/null
-import os, glob
-from bs4 import BeautifulSoup
-from subprocess import run, PIPE
-from urllib.parse import urlparse
-import dashtable
-
-def html_to_rst(input):
- return run(['pandoc', '-f', 'html', '-t', 'rst'],
- input=input, stdout=PIPE, universal_newlines=True).stdout
-
-def convert_toc(filename):
- with open(filename, encoding='utf8') as input:
- soup = BeautifulSoup(input, 'html5lib')
- body = soup.find('body')
- with open('./docs/contents.rst', 'w', encoding='utf-8') as output:
- for elm in body.contents:
- if elm.name == 'h2':
- output.write(""".. toctree::
- :maxdepth: 1
- :caption: {0}
- :hidden:\n""".format(elm.get_text()))
- elif elm.name == 'ul':
- output.write('\n')
- for li in elm.contents:
- if li.name == 'li':
- a = li.find('a')
- url = a['href']
- if url == 'index.html':
- output.write(' self\n')
- elif bool(urlparse(url).netloc):
- output.write(' {0} <{1}>\n'.format(a.get_text(), url))
- else:
- output.write(' {0}\n'.format(url[:-5]))
- output.write('\n')
- elif elm.name == 'dl':
- a = elm.find('a')
- output.write('\n {0} <{1}>\n'.format(a.get_text(), url))
- elif hasattr(elm, 'contents'):
- print('**** UNKNOWN: ' + str(elm))
- exit(1)
- print("SUCCESS: " + filename)
-
-def convert_article(filename):
- with open(filename, encoding='utf8') as input:
- soup = BeautifulSoup(input, 'html5lib')
-
- table = None
- if filename == './docs/release-calendar.html':
- table = dashtable.html2rst(str(soup.table.extract()))
-
- content = soup.find('div', 'content')
- content = ''.join(map(str, content.contents))
- content = html_to_rst(str(content))
-
- if table:
- content = '\n'.join([content, table, ''])
-
- with open(os.path.splitext(filename)[0]+'.rst', 'w', encoding='utf-8') as output:
- output.write(str(content))
- if filename == './docs/relnotes.html':
- output.write("""\n.. toctree::
- :maxdepth: 1
- :hidden:\n""")
- output.write('\n')
- for li in soup.findAll('li'):
- a = li.find('a')
- url = a['href']
- split = os.path.splitext(url)
- if split[1] == '.html':
- output.write(' {0}\n'.format(split[0]))
- output.write(' Older Versions <versions>\n')
-
- print("SUCCESS: " + filename)
-
-for filename in glob.iglob('./docs/**/*.html', recursive=True):
- if filename == './docs/contents.html':
- convert_toc(filename)
- else:
- convert_article(filename)