TEMP: add rst-conversion scripts
[mesa.git] / convert-sphinx.py
1 import os, glob
2 from bs4 import BeautifulSoup
3 from subprocess import run, PIPE
4 from urllib.parse import urlparse
5 import dashtable
6
def html_to_rst(input):
    """Convert an HTML fragment to reStructuredText by piping it through pandoc.

    Args:
        input: HTML markup as a string; it is fed to pandoc on stdin.

    Returns:
        The reStructuredText that pandoc wrote to stdout, as a string.

    Raises:
        FileNotFoundError: if the ``pandoc`` executable cannot be found.
        subprocess.CalledProcessError: if pandoc exits with a non-zero
            status, so that a failed conversion is never mistaken for an
            empty document.
    """
    result = run(
        ['pandoc', '-f', 'html', '-t', 'rst'],
        input=input,
        stdout=PIPE,
        universal_newlines=True,
        # pandoc reads and writes UTF-8 regardless of the locale, so pin the
        # pipe encoding instead of relying on the platform default.
        encoding='utf-8',
        check=True,
    )
    return result.stdout
10
def convert_toc(filename):
    """Convert the HTML table-of-contents page into ``./docs/contents.rst``.

    Walks the direct children of ``<body>`` in *filename*:

    * each ``<h2>`` starts a hidden ``toctree`` directive captioned with the
      heading text;
    * each ``<ul>`` contributes one toctree entry per ``<li>`` link
      (``index.html`` becomes ``self``, absolute URLs become titled external
      links, anything else becomes a document name without ``.html``);
    * a ``<dl>`` contributes a single titled entry for its first link.

    Any other tag found directly under ``<body>`` is printed and the script
    terminates with exit status 1.

    Args:
        filename: path of the HTML contents page to read.
    """
    with open(filename, encoding='utf8') as source:
        soup = BeautifulSoup(source, 'html5lib')
    body = soup.find('body')

    with open('./docs/contents.rst', 'w', encoding='utf-8') as output:
        for elm in body.contents:
            if elm.name == 'h2':
                # Options and entries share one three-space indent so they
                # are parsed as the body of the directive.
                output.write(
                    '.. toctree::\n'
                    '   :maxdepth: 1\n'
                    '   :caption: {0}\n'
                    '   :hidden:\n'.format(elm.get_text()))
            elif elm.name == 'ul':
                output.write('\n')
                for li in elm.contents:
                    if li.name != 'li':
                        continue
                    a = li.find('a')
                    url = a['href']
                    if url == 'index.html':
                        output.write('   self\n')
                    elif urlparse(url).netloc:
                        # A network location means the link leaves the docs
                        # tree, so keep it as a titled external entry.
                        output.write('   {0} <{1}>\n'.format(a.get_text(), url))
                    else:
                        # Only drop the extension when it is really there.
                        if url.endswith('.html'):
                            url = url[:-len('.html')]
                        output.write('   {0}\n'.format(url))
                output.write('\n')
            elif elm.name == 'dl':
                a = elm.find('a')
                # Use this anchor's own target, not the URL left over from
                # the last processed list item.
                output.write('\n   {0} <{1}>\n'.format(a.get_text(), a['href']))
            elif hasattr(elm, 'contents'):
                print('**** UNKNOWN: ' + str(elm))
                # SystemExit works even when the site-provided exit() helper
                # is unavailable.
                raise SystemExit(1)
    print("SUCCESS: " + filename)
42
def convert_article(filename):
    """Convert one HTML article into an ``.rst`` file written next to it.

    The children of ``<div class="content">`` in *filename* are converted
    with ``html_to_rst`` and written to the same path with the extension
    replaced by ``.rst``. Two pages get extra handling:

    * ``./docs/release-calendar.html``: the first ``<table>`` is detached
      from the document, converted with ``dashtable`` and appended after the
      converted content.
    * ``./docs/relnotes.html``: a hidden ``toctree`` is appended that lists
      every ``.html`` target linked from an ``<li>``, followed by an
      "Older Versions" entry.

    Args:
        filename: path of the HTML article to read.
    """
    with open(filename, encoding='utf8') as source:
        soup = BeautifulSoup(source, 'html5lib')

    table = None
    if filename == './docs/release-calendar.html':
        # extract() detaches the table from the tree, so it is no longer part
        # of whatever is serialized for pandoc below.
        table = dashtable.html2rst(str(soup.table.extract()))

    content_div = soup.find('div', 'content')
    content = html_to_rst(''.join(map(str, content_div.contents)))

    if table:
        content = '\n'.join([content, table, ''])

    rst_path = os.path.splitext(filename)[0] + '.rst'
    with open(rst_path, 'w', encoding='utf-8') as output:
        output.write(content)
        if filename == './docs/relnotes.html':
            # Options and entries share one three-space indent so they are
            # parsed as the body of the directive.
            output.write(
                '\n.. toctree::\n'
                '   :maxdepth: 1\n'
                '   :hidden:\n')
            output.write('\n')
            for li in soup.find_all('li'):
                a = li.find('a')
                # Skip list items without a usable link instead of crashing.
                if a is None or not a.has_attr('href'):
                    continue
                root, ext = os.path.splitext(a['href'])
                if ext == '.html':
                    output.write('   {0}\n'.format(root))
            output.write('   Older Versions <versions>\n')

    print("SUCCESS: " + filename)
74
# Convert every HTML page found recursively under ./docs. The contents page
# is turned into the toctree file; every other page is converted as an article.
for filename in glob.iglob('./docs/**/*.html', recursive=True):
    if filename != './docs/contents.html':
        convert_article(filename)
        continue
    convert_toc(filename)