2 from bs4
import BeautifulSoup
3 from subprocess
import run
, PIPE
4 from urllib
.parse
import urlparse
def html_to_rst(input):
    """Convert HTML markup to reStructuredText by piping it through pandoc.

    The text in *input* is fed to a ``pandoc -f html -t rst`` child process
    on its standard input; whatever the process prints on its standard
    output is returned as a string.
    """
    command = ['pandoc', '-f', 'html', '-t', 'rst']
    # universal_newlines=True puts the pipes in text mode, so the data sent
    # and the data captured are both str rather than bytes.
    completed = run(command, input=input, stdout=PIPE,
                    universal_newlines=True)
    return completed.stdout
# convert_toc(filename): read the HTML table-of-contents page `filename` and
# write an equivalent Sphinx toctree listing to './docs/contents.rst'.
#
# NOTE(review): this listing carries the original file's line numbers as
# leading tokens and the numbering skips values (17, 19-20, 23, 25-27, 32,
# 34, 36, 40), so some original lines appear to be absent. In particular the
# condition that opens each if/elif chain and the assignments to `a` and
# `url` are not visible here -- confirm against the full file.
11 def convert_toc(filename
):
# Parse the source page (read as UTF-8) with the html5lib parser.
12 with
open(filename
, encoding
='utf8') as input:
13 soup
= BeautifulSoup(input, 'html5lib')
14 body
= soup
.find('body')
# The output path is fixed; it does not depend on `filename`.
15 with
open('./docs/contents.rst', 'w', encoding
='utf-8') as output
:
# Walk the direct children of <body>; each branch below handles one kind.
16 for elm
in body
.contents
:
# Start a hidden toctree. The element text is passed to format();
# presumably it fills a placeholder on one of the lines missing from this
# view (e.g. a caption) -- TODO confirm.
18 output
.write(""".. toctree::
21 :hidden:\n""".format(elm
.get_text()))
# <ul>: one toctree entry per child of the list.
22 elif elm
.name
== 'ul':
24 for li
in elm
.contents
:
# The index page itself is written as the special 'self' entry.
28 if url
== 'index.html':
29 output
.write(' self\n')
# A URL with a network location is an external link: keep title and URL.
30 elif bool(urlparse(url
).netloc
):
31 output
.write(' {0} <{1}>\n'.format(a
.get_text(), url
))
# Otherwise write the URL minus its last five characters (presumably the
# '.html' suffix -- verify).
33 output
.write(' {0}\n'.format(url
[:-5]))
# <dl>: written as a 'title <url>' entry preceded by a blank line.
35 elif elm
.name
== 'dl':
37 output
.write('\n {0} <{1}>\n'.format(a
.get_text(), url
))
# Any other node that has a `contents` attribute is reported, not converted.
38 elif hasattr(elm
, 'contents'):
39 print('**** UNKNOWN: ' + str(elm
))
41 print("SUCCESS: " + filename
)
# convert_article(filename): convert one HTML article to reStructuredText and
# write it next to the source file, with the extension replaced by '.rst'.
#
# NOTE(review): this listing carries the original file's line numbers as
# leading tokens and the numbering skips values (46-47, 50, 54-55, 57, 62-64,
# 66-67, 72), so some original lines appear to be absent -- including the
# remainder of the triple-quoted toctree literal near the end and the
# assignment to `url`. Confirm against the full file.
43 def convert_article(filename
):
# Parse the source page (read as UTF-8) with the html5lib parser.
44 with
open(filename
, encoding
='utf8') as input:
45 soup
= BeautifulSoup(input, 'html5lib')
# The release calendar's table is removed from the tree (extract()) and
# converted on its own with dashtable rather than going through pandoc.
48 if filename
== './docs/release-calendar.html':
49 table
= dashtable
.html2rst(str(soup
.table
.extract()))
# Take the children of the <div> matched by find('div', 'content'),
# serialise them back to one HTML string and hand that to html_to_rst().
51 content
= soup
.find('div', 'content')
52 content
= ''.join(map(str, content
.contents
))
53 content
= html_to_rst(str(content
))
# Append the separately converted table after the text. `table` is only
# assigned for the release calendar above, so this is presumably guarded by
# a condition on a line missing from this view -- TODO confirm.
56 content
= '\n'.join([content
, table
, ''])
# Output path: same as the input with its extension swapped for '.rst'.
58 with
open(os
.path
.splitext(filename
)[0]+'.rst', 'w', encoding
='utf-8') as output
:
59 output
.write(str(content
))
# The release-notes page additionally gets a toctree appended: every <li> in
# the page is visited, `url` is split into (root, extension), and an entry is
# written for the root of each '.html' link, followed by a fixed
# 'Older Versions <versions>' entry. How `url` is derived from the <li> is
# not visible in this view.
60 if filename
== './docs/relnotes.html':
61 output
.write("""\n.. toctree::
65 for li
in soup
.findAll('li'):
68 split
= os
.path
.splitext(url
)
69 if split
[1] == '.html':
70 output
.write(' {0}\n'.format(split
[0]))
71 output
.write(' Older Versions <versions>\n')
73 print("SUCCESS: " + filename
)
# Script driver: visit every '.html' file under ./docs, recursing into
# subdirectories, and convert it.
# NOTE(review): the embedded numbering skips 77-78, so the body of the
# contents.html branch (and presumably an `else:`) is missing from this view;
# only the convert_article() call is visible -- confirm against the full file.
75 for filename
in glob
.iglob('./docs/**/*.html', recursive
=True):
# The table-of-contents page is singled out from ordinary articles.
76 if filename
== './docs/contents.html':
79 convert_article(filename
)