debeccec3623e48bc28b03f9d84b1d69070f2fec
[openpower-isa.git] / pagereader.py
1 # Reads OpenPOWER ISA pages from http://libre-soc.org/openpower/isa
2 """OpenPOWER ISA page parser
3
builds an OrderedDict (ISA.instr) of namedtuple "Ops" containing details of
all instructions listed in markdown files.

format must be strictly as follows (no optional sections) including whitespace:

# Compare Logical

X-Form

* cmpl BF,L,RA,RB

Pseudo-code:

    if L = 0 then a <- [0]*32 || (RA)[32:63]
    b <- [0]*32 || (RB)[32:63]
    else a <- (RA)
    b <- (RB)
    if a <u b then c <- 0b100
    else if a >u b then c <- 0b010
    else c <- 0b001
    CR[4*BF+32:4*BF+35] <- c || XER[SO]

Special Registers Altered:

    CR field BF
    Another field

this translates to:

# heading
blank
Some-Form
blank
* instruction registerlist
* instruction registerlist
blank
Pseudo-code:
blank
4-space-indented pseudo-code
4-space-indented pseudo-code
blank
Special Registers Altered:
blank
4-space-indented register description
blank
blank(s) (optional for convenience at end-of-page)
45 """
46
47 from openpower.util import log
48 from collections import namedtuple, OrderedDict
49 from copy import copy
50 import os
51 import re
52
# Field names of the per-instruction record, in positional order.
# ISA.add_op() builds an Ops from a dict that carries exactly these keys.
opfields = (
    "desc",
    "form",
    "opcode",
    "regs",
    "pcode",
    "sregs",
    "page",
)
Ops = namedtuple("Ops", opfields)
55
56
def get_isa_dir():
    """Return the path of the "openpower/isa" directory holding the pages.

    The location is derived from this module's own path: starting at the
    directory containing this file, walk up four parent directories and
    then descend into "openpower/isa".
    """
    base = os.path.abspath(os.path.dirname(__file__))
    for _ in range(4):
        # os.path.dirname(p) is the head part of os.path.split(p)
        base = os.path.dirname(base)
    return os.path.join(base, "openpower", "isa")
65
66
# Patterns for the three space-separated parts of an opcode line:
#   * <opcode> <dynamic operands> (<static operands>)
# opcode:  letters, digits, underscores and dots (e.g. "add.")
# dynamic: comma-separated names, each optionally followed by one or more
#          parenthesised names (e.g. "RT,D(RA)")
# static:  parenthesised, whitespace-separated NAME=0 / NAME=1 assignments
#          (e.g. "(OE=0 Rc=1)")
pattern_opcode = r"[A-Za-z0-9_\.]+\.?"
pattern_dynamic = r"[A-Za-z0-9_]+(?:\([A-Za-z0-9_]+\))*"
pattern_static = r"[A-Za-z0-9]+\=[01]"
regex_opcode = re.compile(f"^{pattern_opcode}$")
regex_dynamic = re.compile(f"^{pattern_dynamic}(?:,{pattern_dynamic})*$")
# raw f-string: "\(" and "\s" are regex escapes, not string escapes, and a
# non-raw literal triggers an invalid-escape-sequence warning.
regex_static = re.compile(rf"^\({pattern_static}(?:\s{pattern_static})*\)$")
73
74
def operands(opcode, desc):
    """Yield the individual operand names found in an operand description.

    Parentheses are removed outright (so "D(RA)" becomes "DRA"), commas are
    treated as separators, and the result is split on single spaces; empty
    pieces are skipped.  Nothing is yielded when *desc* is None.

    *opcode* is accepted but not used.
    """
    if desc is None:
        return
    flattened = desc.replace("(", "").replace(")", "").replace(",", " ")
    # filter(None, ...) drops the empty strings left by repeated separators
    yield from filter(None, map(str.strip, flattened.split(" ")))
85
86
class ISA:
    """Index of the instructions described by the ISA markdown pages.

    On construction every ".mdwn" file in the directory returned by
    get_isa_dir() is parsed with read_file().  The results are kept in:

    * instr: OrderedDict mapping opcode name to its Ops namedtuple
    * forms: dict mapping form name to the list of opcode names using it
    * page:  dict mapping page name to the list of opcode names on it

    Iterating over an instance yields the (opcode, Ops) pairs of instr.
    """

    def __init__(self):
        self.instr = OrderedDict()
        self.forms = {}
        self.page = {}
        self.verbose = False
        isa_dir = get_isa_dir()
        for pth in os.listdir(isa_dir):
            if self.verbose:
                print("examining", isa_dir, pth)
            # names containing "swp" are skipped silently
            # (presumably editor swap files)
            if "swp" in pth:
                continue
            if not pth.endswith(".mdwn"):
                log("warning, file not .mdwn, skipping", pth)
                continue
            self.read_file(pth)
        # NOTE: read_file_for_rewrite() is deliberately not called here.
        # It helped add in the keyword "Pseudo-code:" automatically; its
        # result used to be joined with newlines and written to /tmp/<pth>.

    def __iter__(self):
        """Yield (opcode, Ops) pairs in insertion order."""
        yield from self.instr.items()

    def read_file_for_rewrite(self, fname):
        """Return the lines of page *fname* with "Pseudo-code:" inserted.

        *fname* is a file name relative to get_isa_dir().  The page is
        expected to be in the format *without* the "Pseudo-code:" keyword;
        the keyword and a following blank line are inserted after each list
        of opcodes.  The result is a list of strings without line endings.

        Raises AssertionError if the page does not follow the expected
        layout and IndexError if it ends in the middle of a section.
        """
        fname = os.path.join(get_isa_dir(), fname)
        with open(fname) as f:
            lines = f.readlines()
        return self._rewrite_lines(lines)

    def _rewrite_lines(self, lines):
        """Rewrite already-read page *lines* (consumed in place); see
        read_file_for_rewrite() for the contract."""
        rewrite = []
        if not lines:
            # empty page: nothing to rewrite
            return rewrite

        l = lines.pop(0).rstrip()  # get first line
        rewrite.append(l)
        while lines:
            if self.verbose:
                print(l)
            # HTML comments and blank lines before a heading are passed
            # through.  Only the start of a comment is looked for, so
            # comments must begin and end on the same line:
            #     <!-- line 1 comment -->
            #     {some whitespace}<!-- line 2 comment -->
            # The next line has to be fetched *and* recorded here,
            # otherwise the loop would spin forever on a blank line or
            # lose the line that follows a comment.
            stripped = l.strip()
            if stripped.startswith('<!--') or len(stripped) == 0:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                continue

            # expect get heading
            assert l.startswith('#'), ("# not found in line %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # Form expected
            l = lines.pop(0).strip()
            assert l.endswith('-Form'), ("line with -Form expected %s" % l)
            rewrite.append(l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # get list of opcodes
            while True:
                l = lines.pop(0).strip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('*'), ("* not found in line %s" % l)

            rewrite.append("Pseudo-code:")
            rewrite.append("")
            # get pseudocode
            while True:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if l.strip().startswith('<!--'):
                    # comment inside the pseudo-code: keep it and move on
                    # without discarding the line that follows it
                    continue
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)

            # "Special Registers Altered" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Special"), ("special not found %s" % l)
            rewrite.append(l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # get special regs
            while lines:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)

            # expect and drop whitespace; stops on the next heading, which
            # is left in l for the next iteration of the outer loop
            while lines:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) != 0 and not l.strip().startswith('<!--'):
                    break

        return rewrite

    def read_file(self, fname):
        """Parse page *fname* and register every instruction it describes.

        *fname* is a file name relative to get_isa_dir(); the part before
        the first "." is recorded as the page name.  Each opcode line of
        each section is registered through add_op().

        Raises AssertionError if the page does not follow the mandatory
        layout, IOError if an opcode line has malformed opcode or operand
        syntax, and IndexError if the page ends in the middle of a section.
        """
        pagename = fname.split('.')[0]
        fname = os.path.join(get_isa_dir(), fname)
        with open(fname) as f:
            lines = f.readlines()

        # an empty page describes no instructions
        if not lines:
            return

        # set up dict with current page name
        d = {'page': pagename}

        # line-by-line lexer/parser, quite straightforward: pops one
        # line off the list and checks it.  nothing complicated needed,
        # all sections are mandatory so no need for a full LALR parser.

        l = lines.pop(0).rstrip()  # get first line
        while lines:
            if self.verbose:
                print(l)
            # look for HTML comment, if starting, skip line.
            # Only the start of the comment is looked for, so comments
            # must begin and end on the same line:
            #     <!-- line 1 comment -->
            #     <!-- line 2 comment -->
            if l.strip().startswith('<!--'):
                l = lines.pop(0).rstrip()  # get next line
                continue

            # Ignore blank lines before the first #
            if len(l) == 0:
                l = lines.pop(0).rstrip()  # get next line
                continue

            # expect get heading
            assert l.startswith('#'), ("# not found in line '%s'" % l)
            d['desc'] = l[1:].strip()

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)

            # Form expected
            l = lines.pop(0).strip()
            assert l.endswith('-Form'), ("line with -Form expected %s" % l)
            d['form'] = l.split('-')[0]

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)

            # get list of opcodes.  each entry is a list:
            #   [name, [dynamic operands]?, static assignment, ...]
            opcodes = []
            while True:
                l = lines.pop(0).strip()
                if len(l) == 0:
                    break
                assert l.startswith('*'), ("* not found in line %s" % l)
                rest = l[1:].strip()

                # first word: the opcode name
                (opcode, _, rest) = map(str.strip, rest.partition(" "))
                if regex_opcode.match(opcode) is None:
                    raise IOError(repr(opcode))
                opcode = [opcode]

                # second word (optional): comma-separated dynamic operands
                (dynamic, _, rest) = map(str.strip, rest.partition(" "))
                if regex_dynamic.match(dynamic) is None and dynamic:
                    raise IOError(f"{l!r}: {dynamic!r}")
                if dynamic:
                    opcode.append(dynamic.split(","))

                # remainder (optional): "(NAME=0 NAME=1 ...)" static operands
                static = rest
                if regex_static.match(static) is None and static:
                    raise IOError(f"{l!r}: {static!r}")
                if static:
                    # drop the surrounding parentheses before splitting
                    opcode.extend(static[1:-1].split(" "))

                opcodes.append(opcode)

            # "Pseudocode" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Pseudo-code:"), \
                ("pseudocode not found %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)

            # get pseudocode
            li = []
            while True:
                l = lines.pop(0).rstrip()
                if l.strip().startswith('<!--'):
                    continue
                if len(l) == 0:
                    break
                # all four spaces must be present: they are sliced off next
                assert l.startswith('    '), ("4spcs not found in line %s" % l)
                l = l[4:]  # lose 4 spaces
                li.append(l)
            d['pcode'] = li

            # "Special Registers Altered" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Special"), ("special not found %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)

            # get special regs
            li = []
            while lines:
                l = lines.pop(0).rstrip()
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)
                l = l[4:]  # lose 4 spaces
                li.append(l)
            d['sregs'] = li

            # add in opcode
            for o in opcodes:
                self.add_op(o, d)

            # expect and drop whitespace and comments; stops on the next
            # heading, which is left in l for the next outer iteration
            while lines:
                l = lines.pop(0).rstrip()
                if len(l) != 0 and not l.strip().startswith('<!--'):
                    break

    def add_op(self, o, d):
        """Register one opcode.

        *o* is a list whose first item is the opcode name and whose
        remaining items are stored as the "regs" field.  *d* is the dict of
        fields shared by the section (desc, form, pcode, sregs, page); it
        is copied, not modified.  An existing entry with the same opcode
        name in instr is replaced.
        """
        opcode, regs = o[0], o[1:]
        op = copy(d)
        op['regs'] = regs
        op['opcode'] = opcode
        self.instr[opcode] = Ops(**op)

        # create list of instructions by form
        form = op['form']
        fl = self.forms.get(form, [])
        self.forms[form] = fl + [opcode]

        # create list of instructions by page
        page = op['page']
        pl = self.page.get(page, [])
        self.page[page] = pl + [opcode]

    def pprint_ops(self):
        """Print every instruction, then the opcode list of every form."""
        for v in self.instr.values():
            print("# %s %s" % (v.opcode, v.desc))
            print("Form: %s Regs: %s" % (v.form, v.regs))
            print('\n'.join(map(lambda x: " %s" % x, v.pcode)))
            print("Specials")
            print('\n'.join(map(lambda x: " %s" % x, v.sregs)))
            print()
        # use this instance's forms, not a module-level global
        for k, v in self.forms.items():
            print(k, v)
365
366
if __name__ == '__main__':
    # Parse every page and dump what was found.
    isa = ISA()
    isa.pprint_ops()
    # example on how to access cmp regs:
    cmp_regs = isa.instr["cmp"].regs
    print("cmp regs:", cmp_regs)