use raise_syntax_error for `IndentationError`s as well
[openpower-isa.git] / src / openpower / decoder / pseudo / pagereader.py
1 # Reads OpenPOWER ISA pages from http://libre-soc.org/openpower/isa
2 """OpenPOWER ISA page parser
3
4 returns an OrderedDict of namedtuple "Ops" containing details of all
5 instructions listed in markdown files.
6
7 format must be strictly as follows (no optional sections) including whitespace:
8
# Compare Logical

X-Form

* cmpl BF,L,RA,RB

Pseudo-code:

    if L = 0 then a <- [0]*32 || (RA)[32:63]
                  b <- [0]*32 || (RB)[32:63]
             else a <- (RA)
                  b <- (RB)
    if a <u b then c <- 0b100
    else if a >u b then c <- 0b010
    else c <- 0b001
    CR[4*BF+32:4*BF+35] <- c || XER[SO]

Special Registers Altered:

    CR field BF
    Another field
27 Another field
28
this translates to:

    # heading
    blank
    Some-Form
    blank
    * instruction registerlist
    * instruction registerlist
    blank
    Pseudo-code:
    blank
    4-space-indented pseudo-code
    4-space-indented pseudo-code
    blank
    Special Registers Altered:
    blank
    4-space-indented register description
    blank
    blank(s) (optional for convenience at end-of-page)
45 """
46
47 from openpower.util import log
48 from openpower.decoder.orderedset import OrderedSet
49 from collections import namedtuple, OrderedDict
50 from copy import copy
51 import os
52 import re
53
# field names of an "Ops" record, in positional order.  every key that
# ends up in the per-instruction dict must be listed here.
opfields = (
    "desc",
    "form",
    "opcode",
    "regs",
    "pcode",
    "sregs",
    "page",
    "extra_uninit_regs",
)
Ops = namedtuple(typename="Ops", field_names=opfields)
57
58
def get_isa_dir():
    """Return the path of the "openpower/isa" directory.

    The path is built by taking the absolute directory containing this
    file, going up four directory levels, and appending "openpower" and
    "isa".  No check is made that the directory actually exists.
    """
    base = os.path.abspath(os.path.dirname(__file__))
    # climb four levels up from the directory holding this module
    for _ in range(4):
        base = os.path.dirname(base)
    return os.path.join(base, "openpower", "isa")
67
68
# regular expressions used to validate the parts of an opcode line
# ("* opcode dynamic-operands (static-operands)"):
#
# * opcode:  letters, digits, "_" and "." (e.g. "add.")
# * dynamic: comma-separated operand names, each optionally followed by
#            parenthesised names (e.g. "RT,D(RA)")
# * static:  parenthesised, whitespace-separated NAME=0 / NAME=1 pairs
#            (e.g. "(OE=0 Rc=1)")
pattern_opcode = r"[A-Za-z0-9_\.]+\.?"
pattern_dynamic = r"[A-Za-z0-9_]+(?:\([A-Za-z0-9_]+\))*"
pattern_static = r"[A-Za-z0-9]+\=[01]"
regex_opcode = re.compile(f"^{pattern_opcode}$")
regex_dynamic = re.compile(f"^{pattern_dynamic}(?:,{pattern_dynamic})*$")
# raw f-string: "\(", "\s" and "\)" are regex escapes, not string escapes,
# and trigger an invalid-escape-sequence warning in a non-raw literal
regex_static = re.compile(rf"^\({pattern_static}(?:\s{pattern_static})*\)$")
75
76
def operands(opcode, desc):
    """Yield the operand names contained in the operand string *desc*.

    Parentheses are deleted and commas are treated as separators, then
    the string is split on single spaces; empty pieces are dropped.
    Nothing is yielded when *desc* is None.  *opcode* is accepted but
    not used.
    """
    if desc is None:
        return
    # "(" and ")" are removed outright, "," becomes a separating space
    table = str.maketrans({"(": None, ")": None, ",": " "})
    pieces = (piece.strip() for piece in desc.translate(table).split(" "))
    yield from (piece for piece in pieces if piece)
87
88
class ISA:
    """In-memory database of the instructions described by the ISA pages.

    On construction every ".mdwn" file found in the directory returned by
    get_isa_dir() is parsed with read_file().  The results are kept in:

    * instr: OrderedDict mapping an opcode name to its Ops namedtuple
    * forms: dict mapping a form name (the text before "-Form") to the
             list of opcode names using that form
    * page:  dict mapping a page name (file name up to the first ".")
             to the list of opcode names defined on that page

    Iterating over an ISA instance yields (opcode, Ops) pairs.
    """

    def __init__(self):
        self.instr = OrderedDict()
        self.forms = {}
        self.page = {}
        self.verbose = False
        isa_dir = get_isa_dir()
        # os.listdir() returns entries in arbitrary order: sort them so
        # that the ordering of self.instr/forms/page is reproducible
        for pth in sorted(os.listdir(isa_dir)):
            if self.verbose:
                print("examining", isa_dir, pth)
            # NOTE: substring match, so *any* file name containing "swp"
            # is silently skipped, not only editor swap files
            if "swp" in pth:
                continue
            if not pth.endswith(".mdwn"):
                log("warning, file not .mdwn, skipping", pth)
                continue
            self.read_file(pth)
            # read_file_for_rewrite() is a one-off helper which helped add
            # in the keyword "Pseudo-code:" automatically.  it is kept for
            # reference and deliberately not called here.

    def __iter__(self):
        """Iterate over (opcode, Ops) pairs in insertion order."""
        yield from self.instr.items()

    def read_file_for_rewrite(self, fname):
        """Return the lines of page *fname* with "Pseudo-code:" inserted.

        *fname* is a file name relative to get_isa_dir().  The page is
        expected to be in the layout that has no "Pseudo-code:" keyword:
        the keyword and a blank line are inserted in front of every
        pseudo-code section.  All other lines (including HTML comments)
        are carried over, with whitespace stripped as they are read.

        Returns a list of lines without trailing newlines (empty for an
        empty file).  Raises AssertionError if the page does not have
        the expected layout, and IndexError if it ends in the middle of
        an instruction description.
        """
        fname = os.path.join(get_isa_dir(), fname)
        with open(fname) as f:
            lines = f.readlines()
        rewrite = []
        if not lines:  # empty page: nothing to rewrite
            return rewrite

        l = lines.pop(0).rstrip()  # get first line
        rewrite.append(l)
        while lines:
            if self.verbose:
                print(l)
            # skip HTML comments and blank lines before the heading.
            # XXX this is braindead! it doesn't look for the end
            # so please put ending of comments on one line:
            # <!-- line 1 comment -->
            # {some whitespace}<!-- line 2 comment -->
            if l.strip().startswith('<!--') or len(l.strip()) == 0:
                # fetch the next line (otherwise this loop never ends)
                # and keep it in the output so that nothing is lost
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                continue

            # expect get heading
            assert l.startswith('#'), ("# not found in line %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # Form expected
            l = lines.pop(0).strip()
            assert l.endswith('-Form'), ("line with -Form expected %s" % l)
            rewrite.append(l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # get list of opcodes
            while True:
                l = lines.pop(0).strip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('*'), ("* not found in line %s" % l)

            rewrite.append("Pseudo-code:")
            rewrite.append("")
            # get pseudocode
            while True:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                if l.strip().startswith('<!--'):
                    # comments are carried over unchecked
                    continue
                assert l.startswith('    '), ("4spcs not found in line %s" % l)

            # "Special Registers Altered" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Special"), ("special not found %s" % l)
            rewrite.append(l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # get special regs
            while lines:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)

            # expect and drop whitespace: stops at the next line which is
            # neither blank nor a comment (i.e. the next heading)
            while lines:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) != 0 and not l.strip().startswith('<!--'):
                    break

        return rewrite

    def read_file(self, fname):
        """Parse ISA page *fname* and register every instruction in it.

        *fname* is a file name relative to get_isa_dir(); the text up to
        its first "." becomes the page name.  For every instruction
        description the heading, form, opcode list, pseudo-code and
        special registers are collected and add_op() is called once per
        opcode.  An empty file adds nothing.

        The pseudo-code list is prefixed with one empty string per line
        of the file preceding it, so that an index into the list matches
        the line's position in the file.

        Raises AssertionError if the page does not have the expected
        layout, IOError if an opcode line is malformed, and IndexError
        if the file ends in the middle of an instruction description.
        """
        pagename = fname.split('.')[0]
        fname = os.path.join(get_isa_dir(), fname)
        with open(fname) as f:
            lines = f.readlines()
        if not lines:  # empty page: no instructions to add
            return

        # set up dict with current page name
        d = {'page': pagename}

        # line-by-line lexer/parser, quite straightforward: pops one
        # line off the list and checks it.  nothing complicated needed,
        # all sections are mandatory so no need for a full LALR parser.

        l = lines.pop(0).rstrip()  # get first line
        # zero-based index (within the file) of the line held in l
        prefix_lines = 0
        while lines:
            if self.verbose:
                print(l)
            # skip blank lines and HTML comments before the heading.
            # XXX this is braindead! it doesn't look for the end
            # so please put ending of comments on one line:
            # <!-- line 1 comment -->
            # <!-- line 2 comment -->
            if len(l) == 0 or l.strip().startswith('<!--'):
                l = lines.pop(0).rstrip()  # get next line
                prefix_lines += 1
                continue

            # expect get heading
            assert l.startswith('#'), ("# not found in line '%s'" % l)
            d['desc'] = l[1:].strip()

            # whitespace expected
            l = lines.pop(0).strip()
            prefix_lines += 1
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)

            # Form expected
            l = lines.pop(0).strip()
            prefix_lines += 1
            assert l.endswith('-Form'), ("line with -Form expected %s" % l)
            d['form'] = l.split('-')[0]

            # whitespace expected
            l = lines.pop(0).strip()
            prefix_lines += 1
            assert len(l) == 0, ("blank line not found %s" % l)

            # get list of opcodes.  each entry is a list: the opcode
            # name, then (if present) the list of dynamic operands, then
            # (if present) the individual static "NAME=bit" operands
            opcodes = []
            while True:
                l = lines.pop(0).strip()
                prefix_lines += 1
                if len(l) == 0:
                    break
                assert l.startswith('*'), ("* not found in line %s" % l)
                rest = l[1:].strip()

                (opcode, _, rest) = map(str.strip, rest.partition(" "))
                if regex_opcode.match(opcode) is None:
                    raise IOError(repr(opcode))
                entry = [opcode]

                (dynamic, _, rest) = map(str.strip, rest.partition(" "))
                if dynamic and regex_dynamic.match(dynamic) is None:
                    raise IOError(f"{l!r}: {dynamic!r}")
                if dynamic:
                    entry.append(dynamic.split(","))

                static = rest
                if static and regex_static.match(static) is None:
                    raise IOError(f"{l!r}: {static!r}")
                if static:
                    # drop the surrounding parentheses
                    entry.extend(static[1:-1].split(" "))

                opcodes.append(entry)

            # "Pseudocode" expected
            l = lines.pop(0).rstrip()
            prefix_lines += 1
            assert l.startswith("Pseudo-code:"), \
                ("pseudocode not found %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            prefix_lines += 1
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)

            extra_uninit_regs = OrderedSet()

            # get pseudocode

            # fix parser line numbers by prepending the right number of
            # blank lines to the parser input: one for every line before
            # the blank line just read, plus one for that blank line
            li = [""] * (prefix_lines + 1)
            while True:
                l = lines.pop(0).rstrip()
                prefix_lines += 1
                if len(l) == 0:
                    li.append(l)
                    break
                re_match = re.fullmatch(r" *<!-- EXTRA_UNINIT_REGS:(.*)-->", l)
                if re_match:
                    for i in re_match[1].split(' '):
                        if i != "":
                            extra_uninit_regs.add(i)
                    # keep the line count right
                    li.append("")
                    continue
                if l.strip().startswith('<!--'):
                    # other comments are blanked out, not dropped, again
                    # to keep the line count right
                    li.append("")
                    continue
                assert l.startswith('    '), ("4spcs not found in line %s" % l)
                l = l[4:]  # lose 4 spaces
                li.append(l)
            d['pcode'] = li
            d['extra_uninit_regs'] = extra_uninit_regs

            # "Special Registers Altered" expected
            l = lines.pop(0).rstrip()
            prefix_lines += 1
            assert l.startswith("Special"), ("special not found %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            prefix_lines += 1
            assert len(l) == 0, ("blank line not found %s" % l)

            # get special regs
            li = []
            while lines:
                l = lines.pop(0).rstrip()
                prefix_lines += 1
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)
                l = l[4:]  # lose 4 spaces
                li.append(l)
            d['sregs'] = li

            # add in opcode
            for o in opcodes:
                self.add_op(o, d)

            # expect and drop whitespace and comments: stops at the next
            # line which is neither (i.e. the next heading)
            while lines:
                l = lines.pop(0).rstrip()
                prefix_lines += 1
                if len(l) != 0 and not l.strip().startswith('<!--'):
                    break

    def add_op(self, o, d):
        """Register one opcode.

        *o* is a list whose first item is the opcode name and whose
        remaining items are its operands (stored as "regs").  *d* holds
        the fields shared by all opcodes of one instruction description;
        it is shallow-copied, so *d* itself is not modified.  An existing
        entry in self.instr with the same opcode name is replaced.
        """
        opcode, regs = o[0], o[1:]
        op = copy(d)
        op['regs'] = regs
        op['opcode'] = opcode
        self.instr[opcode] = Ops(**op)

        # create list of instructions by form
        self.forms.setdefault(op['form'], []).append(opcode)

        # create list of instructions by page
        self.page.setdefault(op['page'], []).append(opcode)

    def pprint_ops(self):
        """Print every instruction, then the opcodes grouped by form."""
        for v in self.instr.values():
            print("# %s %s" % (v.opcode, v.desc))
            print("Form: %s Regs: %s" % (v.form, v.regs))
            print('\n'.join(map(lambda x: " %s" % x, v.pcode)))
            print("Specials")
            print('\n'.join(map(lambda x: " %s" % x, v.sregs)))
            print()
        # use this instance's forms, not those of a module-level "isa"
        for k, v in self.forms.items():
            print(k, v)
397
398
if __name__ == '__main__':
    # parse all ISA pages and dump what was found.  the instance must be
    # bound to the module-level name "isa".
    isa = ISA()
    isa.pprint_ops()
    # example on how to access cmp regs:
    cmp_op = isa.instr["cmp"]
    print("cmp regs:", cmp_op.regs)