d3f51cd5f3a2520c2b7fb5fdec51bfab4cbd26a3
[openpower-isa.git] / src / openpower / decoder / pseudo / pagereader.py
1 # Reads OpenPOWER ISA pages from http://libre-soc.org/openpower/isa
2 """OpenPOWER ISA page parser
3
4 returns an OrderedDict of namedtuple "Ops" containing details of all
5 instructions listed in markdown files.
6
7 format must be strictly as follows (no optional sections) including whitespace:
8
9 # Compare Logical
10
11 X-Form
12
13 * cmpl BF,L,RA,RB
14
15 if L = 0 then a <- [0]*32 || (RA)[32:63]
16 b <- [0]*32 || (RB)[32:63]
17 else a <- (RA)
18 b <- (RB)
19 if a <u b then c <- 0b100
20 else if a >u b then c <- 0b010
21 else c <- 0b001
22 CR[4*BF+32:4*BF+35] <- c || XER[SO]
23
24 Special Registers Altered:
25
26 CR field BF
27 Another field
28
29 this translates to:
30
31 # heading
32 blank
33 Some-Form
34 blank
35 * instruction registerlist
36 * instruction registerlist
blank
Pseudo-code:
blank
4-space-indented pseudo-code
39 4-space-indented pseudo-code
40 blank
41 Special Registers Altered:
42 4-space-indented register description
43 blank
44 blank(s) (optional for convenience at end-of-page)
45 """
46
47 from openpower.util import log
48 from openpower.decoder.orderedset import OrderedSet
49 from collections import namedtuple, OrderedDict
50 from copy import copy
51 import os
52 import re
53
# Names of the fields carried by every ``Ops`` record, in positional order.
# The parser fills them from one page entry: the heading text, the form
# name, the opcode, its register operands, the pseudo-code lines, the
# "Special Registers Altered" lines, the page name, and the set of
# registers named in EXTRA_UNINIT_REGS comments.
opfields = (
    "desc",
    "form",
    "opcode",
    "regs",
    "pcode",
    "sregs",
    "page",
    "extra_uninit_regs",
)

# Immutable record type describing a single instruction.
Ops = namedtuple("Ops", opfields)
57
58
def get_isa_dir():
    """Return the path of the ``openpower/isa`` directory.

    The location is derived from this source file: starting at the
    absolute directory containing the file, four trailing path components
    are removed and ``openpower/isa`` is appended to what remains.
    """
    base = os.path.abspath(os.path.dirname(__file__))
    # climb four directory levels up from the directory of this file
    for _ in range(4):
        base = os.path.dirname(base)
    return os.path.join(base, "openpower", "isa")
67
68
# Building blocks for validating one "* opcode operands (static)" line.
#
# pattern_opcode:  an instruction mnemonic made of letters, digits,
#                  underscores and dots, e.g. "cmpl" or "add."
# pattern_dynamic: one operand name, optionally followed by parenthesised
#                  names, e.g. "RA" or "D(RA)"
# pattern_static:  one fixed single-bit field assignment, e.g. "Rc=1"
pattern_opcode = r"[A-Za-z0-9_\.]+\.?"
pattern_dynamic = r"[A-Za-z0-9_]+(?:\([A-Za-z0-9_]+\))*"
pattern_static = r"[A-Za-z0-9]+\=[01]"

# Raw f-strings are used so that regex escapes such as "\(" and "\s" reach
# the regex compiler untouched instead of being treated as (invalid)
# Python string escapes.
regex_opcode = re.compile(rf"^{pattern_opcode}$")
# comma-separated list of dynamic operands, e.g. "BF,L,RA,RB"
regex_dynamic = re.compile(rf"^{pattern_dynamic}(?:,{pattern_dynamic})*$")
# parenthesised, whitespace-separated static fields, e.g. "(OE=0 Rc=1)"
regex_static = re.compile(rf"^\({pattern_static}(?:\s{pattern_static})*\)$")
75
76
def operands(opcode, desc):
    """Yield each operand name contained in an operand description.

    Parentheses are removed from *desc* and commas are turned into
    spaces; the result is split on single spaces and every non-empty
    piece (after stripping surrounding whitespace) is yielded in order.

    Nothing is yielded when *desc* is None.  *opcode* is accepted but
    not used.
    """
    if desc is None:
        return
    # drop "(" and ")", and treat "," as a separator
    cleaned = desc.translate(str.maketrans({"(": None, ")": None, ",": " "}))
    yield from filter(None, map(str.strip, cleaned.split(" ")))
87
def get_indented_lines(lines):
    """Consume and return a run of 4-space-indented lines.

    Lines are removed from the front of *lines* (the list is modified in
    place) until it is exhausted or a blank line is reached; the blank
    line itself is consumed too.  Each returned line has trailing
    whitespace and its 4-space indent removed.

    Raises:
        AssertionError: if a non-blank line does not start with four
            spaces.
    """
    li = []
    while lines:
        l = lines.pop(0).rstrip()
        if not l:
            break
        # the check must match the 4-character slice below, otherwise an
        # under-indented line would silently lose real characters
        assert l.startswith('    '), ("4spcs not found in line %s" % l)
        li.append(l[4:])  # lose 4 spaces
    return li
98
99
class ISA:
    """Index of the instructions described by the ISA markdown pages.

    Constructing an instance parses every ``.mdwn`` file found in the
    directory returned by ``get_isa_dir()``.

    Attributes (all filled in by ``add_op``):
        instr: OrderedDict mapping an opcode name to its ``Ops`` record.
        forms: dict mapping a form name to the list of opcodes that use it.
        page: dict mapping a page name to the list of opcodes it defines.
        verbose: when True, the readers print the lines they examine.

    Iterating over an instance yields ``(opcode, Ops)`` pairs.
    """

    def __init__(self):
        self.instr = OrderedDict()
        self.forms = {}
        self.page = {}
        self.verbose = False
        isa_dir = get_isa_dir()
        for pth in os.listdir(isa_dir):
            if self.verbose:
                print("examining", isa_dir, pth)
            # any name containing "swp" is skipped
            if "swp" in pth:
                continue
            if not pth.endswith(".mdwn"):
                # directories are skipped silently, other files with a warning
                if not os.path.isdir(os.path.join(isa_dir, pth)):
                    log("warning, file not .mdwn, skipping", pth)
                continue
            self.read_file(pth)
            # NOTE: read_file_for_rewrite() is a helper that was used to add
            # the "Pseudo-code:" keyword to pages; it is deliberately not
            # invoked here.

    def __iter__(self):
        """Yield ``(opcode, Ops)`` pairs in insertion order."""
        yield from self.instr.items()

    def read_file_for_rewrite(self, fname):
        """Return the lines of page *fname* with "Pseudo-code:" inserted.

        The page is walked entry by entry (heading, blank, "-Form" line,
        blank, "*" opcode lines, blank, pseudo-code, blank, "Special..."
        line, blank, special-register lines) and a "Pseudo-code:" line plus
        a blank line are inserted before each pseudo-code section.  HTML
        comment lines inside the pseudo-code are left out of the result.

        Args:
            fname: file name relative to ``get_isa_dir()``.

        Returns:
            list of output lines without line terminators.

        Raises:
            AssertionError: if the page does not follow the expected layout.
            IndexError: if the page ends in the middle of an entry.
        """
        fname = os.path.join(get_isa_dir(), fname)
        with open(fname) as f:
            lines = f.readlines()
        rewrite = []

        l = lines.pop(0).rstrip()  # get first line
        rewrite.append(l)
        # invariant: at the top of the loop, l is already in rewrite
        while lines:
            if self.verbose:
                print(l)
            # look for HTML comment, if starting, skip line.
            # XXX this is braindead!  it doesn't look for the end
            # so please put ending of comments on one line:
            # <!-- line 1 comment -->
            # {some whitespace}<!-- line 2 comment -->
            # Blank lines before the first # are ignored the same way.
            # Advancing here (and recording the new line) avoids spinning
            # forever on a blank line and keeps the invariant above.
            stripped = l.strip()
            if stripped.startswith('<!--') or len(stripped) == 0:
                l = lines.pop(0).rstrip()  # get next line
                rewrite.append(l)
                continue

            # expect get heading
            assert l.startswith('#'), ("# not found in line %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # Form expected
            l = lines.pop(0).strip()
            assert l.endswith('-Form'), ("line with -Form expected %s" % l)
            rewrite.append(l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # get list of opcodes
            while True:
                l = lines.pop(0).strip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('*'), ("* not found in line %s" % l)

            rewrite.append("Pseudo-code:")
            rewrite.append("")
            # get pseudocode
            while True:
                l = lines.pop(0).rstrip()
                if l.strip().startswith('<!--'):
                    # skip only the comment itself; the next iteration
                    # fetches the following line
                    continue
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)

            # "Special Registers Altered" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Special"), ("special not found %s" % l)
            rewrite.append(l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)
            rewrite.append(l)

            # get special regs
            while lines:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) == 0:
                    break
                assert l.startswith('    '), ("4spcs not found in line %s" % l)

            # expect and drop whitespace
            while lines:
                l = lines.pop(0).rstrip()
                rewrite.append(l)
                if len(l) != 0 and not l.strip().startswith('<!--'):
                    break

        return rewrite

    def read_file(self, fname):
        """Parse page *fname* and register every instruction it defines.

        Each entry of the page must consist of, in order: a "#" heading, a
        blank line, a line ending in "-Form", a blank line, one or more
        "* opcode [operands] [(static fields)]" lines, a blank line, a
        "Pseudo-code:" line, a blank line, 4-space-indented pseudo-code, a
        blank line, a line starting with "Special", a blank line, and
        4-space-indented special-register lines.  Every opcode of an entry
        is passed to ``add_op`` together with the entry's details.

        Args:
            fname: file name relative to ``get_isa_dir()``; the text before
                its first "." is used as the page name.

        Raises:
            AssertionError: if the page does not follow the expected layout.
            IOError: if an opcode line has a malformed opcode, operand list
                or static-field list.
            IndexError: if the page ends in the middle of an entry.
        """
        pagename = fname.split('.')[0]
        fname = os.path.join(get_isa_dir(), fname)
        with open(fname) as f:
            lines = f.readlines()

        # set up dict with current page name
        d = {'page': pagename}

        # line-by-line lexer/parser, quite straightforward: pops one
        # line off the list and checks it.  nothing complicated needed,
        # all sections are mandatory so no need for a full LALR parser.

        l = lines.pop(0).rstrip()  # get first line
        while lines:
            if self.verbose:
                print(l)
            # look for HTML comment, if starting, skip line.
            # XXX this is braindead!  it doesn't look for the end
            # so please put ending of comments on one line:
            # <!-- line 1 comment -->
            # <!-- line 2 comment -->
            if l.strip().startswith('<!--'):
                l = lines.pop(0).rstrip()  # get next line
                continue

            # Ignore blank lines before the first #
            if len(l) == 0:
                l = lines.pop(0).rstrip()  # get next line
                continue

            # expect get heading
            assert l.startswith('#'), ("# not found in line '%s'" % l)
            d['desc'] = l[1:].strip()

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)

            # Form expected
            l = lines.pop(0).strip()
            assert l.endswith('-Form'), ("line with -Form expected %s" % l)
            d['form'] = l.split('-')[0]

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)

            # get list of opcodes
            opcodes = []
            while True:
                l = lines.pop(0).strip()
                if len(l) == 0:
                    break
                assert l.startswith('*'), ("* not found in line %s" % l)
                rest = l[1:].strip()

                # first word: the opcode itself
                (opcode, _, rest) = map(str.strip, rest.partition(" "))
                if regex_opcode.match(opcode) is None:
                    raise IOError(repr(opcode))
                opcode = [opcode]

                # second word (optional): comma-separated operand list,
                # stored as a single nested list
                (dynamic, _, rest) = map(str.strip, rest.partition(" "))
                if regex_dynamic.match(dynamic) is None and dynamic:
                    raise IOError(f"{l!r}: {dynamic!r}")
                if dynamic:
                    opcode.append(dynamic.split(","))

                # remainder (optional): "(NAME=bit ...)" static fields,
                # stored as individual trailing items
                static = rest
                if regex_static.match(static) is None and static:
                    raise IOError(f"{l!r}: {static!r}")
                if static:
                    opcode.extend(static[1:-1].split(" "))

                opcodes.append(opcode)

            # "Pseudocode" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Pseudo-code:"), ("pseudocode found %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            if self.verbose:
                print(repr(l))
            assert len(l) == 0, ("blank line not found %s" % l)

            extra_uninit_regs = OrderedSet()

            # get pseudocode
            li = []
            while True:
                l = lines.pop(0).rstrip()
                re_match = re.fullmatch(r" *<!-- EXTRA_UNINIT_REGS:(.*)-->", l)
                if re_match:
                    # collect the space-separated register names listed
                    # in the comment
                    for i in re_match[1].split(' '):
                        if i != "":
                            extra_uninit_regs.add(i)
                    # an empty pseudo-code line stands in for the comment
                    li.append("")
                    continue
                if l.strip().startswith('<!--'):
                    continue
                if len(l) == 0:
                    break
                # must match the 4-character slice below, otherwise an
                # under-indented line would silently lose real characters
                assert l.startswith('    '), ("4spcs not found in line %s" % l)
                l = l[4:]  # lose 4 spaces
                li.append(l)
            d['pcode'] = li
            d['extra_uninit_regs'] = extra_uninit_regs

            # "Special Registers Altered" expected
            l = lines.pop(0).rstrip()
            assert l.startswith("Special"), ("special not found %s" % l)

            # whitespace expected
            l = lines.pop(0).strip()
            assert len(l) == 0, ("blank line not found %s" % l)

            # get special regs
            li = get_indented_lines(lines)
            d['sregs'] = li

            # add in opcode
            for o in opcodes:
                self.add_op(o, d)

            # expect and drop whitespace and comments
            while lines:
                l = lines.pop(0).rstrip()
                if len(l) != 0 and not l.strip().startswith('<!--'):
                    break

    def add_op(self, o, d):
        """Register one opcode.

        Args:
            o: list whose first item is the opcode name and whose remaining
                items are its register/field operands.
            d: dict with the details shared by the entry's opcodes; it is
                shallow-copied, so *d* itself is not modified.
        """
        opcode, regs = o[0], o[1:]
        op = copy(d)
        op['regs'] = regs
        op['opcode'] = opcode
        self.instr[opcode] = Ops(**op)

        # create list of instructions by form
        self.forms.setdefault(op['form'], []).append(opcode)

        # create list of instructions by page
        self.page.setdefault(op['page'], []).append(opcode)

    def pprint_ops(self):
        """Print every instruction, followed by the opcodes of each form."""
        for v in self.instr.values():
            print("# %s %s" % (v.opcode, v.desc))
            print("Form: %s Regs: %s" % (v.form, v.regs))
            print('\n'.join(map(lambda x: " %s" % x, v.pcode)))
            print("Specials")
            print('\n'.join(map(lambda x: " %s" % x, v.sregs)))
            print()
        # use this instance's table rather than a module-level variable
        for k, v in self.forms.items():
            print(k, v)
382
383
# When run as a script: parse all pages, dump every instruction, then show
# how the registers of a single instruction can be looked up.
if __name__ == '__main__':
    isa = ISA()
    isa.pprint_ops()
    # example on how to access cmp regs:
    cmp_entry = isa.instr["cmp"]
    print("cmp regs:", cmp_entry.regs)