pagereader.py

   1 # Reads OpenPOWER ISA pages from http://libre-soc.org/openpower/isa
   2 """OpenPOWER ISA page parser
   3
   4 returns an OrderedDict of namedtuple "Ops" containing details of all
   5 instructions listed in markdown files.
   6
   7 format must be strictly as follows (no optional sections) including whitespace:
   8
   9 # Compare Logical
  10
  11 X-Form
  12
* cmpl BF,L,RA,RB

Pseudo-code:

    if L = 0 then a <- [0]*32 || (RA)[32:63]
  16                   b <- [0]*32 || (RB)[32:63]
  17              else a <-  (RA)
  18                   b <-  (RB)
  19     if      a <u b then c <- 0b100
  20     else if a >u b then c <- 0b010
  21     else                c <-  0b001
  22     CR[4*BF+32:4*BF+35] <- c || XER[SO]
  23
  24 Special Registers Altered:
  25
  26     CR field BF
  27     Another field
  28
this translates to:

    # heading
    blank
    Some-Form
    blank
    * instruction registerlist
    * instruction registerlist
    blank
    Pseudo-code:
    blank
    4-space-indented pseudo-code
    4-space-indented pseudo-code
    blank
    Special Registers Altered:
    blank
    4-space-indented register description
    blank
    blank(s) (optional for convenience at end-of-page)
  45 """
  46
  47 from openpower.util import log
  48 from collections import namedtuple, OrderedDict
  49 from copy import copy
  50 import os
  51 import re
  52
  53 opfields = ("desc", "form", "opcode", "regs", "pcode", "sregs", "page")
  54 Ops = namedtuple("Ops", opfields)
  55
  56
  57 def get_isa_dir():
  58     fdir = os.path.abspath(os.path.dirname(__file__))
  59     fdir = os.path.split(fdir)[0]
  60     fdir = os.path.split(fdir)[0]
  61     fdir = os.path.split(fdir)[0]
  62     fdir = os.path.split(fdir)[0]
  63     # print (fdir)
  64     return os.path.join(fdir, "openpower", "isa")
  65
  66
  67 pattern_opcode = r"[A-Za-z0-9_\.]+\.?"
  68 pattern_dynamic = r"[A-Za-z0-9_]+(?:\([A-Za-z0-9_]+\))*"
  69 pattern_static = r"[A-Za-z0-9]+\=[01]"
  70 regex_opcode = re.compile(f"^{pattern_opcode}$")
  71 regex_dynamic = re.compile(f"^{pattern_dynamic}(?:,{pattern_dynamic})*$")
  72 regex_static = re.compile(f"^\({pattern_static}(?:\s{pattern_static})*\)$")
  73
  74
  75 def operands(opcode, desc):
  76     if desc is None:
  77         return
  78     desc = desc.replace("(", "")
  79     desc = desc.replace(")", "")
  80     desc = desc.replace(",", " ")
  81     for operand in desc.split(" "):
  82         operand = operand.strip()
  83         if operand:
  84             yield operand
  85
  86
  87 class ISA:
  88     def __init__(self):
  89         self.instr = OrderedDict()
  90         self.forms = {}
  91         self.page = {}
  92         self.verbose = False
  93         for pth in os.listdir(os.path.join(get_isa_dir())):
  94             if self.verbose:
  95                 print("examining", get_isa_dir(), pth)
  96             if "swp" in pth:
  97                 continue
  98             if not pth.endswith(".mdwn"):
  99                 log ("warning, file not .mdwn, skipping", pth)
 100                 continue
 101             self.read_file(pth)
 102             continue
 103             # code which helped add in the keyword "Pseudo-code:" automatically
 104             rewrite = self.read_file_for_rewrite(pth)
 105             name = os.path.join("/tmp", pth)
 106             with open(name, "w") as f:
 107                 f.write('\n'.join(rewrite) + '\n')
 108
 109     def __iter__(self):
 110         yield from self.instr.items()
 111
 112     def read_file_for_rewrite(self, fname):
 113         pagename = fname.split('.')[0]
 114         fname = os.path.join(get_isa_dir(), fname)
 115         with open(fname) as f:
 116             lines = f.readlines()
 117         rewrite = []
 118
 119         l = lines.pop(0).rstrip()  # get first line
 120         rewrite.append(l)
 121         while lines:
 122             if self.verbose:
 123                 print(l)
 124             # look for HTML comment, if starting, skip line.
 125             # XXX this is braindead!  it doesn't look for the end
 126             # so please put ending of comments on one line:
 127             # <!-- line 1 comment -->
 128             # {some whitespace}<!-- line 2 comment -->
 129             if l.strip().startswith('<!--'):
 130                 # print ("skipping comment", l)
 131                 l = lines.pop(0).rstrip()  # get first line
 132                 continue
 133
 134             # Ignore blank lines before the first #
 135             if len(l.strip()) == 0:
 136                 continue
 137
 138             # expect get heading
 139             assert l.startswith('#'), ("# not found in line %s" % l)
 140
 141             # whitespace expected
 142             l = lines.pop(0).strip()
 143             if self.verbose:
 144                 print(repr(l))
 145             assert len(l) == 0, ("blank line not found %s" % l)
 146             rewrite.append(l)
 147
 148             # Form expected
 149             l = lines.pop(0).strip()
 150             assert l.endswith('-Form'), ("line with -Form expected %s" % l)
 151             rewrite.append(l)
 152
 153             # whitespace expected
 154             l = lines.pop(0).strip()
 155             assert len(l) == 0, ("blank line not found %s" % l)
 156             rewrite.append(l)
 157
 158             # get list of opcodes
 159             while True:
 160                 l = lines.pop(0).strip()
 161                 rewrite.append(l)
 162                 if len(l) == 0:
 163                     break
 164                 assert l.startswith('*'), ("* not found in line %s" % l)
 165
 166             rewrite.append("Pseudo-code:")
 167             rewrite.append("")
 168             # get pseudocode
 169             while True:
 170                 l = lines.pop(0).rstrip()
 171                 if l.strip().startswith('<!--'):
 172                     # print ("skipping comment", l)
 173                     l = lines.pop(0).rstrip()  # get first line
 174                     continue
 175                 rewrite.append(l)
 176                 if len(l) == 0:
 177                     break
 178                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
 179
 180             # "Special Registers Altered" expected
 181             l = lines.pop(0).rstrip()
 182             assert l.startswith("Special"), ("special not found %s" % l)
 183             rewrite.append(l)
 184
 185             # whitespace expected
 186             l = lines.pop(0).strip()
 187             assert len(l) == 0, ("blank line not found %s" % l)
 188             rewrite.append(l)
 189
 190             # get special regs
 191             while lines:
 192                 l = lines.pop(0).rstrip()
 193                 rewrite.append(l)
 194                 if len(l) == 0:
 195                     break
 196                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
 197
 198             # expect and drop whitespace
 199             while lines:
 200                 l = lines.pop(0).rstrip()
 201                 rewrite.append(l)
 202                 if len(l) != 0 and not l.strip().startswith('<!--'):
 203                     break
 204
 205         return rewrite
 206
 207     def read_file(self, fname):
 208         pagename = fname.split('.')[0]
 209         fname = os.path.join(get_isa_dir(), fname)
 210         with open(fname) as f:
 211             lines = f.readlines()
 212
 213         # set up dict with current page name
 214         d = {'page': pagename}
 215
 216         # line-by-line lexer/parser, quite straightforward: pops one
 217         # line off the list and checks it.  nothing complicated needed,
 218         # all sections are mandatory so no need for a full LALR parser.
 219
 220         l = lines.pop(0).rstrip()  # get first line
 221         while lines:
 222             if self.verbose:
 223                 print(l)
 224             # look for HTML comment, if starting, skip line.
 225             # XXX this is braindead!  it doesn't look for the end
 226             # so please put ending of comments on one line:
 227             # <!-- line 1 comment -->
 228             # <!-- line 2 comment -->
 229             if l.strip().startswith('<!--'):
 230                 # print ("skipping comment", l)
 231                 l = lines.pop(0).rstrip()  # get next line
 232                 continue
 233
 234             # Ignore blank lines before the first #
 235             if len(l) == 0:
 236                 l = lines.pop(0).rstrip()  # get next line
 237                 continue
 238
 239             # expect get heading
 240             assert l.startswith('#'), ("# not found in line '%s'" % l)
 241             d['desc'] = l[1:].strip()
 242
 243             # whitespace expected
 244             l = lines.pop(0).strip()
 245             if self.verbose:
 246                 print(repr(l))
 247             assert len(l) == 0, ("blank line not found %s" % l)
 248
 249             # Form expected
 250             l = lines.pop(0).strip()
 251             assert l.endswith('-Form'), ("line with -Form expected %s" % l)
 252             d['form'] = l.split('-')[0]
 253
 254             # whitespace expected
 255             l = lines.pop(0).strip()
 256             assert len(l) == 0, ("blank line not found %s" % l)
 257
 258             # get list of opcodes
 259             opcodes = []
 260             while True:
 261                 l = lines.pop(0).strip()
 262                 if len(l) == 0:
 263                     break
 264                 assert l.startswith('*'), ("* not found in line %s" % l)
 265                 rest = l[1:].strip()
 266
 267                 (opcode, _, rest) = map(str.strip, rest.partition(" "))
 268                 if regex_opcode.match(opcode) is None:
 269                     raise IOError(repr(opcode))
 270                 opcode = [opcode]
 271
 272                 (dynamic, _, rest) = map(str.strip, rest.partition(" "))
 273                 if regex_dynamic.match(dynamic) is None and dynamic:
 274                     raise IOError(f"{l!r}: {dynamic!r}")
 275                 if dynamic:
 276                     opcode.append(dynamic.split(","))
 277
 278                 static = rest
 279                 if regex_static.match(static) is None and static:
 280                     raise IOError(f"{l!r}: {static!r}")
 281                 if static:
 282                     opcode.extend(static[1:-1].split(" "))
 283
 284                 opcodes.append(opcode)
 285
 286             # "Pseudocode" expected
 287             l = lines.pop(0).rstrip()
 288             assert l.startswith("Pseudo-code:"), ("pseudocode found %s" % l)
 289
 290             # whitespace expected
 291             l = lines.pop(0).strip()
 292             if self.verbose:
 293                 print(repr(l))
 294             assert len(l) == 0, ("blank line not found %s" % l)
 295
 296             # get pseudocode
 297             li = []
 298             while True:
 299                 l = lines.pop(0).rstrip()
 300                 if l.strip().startswith('<!--'):
 301                     continue
 302                 if len(l) == 0:
 303                     break
 304                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
 305                 l = l[4:]  # lose 4 spaces
 306                 li.append(l)
 307             d['pcode'] = li
 308
 309             # "Special Registers Altered" expected
 310             l = lines.pop(0).rstrip()
 311             assert l.startswith("Special"), ("special not found %s" % l)
 312
 313             # whitespace expected
 314             l = lines.pop(0).strip()
 315             assert len(l) == 0, ("blank line not found %s" % l)
 316
 317             # get special regs
 318             li = []
 319             while lines:
 320                 l = lines.pop(0).rstrip()
 321                 if len(l) == 0:
 322                     break
 323                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
 324                 l = l[4:]  # lose 4 spaces
 325                 li.append(l)
 326             d['sregs'] = li
 327
 328             # add in opcode
 329             for o in opcodes:
 330                 self.add_op(o, d)
 331
 332             # expect and drop whitespace and comments
 333             while lines:
 334                 l = lines.pop(0).rstrip()
 335                 if len(l) != 0 and not l.strip().startswith('<!--'):
 336                     break
 337
 338     def add_op(self, o, d):
 339         opcode, regs = o[0], o[1:]
 340         op = copy(d)
 341         op['regs'] = regs
 342         op['opcode'] = opcode
 343         self.instr[opcode] = Ops(**op)
 344
 345         # create list of instructions by form
 346         form = op['form']
 347         fl = self.forms.get(form, [])
 348         self.forms[form] = fl + [opcode]
 349
 350         # create list of instructions by page
 351         page = op['page']
 352         pl = self.page.get(page, [])
 353         self.page[page] = pl + [opcode]
 354
 355     def pprint_ops(self):
 356         for k, v in self.instr.items():
 357             print("# %s %s" % (v.opcode, v.desc))
 358             print("Form: %s Regs: %s" % (v.form, v.regs))
 359             print('\n'.join(map(lambda x: "    %s" % x, v.pcode)))
 360             print("Specials")
 361             print('\n'.join(map(lambda x: "    %s" % x, v.sregs)))
 362             print()
 363         for k, v in isa.forms.items():
 364             print(k, v)
 365
 366
 367 if __name__ == '__main__':
 368     isa = ISA()
 369     isa.pprint_ops()
 370     # example on how to access cmp regs:
 371     print ("cmp regs:", isa.instr["cmp"].regs)