# Copyright (c) 2003-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt

import os
import sys
import re
import string
import inspect, traceback
# get type names
from types import *

from m5.util.grammar import Grammar

###################
# Utility functions

#
# Indent every line in string 's' by two spaces
# (except preprocessor directives).
# Used to make nested code blocks look pretty.
#
def indent(s):
    return re.sub(r'(?m)^(?!#)', '  ', s)

#
# Munge a somewhat arbitrarily formatted piece of Python code
# (e.g. from a format 'let' block) into something whose indentation
# will get by the Python parser.
#
# The two keys here are that Python will give a syntax error if
# there's any whitespace at the beginning of the first line, and that
# all lines at the same lexical nesting level must have identical
# indentation. Unfortunately the way code literals work, an entire
# let block tends to have some initial indentation. Rather than
# trying to figure out what that is and strip it off, we prepend 'if
# 1:' to make the let code the nested block inside the if (and have
# the parser automatically deal with the indentation for us).
#
# We don't want to do this if (1) the code block is empty or (2) the
# first line of the block doesn't have any whitespace at the front.

def fixPythonIndentation(s):
    # get rid of blank lines first
    s = re.sub(r'(?m)^\s*\n', '', s);
    if (s != '' and re.match(r'[ \t]', s[0])):
        s = 'if 1:\n' + s
    return s

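# For illustration only (hypothetical input, not used at runtime): a
# 'let' block that arrives as "    x = 1\n    y = 2\n" still carries its
# original leading indentation, so fixPythonIndentation() rewrites it to
# "if 1:\n    x = 1\n    y = 2\n", which compiles cleanly without having
# to figure out and strip the common prefix.
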
class ISAParserError(Exception):
    """Error handler for parser errors"""
    def __init__(self, first, second=None):
        if second is None:
            self.lineno = 0
            self.string = first
        else:
            if hasattr(first, 'lexer'):
                first = first.lexer.lineno
            self.lineno = first
            self.string = second

    def display(self, filename_stack, print_traceback=False):
        # Output formatted to work under Emacs compile-mode. Optional
        # 'print_traceback' arg, if set to True, prints a Python stack
        # backtrace too (can be handy when trying to debug the parser
        # itself).

        spaces = ""
        for (filename, line) in filename_stack[:-1]:
            print "%sIn file included from %s:" % (spaces, filename)
            spaces += "  "

        # Print a Python stack backtrace if requested.
        if print_traceback or not self.lineno:
            traceback.print_exc()

        line_str = "%s:" % (filename_stack[-1][0], )
        if self.lineno:
            line_str += "%d:" % (self.lineno, )

        return "%s%s %s" % (spaces, line_str, self.string)

    def exit(self, filename_stack, print_traceback=False):
        # Just call exit.

        sys.exit(self.display(filename_stack, print_traceback))

def error(*args):
    raise ISAParserError(*args)

####################
# Template objects.
#
# Template objects are format strings that allow substitution from
# the attribute spaces of other objects (e.g. InstObjParams instances).

labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')

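# For illustration (hypothetical template text, not part of the parser):
# a template body such as
#     'Fault %(class_name)s::execute() { %(op_decl)s %(code)s }'
# contains labels matched by labelRE; Template.subst() below fills them
# in from an InstObjParams object or from a plain dictionary.
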
class Template(object):
    def __init__(self, t):
        self.template = t

    def subst(self, d):
        myDict = None

        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = protect_non_subst_percents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = protect_cpu_symbols(template)

        # Build a dict ('myDict') to use for the template substitution.
        # Start with the template namespace. Make a copy since we're
        # going to modify it.
        myDict = parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # If we're dealing with an InstObjParams object, we need
            # to be a little more sophisticated. The instruction-wide
            # parameters are already formed, but the parameters which
            # are only function wide still need to be generated.
            compositeCode = ''

            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            snippetLabels = [l for l in labelRE.findall(template)
                             if d.snippets.has_key(l)]

            snippets = dict([(s, mungeSnippet(d.snippets[s]))
                             for s in snippetLabels])

            myDict.update(snippets)

            compositeCode = ' '.join(map(str, snippets.values()))

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            compositeCode += ' ' + template

            operands = SubOperandList(compositeCode, d.operands)

            myDict['op_decl'] = operands.concatAttrStrings('op_decl')

            is_src = lambda op: op.is_src
            is_dest = lambda op: op.is_dest

            myDict['op_src_decl'] = \
                    operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            myDict['op_dest_decl'] = \
                    operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')

            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
            myDict['op_wb'] = operands.concatAttrStrings('op_wb')

            if d.operands.memOperand:
                myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
                myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError, "Template.subst() arg must be or have dictionary"
        return template % myDict

    # Convert to string. This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return expand_cpu_symbols_to_string(self.template)

################
# Format object.
#
# A format object encapsulates an instruction format. It must provide
# a defineInst() method that generates the code for an instruction
# definition.

exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')

exportContext = {}

def updateExportContext():
    exportContext.update(exportDict(*exportContextSymbols))
    exportContext.update(parser.templateMap)

def exportDict(*symNames):
    return dict([(s, eval(s)) for s in symNames])


class Format(object):
    def __init__(self, id, params, code):
        # constructor: just save away arguments
        self.id = id
        self.params = params
        label = 'def format ' + id
        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
        param_list = string.join(params, ", ")
        f = '''def defInst(_code, _context, %s):
    my_locals = vars().copy()
    exec _code in _context, my_locals
    return my_locals\n''' % param_list
        c = compile(f, label + ' wrapper', 'exec')
        exec c
        self.func = defInst

    def defineInst(self, name, args, lineno):
        context = {}
        updateExportContext()
        context.update(exportContext)
        if len(name):
            Name = name[0].upper()
            if len(name) > 1:
                Name += name[1:]
        context.update({ 'name': name, 'Name': Name })
        try:
            vars = self.func(self.user_code, context, *args[0], **args[1])
        except Exception, exc:
            error(lineno, 'error defining "%s": %s.' % (name, exc))
        for k in vars.keys():
            if k not in ('header_output', 'decoder_output',
                         'exec_output', 'decode_block'):
                del vars[k]
        return GenCode(**vars)

# Special null format to catch an implicit-format instruction
# definition outside of any format block.
class NoFormat(object):
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, name, args, lineno):
        error(lineno,
              'instruction definition "%s" with no active format!' % name)

# This dictionary maps format name strings to Format objects.
formatMap = {}

# Define a new format
def defFormat(id, params, code, lineno):
    # make sure we haven't already defined this one
    if formatMap.get(id, None) != None:
        error(lineno, 'format %s redefined.' % id)
    # create new object and store in global map
    formatMap[id] = Format(id, params, code)

#####################################################################
#
#                           Support Classes
#
#####################################################################

# Expand template with CPU-specific references into a dictionary with
# an entry for each CPU model name. The entry key is the model name
# and the corresponding value is the template with the CPU-specific
# refs substituted for that model.
def expand_cpu_symbols_to_dict(template):
    # Protect '%'s that don't go with CPU-specific terms
    t = re.sub(r'%(?!\(CPU_)', '%%', template)
    result = {}
    for cpu in cpu_models:
        result[cpu.name] = t % cpu.strings
    return result

# *If* the template has CPU-specific references, return a single
# string containing a copy of the template for each CPU model with the
# corresponding values substituted in. If the template has no
# CPU-specific references, it is returned unmodified.
def expand_cpu_symbols_to_string(template):
    if template.find('%(CPU_') != -1:
        return reduce(lambda x,y: x+y,
                      expand_cpu_symbols_to_dict(template).values())
    else:
        return template

# Protect CPU-specific references by doubling the corresponding '%'s
# (in preparation for substituting a different set of references into
# the template).
def protect_cpu_symbols(template):
    return re.sub(r'%(?=\(CPU_)', '%%', template)

# Protect any non-dict-substitution '%'s in a format string
# (i.e. those not followed by '(')
def protect_non_subst_percents(s):
    return re.sub(r'%(?!\()', '%%', s)

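# For illustration (hypothetical C++ fragment): given
#     'printf("%d", x); %(CPU_foo)s;'
# protect_non_subst_percents() doubles the printf '%' (making it '%%d')
# so it survives a later Python '%' substitution, while
# protect_cpu_symbols() doubles the '%' in front of '(CPU_' so the
# CPU-specific substitution can be deferred until GenCode.
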
###############
# GenCode class
#
# The GenCode class encapsulates generated code destined for various
# output files. The header_output and decoder_output attributes are
# strings containing code destined for decoder.hh and decoder.cc
# respectively. The decode_block attribute contains code to be
# incorporated in the decode function itself (that will also end up in
# decoder.cc). The exec_output attribute is a dictionary with a key
# for each CPU model name; the value associated with a particular key
# is the string of code for that CPU model's exec.cc file. The
# has_decode_default attribute is used in the decode block to allow
# explicit default clauses to override default default clauses.

class GenCode(object):
    # Constructor. At this point we substitute out all CPU-specific
    # symbols. For the exec output, these go into the per-model
    # dictionary. For all other output types they get collapsed into
    # a single string.
    def __init__(self,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        self.header_output = expand_cpu_symbols_to_string(header_output)
        self.decoder_output = expand_cpu_symbols_to_string(decoder_output)
        if isinstance(exec_output, dict):
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting any
            # %(CPU_foo)s params appropriately.
            self.exec_output = expand_cpu_symbols_to_dict(exec_output)
        self.decode_block = expand_cpu_symbols_to_string(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        exec_output = {}
        for cpu in cpu_models:
            n = cpu.name
            exec_output[n] = self.exec_output[n] + other.exec_output[n]
        return GenCode(self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       exec_output,
                       self.decode_block + other.decode_block,
                       self.has_decode_default or other.has_decode_default)

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        for cpu in cpu_models:
            self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;'). Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        self.decode_block = pre + indent(self.decode_block) + post

#####################################################################
#
#                      Bitfield Operator Support
#
#####################################################################

bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    # first convert single-bit selectors to two-index form
    # i.e., <n> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
            if here < 0:
                sys.exit("Didn't find '('!")
        exprStart = here+1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code


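# For illustration (hypothetical pseudocode): substBitOps() rewrites
#     'Rc = Ra<15:8> + (Rb + 1)<7:0>;'
# into
#     'Rc = bits(Ra, 15, 8) + bits((Rb + 1), 7, 0);'
# using bitOpWordRE for the simple name case and the backward paren
# search above for the parenthesized-expression case.
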
#####################################################################
#
#                             Code Parser
#
# The remaining code is the support for automatically extracting
# instruction characteristics from pseudocode.
#
#####################################################################

# Force the argument to be a list. Useful for flags, where a caller
# can specify a singleton flag or a list of flags. Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    if isinstance(arg, list):
        return arg
    elif isinstance(arg, tuple):
        return list(arg)
    elif not arg:
        return []
    else:
        return [ arg ]

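# For illustration (not used at runtime): makeList('IsLoad') returns
# ['IsLoad'], makeList(('IsLoad', 'IsStore')) returns
# ['IsLoad', 'IsStore'], and makeList(None) returns [].
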
# Generate operandTypeMap from the user's 'def operand_types'
# statement.
def buildOperandTypeMap(user_dict, lineno):
    global operandTypeMap
    operandTypeMap = {}
    for (ext, (desc, size)) in user_dict.iteritems():
        if desc == 'signed int':
            ctype = 'int%d_t' % size
            is_signed = 1
        elif desc == 'unsigned int':
            ctype = 'uint%d_t' % size
            is_signed = 0
        elif desc == 'float':
            is_signed = 1  # shouldn't really matter
            if size == 32:
                ctype = 'float'
            elif size == 64:
                ctype = 'double'
        elif desc == 'twin64 int':
            is_signed = 0
            ctype = 'Twin64_t'
        elif desc == 'twin32 int':
            is_signed = 0
            ctype = 'Twin32_t'
        if ctype == '':
            error(lineno,
                  'Unrecognized type description "%s" in user_dict' % desc)
        operandTypeMap[ext] = (size, ctype, is_signed)

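# For illustration (hypothetical 'def operand_types' entries): a user
# dictionary such as
#     { 'sw' : ('signed int', 32), 'uq' : ('unsigned int', 64) }
# produces operandTypeMap entries
#     'sw' -> (32, 'int32_t', 1)  and  'uq' -> (64, 'uint64_t', 0).
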
class Operand(object):
    '''Base class for operand descriptors. An instance of this class
    (or actually a class derived from this one) represents a specific
    operand for a code block (e.g., "Rc.sq" as a dest). Intermediate
    derived classes encapsulate the traits of a particular operand
    type (e.g., "32-bit integer register").'''

    def buildReadCode(self, func = None):
        code = self.read_code % {"name": self.base_name,
                                 "func": func,
                                 "op_idx": self.src_reg_idx,
                                 "reg_idx": self.reg_spec,
                                 "size": self.size,
                                 "ctype": self.ctype}
        if self.size != self.dflt_size:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, code, self.size-1)
        else:
            return '%s = %s;\n' % \
                   (self.base_name, code)

    def buildWriteCode(self, func = None):
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        else:
            final_val = self.base_name
        code = self.write_code % {"name": self.base_name,
                                  "func": func,
                                  "op_idx": self.dest_reg_idx,
                                  "reg_idx": self.reg_spec,
                                  "size": self.size,
                                  "ctype": self.ctype,
                                  "final_val": final_val}
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, code)

    def __init__(self, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        if ext:
            self.eff_ext = ext
        else:
            self.eff_ext = self.dflt_ext

        (self.size, self.ctype, self.is_signed) = operandTypeMap[self.eff_ext]

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            if self.ctype in ['Twin32_t', 'Twin64_t']:
                self.mem_acc_type = 'Twin'
            else:
                self.mem_acc_type = 'uint'

    # Finalize additional fields (primarily code fields). This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        if self.is_src:
            self.op_rd = self.makeRead()
            self.op_src_decl = self.makeDecl()
        else:
            self.op_rd = ''
            self.op_src_decl = ''

        if self.is_dest:
            self.op_wb = self.makeWrite()
            self.op_dest_decl = self.makeDecl()
        else:
            self.op_wb = ''
            self.op_dest_decl = ''

    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def getFlags(self):
        # note the empty slice '[:]' gives us a copy of self.flags[0]
        # instead of a reference to it
        my_flags = self.flags[0][:]
        if self.is_src:
            my_flags += self.flags[1]
        if self.is_dest:
            my_flags += self.flags[2]
        return my_flags

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return self.ctype + ' ' + self.base_name + ' = 0;\n';

class IntRegOperand(Operand):
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self):
        c = ''
        if self.is_src:
            c += '\n\t_srcRegIdx[%d] = %s;' % \
                 (self.src_reg_idx, self.reg_spec)
        if self.is_dest:
            c += '\n\t_destRegIdx[%d] = %s;' % \
                 (self.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to read integer register as FP')
        if self.read_code != None:
            return self.buildReadCode('readIntRegOperand')
        if (self.size == self.dflt_size):
            return '%s = xc->readIntRegOperand(this, %d);\n' % \
                   (self.base_name, self.src_reg_idx)
        elif (self.size > self.dflt_size):
            int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
                          (self.src_reg_idx)
            if (self.is_signed):
                int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
            return '%s = %s;\n' % (self.base_name, int_reg_val)
        else:
            return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
                   (self.base_name, self.src_reg_idx, self.size-1)

    def makeWrite(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to write integer register as FP')
        if self.write_code != None:
            return self.buildWriteCode('setIntRegOperand')
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        else:
            final_val = self.base_name
        wb = '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
        return wb

class FloatRegOperand(Operand):
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self):
        c = ''
        if self.is_src:
            c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (self.src_reg_idx, self.reg_spec)
        if self.is_dest:
            c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (self.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self):
        bit_select = 0
        if (self.ctype == 'float' or self.ctype == 'double'):
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
            if (self.size != self.dflt_size):
                bit_select = 1
        base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
        if self.read_code != None:
            return self.buildReadCode(func)
        if bit_select:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        else:
            return '%s = %s;\n' % (self.base_name, base)

    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        if (self.ctype == 'float' or self.ctype == 'double'):
            func = 'setFloatRegOperand'
        elif (self.ctype == 'uint32_t' or self.ctype == 'uint64_t'):
            func = 'setFloatRegOperandBits'
        else:
            func = 'setFloatRegOperandBits'
            final_ctype = 'uint%d_t' % self.dflt_size
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        if self.write_code != None:
            return self.buildWriteCode(func)
        wb = '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
        return wb

class ControlRegOperand(Operand):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self):
        c = ''
        if self.is_src:
            c += '\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                 (self.src_reg_idx, self.reg_spec)
        if self.is_dest:
            c += '\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                 (self.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self):
        bit_select = 0
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to read control register as FP')
        if self.read_code != None:
            return self.buildReadCode('readMiscRegOperand')
        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, base)
        else:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)

    def makeWrite(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to write control register as FP')
        if self.write_code != None:
            return self.buildWriteCode('setMiscRegOperand')
        wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
             (self.dest_reg_idx, self.base_name)
        wb += 'if (traceData) { traceData->setData(%s); }' % \
              self.base_name
        return wb

class MemOperand(Operand):
    def isMem(self):
        return 1

    def makeConstructor(self):
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        if self.ctype in ['Twin32_t','Twin64_t']:
            return "%s %s; %s.a = 0; %s.b = 0;\n" % \
                   (self.ctype, self.base_name, self.base_name, self.base_name)
        return '%s %s = 0;\n' % (self.ctype, self.base_name)

    def makeRead(self):
        if self.read_code != None:
            return self.buildReadCode()
        return ''

    def makeWrite(self):
        if self.write_code != None:
            return self.buildWriteCode()
        return ''

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t". Divide by 8 if you want bytes.
    def makeAccSize(self):
        return self.size

class PCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        return '%s = xc->readPC();\n' % self.base_name

    def makeWrite(self):
        return 'xc->setPC(%s);\n' % self.base_name

class UPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        if self.read_code != None:
            return self.buildReadCode('readMicroPC')
        return '%s = xc->readMicroPC();\n' % self.base_name

    def makeWrite(self):
        if self.write_code != None:
            return self.buildWriteCode('setMicroPC')
        return 'xc->setMicroPC(%s);\n' % self.base_name

class NUPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        if self.read_code != None:
            return self.buildReadCode('readNextMicroPC')
        return '%s = xc->readNextMicroPC();\n' % self.base_name

    def makeWrite(self):
        if self.write_code != None:
            return self.buildWriteCode('setNextMicroPC')
        return 'xc->setNextMicroPC(%s);\n' % self.base_name

class NPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        if self.read_code != None:
            return self.buildReadCode('readNextPC')
        return '%s = xc->readNextPC();\n' % self.base_name

    def makeWrite(self):
        if self.write_code != None:
            return self.buildWriteCode('setNextPC')
        return 'xc->setNextPC(%s);\n' % self.base_name

class NNPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        if self.read_code != None:
            return self.buildReadCode('readNextNPC')
        return '%s = xc->readNextNPC();\n' % self.base_name

    def makeWrite(self):
        if self.write_code != None:
            return self.buildWriteCode('setNextNPC')
        return 'xc->setNextNPC(%s);\n' % self.base_name

def buildOperandNameMap(user_dict, lineno):
    global operandNameMap
    operandNameMap = {}
    for (op_name, val) in user_dict.iteritems():
        (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val[:5]
        if len(val) > 5:
            read_code = val[5]
        else:
            read_code = None
        if len(val) > 6:
            write_code = val[6]
        else:
            write_code = None
        if len(val) > 7:
            error(lineno,
                  'error: too many attributes for operand "%s"' %
                  base_cls_name)

        (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext]
        # Canonical flag structure is a triple of lists, where each list
        # indicates the set of flags implied by this operand always, when
        # used as a source, and when used as a dest, respectively.
        # For simplicity this can be initialized using a variety of fairly
        # obvious shortcuts; we convert these to canonical form here.
        if not flags:
            # no flags specified (e.g., 'None')
            flags = ( [], [], [] )
        elif isinstance(flags, str):
            # a single flag: assumed to be unconditional
            flags = ( [ flags ], [], [] )
        elif isinstance(flags, list):
            # a list of flags: also assumed to be unconditional
            flags = ( flags, [], [] )
        elif isinstance(flags, tuple):
            # it's a tuple: it should be a triple,
            # but each item could be a single string or a list
            (uncond_flags, src_flags, dest_flags) = flags
            flags = (makeList(uncond_flags),
                     makeList(src_flags), makeList(dest_flags))
        # Accumulate attributes of new operand class in tmp_dict
        tmp_dict = {}
        for attr in ('dflt_ext', 'reg_spec', 'flags', 'sort_pri',
                     'dflt_size', 'dflt_ctype', 'dflt_is_signed',
                     'read_code', 'write_code'):
            tmp_dict[attr] = eval(attr)
        tmp_dict['base_name'] = op_name
        # New class name will be e.g. "IntReg_Ra"
        cls_name = base_cls_name + '_' + op_name
        # Evaluate string arg to get class object. Note that the
        # actual base class for "IntReg" is "IntRegOperand", i.e. we
        # have to append "Operand".
        try:
            base_cls = eval(base_cls_name + 'Operand')
        except NameError:
            error(lineno,
                  'error: unknown operand base class "%s"' % base_cls_name)
        # The following statement creates a new class called
        # <cls_name> as a subclass of <base_cls> with the attributes
        # in tmp_dict, just as if we evaluated a class declaration.
        operandNameMap[op_name] = type(cls_name, (base_cls,), tmp_dict)

    # Define operand variables.
    operands = user_dict.keys()

    operandsREString = (r'''
    (?<![\w\.])        # neg. lookbehind assertion: prevent partial matches
    ((%s)(?:\.(\w+))?) # match: operand with optional '.' then suffix
    (?![\w\.])         # neg. lookahead assertion: prevent partial matches
    '''
                        % string.join(operands, '|'))

    global operandsRE
    operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

    # Same as operandsREString, but extension is mandatory, and only two
    # groups are returned (base and ext, not full name as above).
    # Used for substituting '_' for '.' to make C++ identifiers.
    operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                               % string.join(operands, '|'))

    global operandsWithExtRE
    operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)

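# For illustration (hypothetical operand names 'Ra' and 'Mem'):
# operandsRE matches both 'Ra' and 'Ra.sq' in a code snippet, returning
# the groups (full name, base, extension), e.g. ('Ra.sq', 'Ra', 'sq');
# operandsWithExtRE matches only the dotted form and is what
# substMungedOpNames() uses to strip the '.sq' suffix when generating
# C++ identifiers.
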
maxInstSrcRegs = 0
maxInstDestRegs = 0

class OperandList(object):
    '''Find all the operands in the given code block. Returns an operand
    descriptor list (instance of class OperandList).'''
    def __init__(self, code):
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # search for operands
        next_pos = 0
        while 1:
            match = operandsRE.search(code, next_pos)
            if not match:
                # no more matches: we're done
                break
            op = match.groups()
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = op
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = (assignRE.match(code, match.end()) != None)
            is_src = not is_dest
            # see if we've already seen this one
            op_desc = self.find_base(op_base)
            if op_desc:
                if op_desc.ext != op_ext:
                    error('Inconsistent extensions for operand %s' % \
                          op_base)
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            else:
                # new operand: create new descriptor
                op_desc = operandNameMap[op_base](op_full, op_ext,
                                                  is_src, is_dest)
                self.append(op_desc)
            # start next search after end of current match
            next_pos = match.end()
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    if op_desc.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.isIntReg():
                        self.numIntDestRegs += 1
            elif op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc
        global maxInstSrcRegs
        global maxInstDestRegs
        if maxInstSrcRegs < self.numSrcRegs:
            maxInstSrcRegs = self.numSrcRegs
        if maxInstDestRegs < self.numDestRegs:
            maxInstDestRegs = self.numDestRegs
        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize()

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)

class SubOperandList(OperandList):
    '''Find all the operands in the given code block. Returns an operand
    descriptor list (instance of class OperandList).'''
    def __init__(self, code, master_list):
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # search for operands
        next_pos = 0
        while 1:
            match = operandsRE.search(code, next_pos)
            if not match:
                # no more matches: we're done
                break
            op = match.groups()
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = op
            # find this op in the master list
            op_desc = master_list.find_base(op_base)
            if not op_desc:
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % op_base)
            else:
                # See if we've already found this operand
                op_desc = self.find_base(op_base)
                if not op_desc:
                    # if not, add a reference to it to this sub list
                    self.append(master_list.bases[op_base])

            # start next search after end of current match
            next_pos = match.end()
        self.sort()
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc

# Regular expression object to match C++ comments
# (used in findOperands())
commentRE = re.compile(r'//.*\n')

# Regular expression object to match assignment statements
# (used in findOperands())
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)

# Munge operand names in code string to make legal C++ variable names.
# This means getting rid of the type extension if any.
# (Will match base_name attribute of Operand object.)
def substMungedOpNames(code):
    return operandsWithExtRE.sub(r'\1', code)

# Fix up code snippets for final substitution in templates.
def mungeSnippet(s):
    if isinstance(s, str):
        return substMungedOpNames(substBitOps(s))
    else:
        return s

def makeFlagConstructor(flag_list):
    if len(flag_list) == 0:
        return ''
    # filter out repeated flags
    flag_list.sort()
    i = 1
    while i < len(flag_list):
        if flag_list[i] == flag_list[i-1]:
            del flag_list[i]
        else:
            i += 1
    pre = '\n\tflags['
    post = '] = true;'
    code = pre + string.join(flag_list, post + pre) + post
    return code

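# For illustration (hypothetical flag names):
#     makeFlagConstructor(['IsLoad', 'IsMemRef', 'IsLoad'])
# deduplicates the list and returns the C++ fragment
#     '\n\tflags[IsLoad] = true;\n\tflags[IsMemRef] = true;'
# which InstObjParams appends to the generated constructor body.
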
# Assume all instruction flags are of the form 'IsFoo'
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass
opClassRE = re.compile(r'.*Op|No_OpClass')

class InstObjParams(object):
    def __init__(self, mnem, class_name, base_class = '',
                 snippets = {}, opt_args = []):
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(compositeCode)
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value. To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''

##############
# Stack: a simple stack object. Used for both formats (formatStack)
# and default cases (defaultStack). Simply wraps a list to give more
# stack-like syntax and enable initialization with an argument list
# (as opposed to an argument that's a list).

class Stack(list):
    def __init__(self, *items):
        list.__init__(self, items)

    def push(self, item):
        self.append(item);

    def top(self):
        return self[-1]

# The global format stack.
formatStack = Stack(NoFormat())

# The global default case stack.
defaultStack = Stack(None)

# Global stack that tracks current file and line number.
# Each element is a tuple (filename, lineno) that records the
# *current* filename and the line number in the *previous* file where
# it was included.
fileNameStack = Stack()


#######################
#
# Output file template
#

file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''

max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''

class ISAParser(Grammar):
    def __init__(self, output_dir):
        super(ISAParser, self).__init__()
        self.output_dir = output_dir

        self.templateMap = {}

    #####################################################################
    #
    #                                Lexer
    #
    # The PLY lexer module takes two things as input:
    #  - A list of token names (the string list 'tokens')
    #  - A regular expression describing a match for each token. The
    #    regexp for token FOO can be provided in two ways:
    #    - as a string variable named t_FOO
    #    - as the doc string for a function named t_FOO. In this case,
    #      the function is also executed, allowing an action to be
    #      associated with each token match.
    #
    #####################################################################

    # Reserved words. These are listed separately as they are matched
    # using the same regexp as generic IDs, but distinguished in the
    # t_ID() function. The PLY documentation suggests this approach.
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )

    # List of tokens. The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

        # The following are matched but never returned. commented out to
        # suppress PLY warning
        # newfile directive
        # 'NEWFILE',

        # endfile directive
        # 'ENDFILE'
        )

    # Regular expressions for token matching
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'\{'
    t_RBRACE = r'\}'
    t_LESS = r'\<'
    t_GREATER = r'\>'
    t_EQUALS = r'='
    t_COMMA = r','
    t_SEMI = r';'
    t_DOT = r'\.'
    t_COLON = r':'
    t_DBLCOLON = r'::'
    t_ASTERISK = r'\*'

    # Identifiers and reserved words
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r

    def t_ID(self, t):
        r'[A-Za-z_]\w*'
        t.type = self.reserved_map.get(t.value, 'ID')
        return t

    # Integer literal
    def t_INTLIT(self, t):
        r'-?(0x[\da-fA-F]+)|\d+'
        try:
            t.value = int(t.value,0)
        except ValueError:
            error(t, 'Integer value "%s" too large' % t.value)
            t.value = 0
        return t

    # String literal. Note that these use only single quotes, and
    # can span multiple lines.
    def t_STRLIT(self, t):
        r"(?m)'([^'])+'"
        # strip off quotes
        t.value = t.value[1:-1]
        t.lexer.lineno += t.value.count('\n')
        return t


    # "Code literal"... like a string literal, but delimiters are
    # '{{' and '}}' so they get formatted nicely under emacs c-mode
    def t_CODELIT(self, t):
        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
        # strip off {{ & }}
        t.value = t.value[2:-2]
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_CPPDIRECTIVE(self, t):
        r'^\#[^\#].*\n'
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_NEWFILE(self, t):
        r'^\#\#newfile\s+"[\w/.-]*"'
        fileNameStack.push((t.value[11:-1], t.lexer.lineno))
        t.lexer.lineno = 0

    def t_ENDFILE(self, t):
        r'^\#\#endfile'
        (old_filename, t.lexer.lineno) = fileNameStack.pop()

    #
    # The functions t_NEWLINE, t_ignore, and t_error are
    # special for the lex module.
    #

    # Newlines
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count('\n')

    # Comments
    def t_comment(self, t):
        r'//.*'

    # Completely ignored characters
    t_ignore = ' \t\x0c'

    # Error handler
    def t_error(self, t):
        error(t, "illegal character '%s'" % t.value[0])
        t.skip(1)

    #####################################################################
    #
    #                                Parser
    #
    # Every function whose name starts with 'p_' defines a grammar
    # rule. The rule is encoded in the function's doc string, while
    # the function body provides the action taken when the rule is
    # matched. The argument to each function is a list of the values
    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
    # symbols on the RHS. For tokens, the value is copied from the
    # t.value attribute provided by the lexer. For non-terminals, the
    # value is assigned by the producing rule; i.e., the job of the
    # grammar rule function is to set the value for the non-terminal
    # on the LHS (by assigning to t[0]).
    #####################################################################

    # The LHS of the first grammar rule is used as the start symbol
    # (in this case, 'specification'). Note that this rule enforces
    # that there will be exactly one namespace declaration, with 0 or
    # more global defs/decls before and after it. The defs & decls
    # before the namespace decl will be outside the namespace; those
    # after will be inside. The decoder function is always inside the
    # namespace.
    def p_specification(self, t):
        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
        global_code = t[1]
        isa_name = t[2]
        namespace = isa_name + "Inst"
        # wrap the decode block as a function definition
        t[4].wrap_decode_block('''
StaticInstPtr
%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
{
    using namespace %(namespace)s;
''' % vars(), '}')
        # both the latter output blocks and the decode block are in
        # the namespace
        namespace_code = t[3] + t[4]
        # pass it all back to the caller of yacc.parse()
        t[0] = (isa_name, namespace, global_code, namespace_code)

    # ISA name declaration looks like "namespace <foo>;"
    def p_name_decl(self, t):
        'name_decl : NAMESPACE ID SEMI'
        t[0] = t[2]

    # 'opt_defs_and_outputs' is a possibly empty sequence of
    # def and/or output statements.
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        t[0] = GenCode()

    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        t[0] = t[1]

    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        t[0] = t[1]

    def p_defs_and_outputs_1(self, t):
        'defs_and_outputs : defs_and_outputs def_or_output'
        t[0] = t[1] + t[2]

    # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        t[0] = t[1]

    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
    # directly to the appropriate output section.

    # Massage output block by substituting in template definitions and
    # bit operators. We handle '%'s embedded in the string that don't
    # indicate template substitutions (or CPU-specific symbols, which
    # get handled in GenCode) by doubling them first so that the
    # format operation will reduce them back to single '%'s.
    def process_output(self, s):
        s = protect_non_subst_percents(s)
        # protects cpu-specific symbols too
        s = protect_cpu_symbols(s)
        return substBitOps(s % self.templateMap)

    def p_output_header(self, t):
        'output_header : OUTPUT HEADER CODELIT SEMI'
        t[0] = GenCode(header_output = self.process_output(t[3]))

    def p_output_decoder(self, t):
        'output_decoder : OUTPUT DECODER CODELIT SEMI'
        t[0] = GenCode(decoder_output = self.process_output(t[3]))

    def p_output_exec(self, t):
        'output_exec : OUTPUT EXEC CODELIT SEMI'
        t[0] = GenCode(exec_output = self.process_output(t[3]))

    # global let blocks 'let {{...}}' (Python code blocks) are
    # executed directly when seen. Note that these execute in a
    # special variable context 'exportContext' to prevent the code
    # from polluting this script's namespace.
    def p_global_let(self, t):
        'global_let : LET CODELIT SEMI'
        updateExportContext()
        exportContext["header_output"] = ''
        exportContext["decoder_output"] = ''
        exportContext["exec_output"] = ''
        exportContext["decode_block"] = ''
        try:
            exec fixPythonIndentation(t[2]) in exportContext
        except Exception, exc:
            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
        t[0] = GenCode(header_output = exportContext["header_output"],
                       decoder_output = exportContext["decoder_output"],
                       exec_output = exportContext["exec_output"],
                       decode_block = exportContext["decode_block"])

    # Define the mapping from operand type extensions to C++ types and
    # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        try:
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode() # contributes nothing to the output C++ file

    # Define the mapping from operand names to operand classes and
    # other traits. Stored in operandNameMap.
    def p_def_operands(self, t):
        'def_operands : DEF OPERANDS CODELIT SEMI'
        if not globals().has_key('operandTypeMap'):
            error(t, 'error: operand types must be defined before operands')
        try:
            user_dict = eval('{' + t[3] + '}', exportContext)
        except Exception, exc:
            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
        buildOperandNameMap(user_dict, t.lexer.lineno)
        t[0] = GenCode() # contributes nothing to the output C++ file

    # A bitfield definition looks like:
    # 'def [signed] bitfield <ID> [<first>:<last>]'
    # This generates a preprocessor macro in the output file.
    def p_def_bitfield_0(self, t):
        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
        if (t[2] == 'signed'):
            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
        t[0] = GenCode(header_output = hash_define)

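    # For illustration (hypothetical ISA-description line): the statement
    #     def bitfield OPCODE <31:26>;
    # is handled by the rule above and emits
    #     #undef OPCODE
    #     #define OPCODE  bits(machInst, 31, 26)
    # into the generated header.
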
1563 # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1564 def p_def_bitfield_1(self, t):
1565 'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1566 expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1567 if (t[2] == 'signed'):
1568 expr = 'sext<%d>(%s)' % (1, expr)
1569 hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1570 t[0] = GenCode(header_output = hash_define)
1571
1572 # alternate form for structure member: 'def bitfield <ID> <ID>'
1573 def p_def_bitfield_struct(self, t):
1574 'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1575 if (t[2] != ''):
1576 error(t, 'error: structure bitfields are always unsigned.')
1577 expr = 'machInst.%s' % t[5]
1578 hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1579 t[0] = GenCode(header_output = hash_define)
1580
1581 def p_id_with_dot_0(self, t):
1582 'id_with_dot : ID'
1583 t[0] = t[1]
1584
1585 def p_id_with_dot_1(self, t):
1586 'id_with_dot : ID DOT id_with_dot'
1587 t[0] = t[1] + t[2] + t[3]
1588
1589 def p_opt_signed_0(self, t):
1590 'opt_signed : SIGNED'
1591 t[0] = t[1]
1592
1593 def p_opt_signed_1(self, t):
1594 'opt_signed : empty'
1595 t[0] = ''
1596
1597 def p_def_template(self, t):
1598 'def_template : DEF TEMPLATE ID CODELIT SEMI'
1599 self.templateMap[t[3]] = Template(t[4])
1600 t[0] = GenCode()
1601
1602 # An instruction format definition looks like
1603 # "def format <fmt>(<params>) {{...}};"
1604 def p_def_format(self, t):
1605 'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1606 (id, params, code) = (t[3], t[5], t[7])
1607 defFormat(id, params, code, t.lexer.lineno)
1608 t[0] = GenCode()
1609
1610 # The formal parameter list for an instruction format is a
1611 # possibly empty list of comma-separated parameters. Positional
1612 # (standard, non-keyword) parameters must come first, followed by
1613 # keyword parameters, followed by a '*foo' parameter that gets
1614 # excess positional arguments (as in Python). Each of these three
1615 # parameter categories is optional.
1616 #
1617 # Note that we do not support the '**foo' parameter for collecting
1618 # otherwise undefined keyword args. Otherwise the parameter list
1619 # is (I believe) identical to what is supported in Python.
1620 #
1621 # The param list generates a tuple, where the first element is a
1622 # list of the positional params and the second element is a dict
1623 # containing the keyword params.
1624 def p_param_list_0(self, t):
1625 'param_list : positional_param_list COMMA nonpositional_param_list'
1626 t[0] = t[1] + t[3]
1627
1628 def p_param_list_1(self, t):
1629 '''param_list : positional_param_list
1630 | nonpositional_param_list'''
1631 t[0] = t[1]
1632
1633 def p_positional_param_list_0(self, t):
1634 'positional_param_list : empty'
1635 t[0] = []
1636
1637 def p_positional_param_list_1(self, t):
1638 'positional_param_list : ID'
1639 t[0] = [t[1]]
1640
1641 def p_positional_param_list_2(self, t):
1642 'positional_param_list : positional_param_list COMMA ID'
1643 t[0] = t[1] + [t[3]]
1644
1645 def p_nonpositional_param_list_0(self, t):
1646 'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1647 t[0] = t[1] + t[3]
1648
1649 def p_nonpositional_param_list_1(self, t):
1650 '''nonpositional_param_list : keyword_param_list
1651 | excess_args_param'''
1652 t[0] = t[1]
1653
1654 def p_keyword_param_list_0(self, t):
1655 'keyword_param_list : keyword_param'
1656 t[0] = [t[1]]
1657
1658 def p_keyword_param_list_1(self, t):
1659 'keyword_param_list : keyword_param_list COMMA keyword_param'
1660 t[0] = t[1] + [t[3]]
1661
1662 def p_keyword_param(self, t):
1663 'keyword_param : ID EQUALS expr'
1664 t[0] = t[1] + ' = ' + repr(t[3])
1665
1666 def p_excess_args_param(self, t):
1667 'excess_args_param : ASTERISK ID'
1668 # Just concatenate them: '*ID'. Wrap in list to be consistent
1669 # with positional_param_list and keyword_param_list.
1670 t[0] = [t[1] + t[2]]
1671
1672 # End of format definition-related rules.
1673 ##############
1674
1675 #
1676 # A decode block looks like:
1677 # decode <field1> [, <field2>]* [default <inst>] { ... }
1678 #
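# For illustration (hypothetical field, format, and mnemonic names;
# the bare mnemonics assume an enclosing or default format block):
#   decode OPCODE default Unknown::unknown() {
#       0x0: add({{ Rd = Rs1 + Rs2; }});
#       0x1, 0x2: sub({{ Rd = Rs1 - Rs2; }});
#   }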
1679 def p_decode_block(self, t):
1680 'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1681 default_defaults = defaultStack.pop()
1682 codeObj = t[5]
1683 # use the "default defaults" only if there was no explicit
1684 # default statement in decode_stmt_list
1685 if not codeObj.has_decode_default:
1686 codeObj += default_defaults
1687 codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1688 t[0] = codeObj
1689
1690 # The opt_default statement serves only to push the "default
1691 # defaults" onto defaultStack. This value will be used by nested
1692 # decode blocks, and used and popped off when the current
1693 # decode_block is processed (in p_decode_block() above).
1694 def p_opt_default_0(self, t):
1695 'opt_default : empty'
1696 # no default specified: reuse the one currently at the top of
1697 # the stack
1698 defaultStack.push(defaultStack.top())
1699 # no meaningful value returned
1700 t[0] = None
1701
1702 def p_opt_default_1(self, t):
1703 'opt_default : DEFAULT inst'
1704 # push the new default
1705 codeObj = t[2]
1706 codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1707 defaultStack.push(codeObj)
1708 # no meaningful value returned
1709 t[0] = None
1710
1711 def p_decode_stmt_list_0(self, t):
1712 'decode_stmt_list : decode_stmt'
1713 t[0] = t[1]
1714
1715 def p_decode_stmt_list_1(self, t):
1716 'decode_stmt_list : decode_stmt decode_stmt_list'
1717 if (t[1].has_decode_default and t[2].has_decode_default):
1718 error(t, 'Two default cases in decode block')
1719 t[0] = t[1] + t[2]
1720
1721 #
1722 # Decode statement rules
1723 #
1724 # There are four types of statements allowed in a decode block:
1725 # 1. Format blocks 'format <foo> { ... }'
1726 # 2. Nested decode blocks
1727 # 3. Instruction definitions
1728 # 4. C preprocessor directives
1729
1730
1731 # Preprocessor directives found in a decode statement list are
1732 # passed through to the output, replicated to all of the output
1733 # code streams. This works well for ifdefs, so we can ifdef out
1734 # both the declarations and the decode cases generated by an
1735 # instruction definition. Handling them as part of the grammar
1736 # makes it easy to keep them in the right place with respect to
1737 # the code generated by the other statements.
1738 def p_decode_stmt_cpp(self, t):
1739 'decode_stmt : CPPDIRECTIVE'
1740 t[0] = GenCode(t[1], t[1], t[1], t[1])
1741
1742 # A format block 'format <foo> { ... }' sets the default
1743 # instruction format used to handle instruction definitions inside
1744 # the block. This format can be overridden by using an explicit
1745 # format on the instruction definition or with a nested format
1746 # block.
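# For illustration (hypothetical format and mnemonic names):
#   format IntOp {
#       0x3: and({{ Rd = Rs1 & Rs2; }});
#       0x4: or({{ Rd = Rs1 | Rs2; }});
#   }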
1747 def p_decode_stmt_format(self, t):
1748 'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1749 # The format will be pushed on the stack when 'push_format_id'
1750 # is processed (see below). Once the parser has recognized
1751 # the full production (through the right brace), we're done
1752 # with the format, so now we can pop it.
1753 formatStack.pop()
1754 t[0] = t[4]
1755
1756 # This rule exists so we can set the current format (& push the
1757 # stack) when we recognize the format name part of the format
1758 # block.
1759 def p_push_format_id(self, t):
1760 'push_format_id : ID'
1761 try:
1762 formatStack.push(formatMap[t[1]])
1763 t[0] = ('', '// format %s' % t[1])
1764 except KeyError:
1765 error(t, 'instruction format "%s" not defined.' % t[1])
1766
1767 # Nested decode block: if the value of the current field matches
1768 # the specified constant, do a nested decode on some other field.
1769 def p_decode_stmt_decode(self, t):
1770 'decode_stmt : case_label COLON decode_block'
1771 label = t[1]
1772 codeObj = t[3]
1773 # just wrap the decoding code from the block as a case in the
1774 # outer switch statement.
1775 codeObj.wrap_decode_block('\n%s:\n' % label)
1776 codeObj.has_decode_default = (label == 'default')
1777 t[0] = codeObj
1778
1779 # Instruction definition (finally!).
1780 def p_decode_stmt_inst(self, t):
1781 'decode_stmt : case_label COLON inst SEMI'
1782 label = t[1]
1783 codeObj = t[3]
1784 codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1785 codeObj.has_decode_default = (label == 'default')
1786 t[0] = codeObj
1787
1788 # The case label is either a list of one or more constants or
1789 # 'default'
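# For illustration, the label '0x1, 0x2' (an intlit_list) becomes the
# C++ text 'case 0x1: case 0x2'; constants of 2**32 or larger are
# wrapped in ULL() to keep them 64-bit (see make_case below).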
1790 def p_case_label_0(self, t):
1791 'case_label : intlit_list'
1792 def make_case(intlit):
1793 if intlit >= 2**32:
1794 return 'case ULL(%#x)' % intlit
1795 else:
1796 return 'case %#x' % intlit
1797 t[0] = ': '.join(map(make_case, t[1]))
1798
1799 def p_case_label_1(self, t):
1800 'case_label : DEFAULT'
1801 t[0] = 'default'
1802
1803 #
1804 # The constant list for a decode case label is non-empty and
1805 # consists of one or more comma-separated integer literals.
1806 #
1807 def p_intlit_list_0(self, t):
1808 'intlit_list : INTLIT'
1809 t[0] = [t[1]]
1810
1811 def p_intlit_list_1(self, t):
1812 'intlit_list : intlit_list COMMA INTLIT'
1813 t[0] = t[1]
1814 t[0].append(t[3])
1815
1816 # Define an instruction using the current instruction format
1817 # (specified by an enclosing format block).
1818 # "<mnemonic>(<args>)"
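# For illustration (hypothetical mnemonic and format names), an
# instruction definition like 'addi({{ Rd = Rs1 + imm; }})' appearing
# inside a 'format IntImmOp' block hands the mnemonic 'addi' and its
# argument list to IntImmOp's defineInst() via the rule below.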
1819 def p_inst_0(self, t):
1820 'inst : ID LPAREN arg_list RPAREN'
1821 # Pass the ID and arg list to the current format class to deal with.
1822 currentFormat = formatStack.top()
1823 codeObj = currentFormat.defineInst(t[1], t[3], t.lexer.lineno)
1824 args = ','.join(map(str, t[3]))
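# Comment out every line of the stringified arg list, then strip the
# leading '//' from the first line only, since the comment format
# string below already supplies the initial '// '.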
1825 args = re.sub('(?m)^', '//', args)
1826 args = re.sub('^//', '', args)
1827 comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1828 codeObj.prepend_all(comment)
1829 t[0] = codeObj
1830
1831 # Define an instruction using an explicitly specified format:
1832 # "<fmt>::<mnemonic>(<args>)"
1833 def p_inst_1(self, t):
1834 'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1835 try:
1836 format = formatMap[t[1]]
1837 except KeyError:
1838 error(t, 'instruction format "%s" not defined.' % t[1])
1839 codeObj = format.defineInst(t[3], t[5], t.lexer.lineno)
1840 comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1841 codeObj.prepend_all(comment)
1842 t[0] = codeObj
1843
1844 # The arg list generates a tuple, where the first element is a
1845 # list of the positional args and the second element is a dict
1846 # containing the keyword args.
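# For illustration (hypothetical arguments), the argument list
#   dest, src, size=4
# would be parsed by the rules below into roughly
#   (['dest', 'src'], {'size': 4})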
1847 def p_arg_list_0(self, t):
1848 'arg_list : positional_arg_list COMMA keyword_arg_list'
1849 t[0] = ( t[1], t[3] )
1850
1851 def p_arg_list_1(self, t):
1852 'arg_list : positional_arg_list'
1853 t[0] = ( t[1], {} )
1854
1855 def p_arg_list_2(self, t):
1856 'arg_list : keyword_arg_list'
1857 t[0] = ( [], t[1] )
1858
1859 def p_positional_arg_list_0(self, t):
1860 'positional_arg_list : empty'
1861 t[0] = []
1862
1863 def p_positional_arg_list_1(self, t):
1864 'positional_arg_list : expr'
1865 t[0] = [t[1]]
1866
1867 def p_positional_arg_list_2(self, t):
1868 'positional_arg_list : positional_arg_list COMMA expr'
1869 t[0] = t[1] + [t[3]]
1870
1871 def p_keyword_arg_list_0(self, t):
1872 'keyword_arg_list : keyword_arg'
1873 t[0] = t[1]
1874
1875 def p_keyword_arg_list_1(self, t):
1876 'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1877 t[0] = t[1]
1878 t[0].update(t[3])
1879
1880 def p_keyword_arg(self, t):
1881 'keyword_arg : ID EQUALS expr'
1882 t[0] = { t[1] : t[3] }
1883
1884 #
1885 # Basic expressions. These constitute the argument values of
1886 # "function calls" (i.e. instruction definitions in the decode
1887 # block) and default values for formal parameters of format
1888 # functions.
1889 #
1890 # Right now, these are either strings, integers, or (recursively)
1891 # lists of exprs (using Python square-bracket list syntax). Note
1892 # that bare identifiers are treated as string constants here (since
1893 # there isn't really a variable namespace to refer to).
1894 #
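# For illustration (hypothetical identifiers), an expression such as
#   [1, "add", Rd]
# would be parsed by the rules below into roughly the Python list
#   [1, 'add', 'Rd']
# assuming the lexer strips the quotes from string literals.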
1895 def p_expr_0(self, t):
1896 '''expr : ID
1897 | INTLIT
1898 | STRLIT
1899 | CODELIT'''
1900 t[0] = t[1]
1901
1902 def p_expr_1(self, t):
1903 '''expr : LBRACKET list_expr RBRACKET'''
1904 t[0] = t[2]
1905
1906 def p_list_expr_0(self, t):
1907 'list_expr : expr'
1908 t[0] = [t[1]]
1909
1910 def p_list_expr_1(self, t):
1911 'list_expr : list_expr COMMA expr'
1912 t[0] = t[1] + [t[3]]
1913
1914 def p_list_expr_2(self, t):
1915 'list_expr : empty'
1916 t[0] = []
1917
1918 #
1919 # Empty production... use in other rules for readability.
1920 #
1921 def p_empty(self, t):
1922 'empty :'
1923 pass
1924
1925 # Parse error handler. Note that the argument here is the
1926 # offending *token*, not a grammar symbol (hence the need to use
1927 # t.value)
1928 def p_error(self, t):
1929 if t:
1930 error(t, "syntax error at '%s'" % t.value)
1931 else:
1932 error("unknown syntax error")
1933
1934 # END OF GRAMMAR RULES
1935
1936 def update_if_needed(self, file, contents):
1937 '''Update the output file only if the new contents are
1938 different from the current contents. Minimizes the files that
1939 need to be rebuilt after minor changes.'''
1940
1941 file = os.path.join(self.output_dir, file)
1942 update = False
1943 if os.access(file, os.R_OK):
1944 f = open(file, 'r')
1945 old_contents = f.read()
1946 f.close()
1947 if contents != old_contents:
1948 print 'Updating', file
1949 os.remove(file) # in case it's write-protected
1950 update = True
1951 else:
1952 print 'File', file, 'is unchanged'
1953 else:
1954 print 'Generating', file
1955 update = True
1956 if update:
1957 f = open(file, 'w')
1958 f.write(contents)
1959 f.close()
1960
1961 # This regular expression matches '##include' directives
1962 includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
1963 re.MULTILINE)
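# For illustration (hypothetical path), a line such as
#   ##include "decoder.isa"
# matches with group('filename') == 'decoder.isa'; replace_include()
# below splices in that file's contents between ##newfile/##endfile
# markers.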
1964
1965 def replace_include(self, matchobj, dirname):
1966 """Function to replace a matched '##include' directive with the
1967 contents of the specified file (with nested ##includes
1968 replaced recursively). 'matchobj' is an re match object
1969 (from a match of includeRE) and 'dirname' is the directory
1970 relative to which the file path should be resolved."""
1971
1972 fname = matchobj.group('filename')
1973 full_fname = os.path.normpath(os.path.join(dirname, fname))
1974 contents = '##newfile "%s"\n%s\n##endfile\n' % \
1975 (full_fname, self.read_and_flatten(full_fname))
1976 return contents
1977
1978 def read_and_flatten(self, filename):
1979 """Read a file and recursively flatten nested '##include' files."""
1980
1981 current_dir = os.path.dirname(filename)
1982 try:
1983 contents = open(filename).read()
1984 except IOError:
1985 error('Error including file "%s"' % filename)
1986
1987 fileNameStack.push((filename, 0))
1988
1989 # Find any includes and include them
1990 def replace(matchobj):
1991 return self.replace_include(matchobj, current_dir)
1992 contents = self.includeRE.sub(replace, contents)
1993
1994 fileNameStack.pop()
1995 return contents
1996
1997 def _parse_isa_desc(self, isa_desc_file):
1998 '''Read in and parse the ISA description.'''
1999
2000 # Read file and (recursively) all included files into a string.
2001 # PLY requires that the input be in a single string so we have to
2002 # do this up front.
2003 isa_desc = self.read_and_flatten(isa_desc_file)
2004
2005 # Initialize filename stack with outer file.
2006 fileNameStack.push((isa_desc_file, 0))
2007
2008 # Parse it.
2009 (isa_name, namespace, global_code, namespace_code) = \
2010 self.parse(isa_desc)
2011
2012 # grab the last three path components of isa_desc_file to put in
2013 # the output
2014 filename = '/'.join(isa_desc_file.split('/')[-3:])
2015
2016 # generate decoder.hh
2017 includes = '#include "base/bitfield.hh" // for bitfield support'
2018 global_output = global_code.header_output
2019 namespace_output = namespace_code.header_output
2020 decode_function = ''
2021 self.update_if_needed('decoder.hh', file_template % vars())
2022
2023 # generate decoder.cc
2024 includes = '#include "decoder.hh"'
2025 global_output = global_code.decoder_output
2026 namespace_output = namespace_code.decoder_output
2027 # namespace_output += namespace_code.decode_block
2028 decode_function = namespace_code.decode_block
2029 self.update_if_needed('decoder.cc', file_template % vars())
2030
2031 # generate per-cpu exec files
2032 for cpu in cpu_models:
2033 includes = '#include "decoder.hh"\n'
2034 includes += cpu.includes
2035 global_output = global_code.exec_output[cpu.name]
2036 namespace_output = namespace_code.exec_output[cpu.name]
2037 decode_function = ''
2038 self.update_if_needed(cpu.filename, file_template % vars())
2039
2040 # The variable names here are hacky, but this creates local
2041 # variables that will be picked up by vars() and carry the
2042 # values of the globals.
2043 global maxInstSrcRegs
2044 MaxInstSrcRegs = maxInstSrcRegs
2045 global maxInstDestRegs
2046 MaxInstDestRegs = maxInstDestRegs
2047 # max_inst_regs.hh
2048 self.update_if_needed('max_inst_regs.hh',
2049 max_inst_regs_template % vars())
2050
2051 def parse_isa_desc(self, *args, **kwargs):
2052 try:
2053 self._parse_isa_desc(*args, **kwargs)
2054 except ISAParserError, e:
2055 e.exit(fileNameStack)
2056
2057 # global list of CpuModel objects (see cpu_models.py)
2058 cpu_models = []
2059
2060 # Called as script: get args from command line.
2061 # Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
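# A hypothetical invocation (all paths and CPU names illustrative only):
#   python isa_parser.py cpu_models.py src/arch/foo/isa/main.isa \
#       build/FOO/arch/foo/generated AtomicSimpleCPU TimingSimpleCPU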
2062 if __name__ == '__main__':
2063 execfile(sys.argv[1]) # read in CpuModel definitions
2064 cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
2065 parser = ISAParser(sys.argv[3])
2066 parser.parse_isa_desc(sys.argv[2])