src/arch/isa_parser.py

   1 # Copyright (c) 2003-2005 The Regents of The University of Michigan
   2 # All rights reserved.
   3 #
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met: redistributions of source code must retain the above copyright
   7 # notice, this list of conditions and the following disclaimer;
   8 # redistributions in binary form must reproduce the above copyright
   9 # notice, this list of conditions and the following disclaimer in the
  10 # documentation and/or other materials provided with the distribution;
  11 # neither the name of the copyright holders nor the names of its
  12 # contributors may be used to endorse or promote products derived from
  13 # this software without specific prior written permission.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26 #
  27 # Authors: Steve Reinhardt
  28
  29 import os
  30 import sys
  31 import re
  32 import string
  33 import inspect, traceback
  34 # get type names
  35 from types import *
  36
  37 from m5.util.grammar import Grammar
  38
# Module-wide debugging switch.  It is the default value of the
# 'print_traceback' argument of ISAParserError.display()/exit(), and
# when it is true Format.defineInst() re-raises exceptions from format
# code unchanged instead of converting them to parser errors.
debug=False
  40
  41 ###################
  42 # Utility functions
  43
  44 #
  45 # Indent every line in string 's' by two spaces
  46 # (except preprocessor directives).
  47 # Used to make nested code blocks look pretty.
  48 #
  49 def indent(s):
  50     return re.sub(r'(?m)^(?!#)', '  ', s)
  51
  52 #
  53 # Munge a somewhat arbitrarily formatted piece of Python code
  54 # (e.g. from a format 'let' block) into something whose indentation
  55 # will get by the Python parser.
  56 #
  57 # The two keys here are that Python will give a syntax error if
  58 # there's any whitespace at the beginning of the first line, and that
  59 # all lines at the same lexical nesting level must have identical
  60 # indentation.  Unfortunately the way code literals work, an entire
  61 # let block tends to have some initial indentation.  Rather than
  62 # trying to figure out what that is and strip it off, we prepend 'if
  63 # 1:' to make the let code the nested block inside the if (and have
  64 # the parser automatically deal with the indentation for us).
  65 #
  66 # We don't want to do this if (1) the code block is empty or (2) the
  67 # first line of the block doesn't have any whitespace at the front.
  68
  69 def fixPythonIndentation(s):
  70     # get rid of blank lines first
  71     s = re.sub(r'(?m)^\s*\n', '', s);
  72     if (s != '' and re.match(r'[ \t]', s[0])):
  73         s = 'if 1:\n' + s
  74     return s
  75
  76 class ISAParserError(Exception):
  77     """Error handler for parser errors"""
  78     def __init__(self, first, second=None):
  79         if second is None:
  80             self.lineno = 0
  81             self.string = first
  82         else:
  83             if hasattr(first, 'lexer'):
  84                 first = first.lexer.lineno
  85             self.lineno = first
  86             self.string = second
  87
  88     def display(self, filename_stack, print_traceback=debug):
  89         # Output formatted to work under Emacs compile-mode.  Optional
  90         # 'print_traceback' arg, if set to True, prints a Python stack
  91         # backtrace too (can be handy when trying to debug the parser
  92         # itself).
  93
  94         spaces = ""
  95         for (filename, line) in filename_stack[:-1]:
  96             print "%sIn file included from %s:" % (spaces, filename)
  97             spaces += "  "
  98
  99         # Print a Python stack backtrace if requested.
 100         if print_traceback or not self.lineno:
 101             traceback.print_exc()
 102
 103         line_str = "%s:" % (filename_stack[-1][0], )
 104         if self.lineno:
 105             line_str += "%d:" % (self.lineno, )
 106
 107         return "%s%s %s" % (spaces, line_str, self.string)
 108
 109     def exit(self, filename_stack, print_traceback=debug):
 110         # Just call exit.
 111
 112         sys.exit(self.display(filename_stack, print_traceback))
 113
 114 def error(*args):
 115     raise ISAParserError(*args)
 116
 117 ####################
 118 # Template objects.
 119 #
 120 # Template objects are format strings that allow substitution from
 121 # the attribute spaces of other objects (e.g. InstObjParams instances).
 122
 123 labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
 124
 125 class Template(object):
 126     def __init__(self, parser, t):
 127         self.parser = parser
 128         self.template = t
 129
 130     def subst(self, d):
 131         myDict = None
 132
 133         # Protect non-Python-dict substitutions (e.g. if there's a printf
 134         # in the templated C++ code)
 135         template = self.parser.protectNonSubstPercents(self.template)
 136         # CPU-model-specific substitutions are handled later (in GenCode).
 137         template = self.parser.protectCpuSymbols(template)
 138
 139         # Build a dict ('myDict') to use for the template substitution.
 140         # Start with the template namespace.  Make a copy since we're
 141         # going to modify it.
 142         myDict = self.parser.templateMap.copy()
 143
 144         if isinstance(d, InstObjParams):
 145             # If we're dealing with an InstObjParams object, we need
 146             # to be a little more sophisticated.  The instruction-wide
 147             # parameters are already formed, but the parameters which
 148             # are only function wide still need to be generated.
 149             compositeCode = ''
 150
 151             myDict.update(d.__dict__)
 152             # The "operands" and "snippets" attributes of the InstObjParams
 153             # objects are for internal use and not substitution.
 154             del myDict['operands']
 155             del myDict['snippets']
 156
 157             snippetLabels = [l for l in labelRE.findall(template)
 158                              if d.snippets.has_key(l)]
 159
 160             snippets = dict([(s, self.parser.mungeSnippet(d.snippets[s]))
 161                              for s in snippetLabels])
 162
 163             myDict.update(snippets)
 164
 165             compositeCode = ' '.join(map(str, snippets.values()))
 166
 167             # Add in template itself in case it references any
 168             # operands explicitly (like Mem)
 169             compositeCode += ' ' + template
 170
 171             operands = SubOperandList(self.parser, compositeCode, d.operands)
 172
 173             myDict['op_decl'] = operands.concatAttrStrings('op_decl')
 174             if operands.readPC or operands.setPC:
 175                 myDict['op_decl'] += 'TheISA::PCState __parserAutoPCState;\n'
 176
 177             is_src = lambda op: op.is_src
 178             is_dest = lambda op: op.is_dest
 179
 180             myDict['op_src_decl'] = \
 181                       operands.concatSomeAttrStrings(is_src, 'op_src_decl')
 182             myDict['op_dest_decl'] = \
 183                       operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
 184             if operands.readPC:
 185                 myDict['op_src_decl'] += \
 186                     'TheISA::PCState __parserAutoPCState;\n'
 187             if operands.setPC:
 188                 myDict['op_dest_decl'] += \
 189                     'TheISA::PCState __parserAutoPCState;\n'
 190
 191             myDict['op_rd'] = operands.concatAttrStrings('op_rd')
 192             if operands.readPC:
 193                 myDict['op_rd'] = '__parserAutoPCState = xc->pcState();\n' + \
 194                                   myDict['op_rd']
 195
 196             # Compose the op_wb string. If we're going to write back the
 197             # PC state because we changed some of its elements, we'll need to
 198             # do that as early as possible. That allows later uncoordinated
 199             # modifications to the PC to layer appropriately.
 200             reordered = list(operands.items)
 201             reordered.reverse()
 202             op_wb_str = ''
 203             pcWbStr = 'xc->pcState(__parserAutoPCState);\n'
 204             for op_desc in reordered:
 205                 if op_desc.isPCPart() and op_desc.is_dest:
 206                     op_wb_str = op_desc.op_wb + pcWbStr + op_wb_str
 207                     pcWbStr = ''
 208                 else:
 209                     op_wb_str = op_desc.op_wb + op_wb_str
 210             myDict['op_wb'] = op_wb_str
 211
 212             if d.operands.memOperand:
 213                 myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
 214                 myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type
 215
 216         elif isinstance(d, dict):
 217             # if the argument is a dictionary, we just use it.
 218             myDict.update(d)
 219         elif hasattr(d, '__dict__'):
 220             # if the argument is an object, we use its attribute map.
 221             myDict.update(d.__dict__)
 222         else:
 223             raise TypeError, "Template.subst() arg must be or have dictionary"
 224         return template % myDict
 225
 226     # Convert to string.  This handles the case when a template with a
 227     # CPU-specific term gets interpolated into another template or into
 228     # an output block.
 229     def __str__(self):
 230         return self.parser.expandCpuSymbolsToString(self.template)
 231
 232 ################
 233 # Format object.
 234 #
 235 # A format object encapsulates an instruction format.  It must provide
 236 # a defineInst() method that generates the code for an instruction
 237 # definition.
 238
 239 class Format(object):
 240     def __init__(self, id, params, code):
 241         self.id = id
 242         self.params = params
 243         label = 'def format ' + id
 244         self.user_code = compile(fixPythonIndentation(code), label, 'exec')
 245         param_list = string.join(params, ", ")
 246         f = '''def defInst(_code, _context, %s):
 247                 my_locals = vars().copy()
 248                 exec _code in _context, my_locals
 249                 return my_locals\n''' % param_list
 250         c = compile(f, label + ' wrapper', 'exec')
 251         exec c
 252         self.func = defInst
 253
 254     def defineInst(self, parser, name, args, lineno):
 255         parser.updateExportContext()
 256         context = parser.exportContext.copy()
 257         if len(name):
 258             Name = name[0].upper()
 259             if len(name) > 1:
 260                 Name += name[1:]
 261         context.update({ 'name' : name, 'Name' : Name })
 262         try:
 263             vars = self.func(self.user_code, context, *args[0], **args[1])
 264         except Exception, exc:
 265             if debug:
 266                 raise
 267             error(lineno, 'error defining "%s": %s.' % (name, exc))
 268         for k in vars.keys():
 269             if k not in ('header_output', 'decoder_output',
 270                          'exec_output', 'decode_block'):
 271                 del vars[k]
 272         return GenCode(parser, **vars)
 273
 274 # Special null format to catch an implicit-format instruction
 275 # definition outside of any format block.
 276 class NoFormat(object):
 277     def __init__(self):
 278         self.defaultInst = ''
 279
 280     def defineInst(self, parser, name, args, lineno):
 281         error(lineno,
 282               'instruction definition "%s" with no active format!' % name)
 283
 284 ###############
 285 # GenCode class
 286 #
 287 # The GenCode class encapsulates generated code destined for various
 288 # output files.  The header_output and decoder_output attributes are
 289 # strings containing code destined for decoder.hh and decoder.cc
 290 # respectively.  The decode_block attribute contains code to be
 291 # incorporated in the decode function itself (that will also end up in
 292 # decoder.cc).  The exec_output attribute is a dictionary with a key
 293 # for each CPU model name; the value associated with a particular key
 294 # is the string of code for that CPU model's exec.cc file.  The
 295 # has_decode_default attribute is used in the decode block to allow
 296 # explicit default clauses to override default default clauses.
 297
 298 class GenCode(object):
 299     # Constructor.  At this point we substitute out all CPU-specific
 300     # symbols.  For the exec output, these go into the per-model
 301     # dictionary.  For all other output types they get collapsed into
 302     # a single string.
 303     def __init__(self, parser,
 304                  header_output = '', decoder_output = '', exec_output = '',
 305                  decode_block = '', has_decode_default = False):
 306         self.parser = parser
 307         self.header_output = parser.expandCpuSymbolsToString(header_output)
 308         self.decoder_output = parser.expandCpuSymbolsToString(decoder_output)
 309         if isinstance(exec_output, dict):
 310             self.exec_output = exec_output
 311         elif isinstance(exec_output, str):
 312             # If the exec_output arg is a single string, we replicate
 313             # it for each of the CPU models, substituting and
 314             # %(CPU_foo)s params appropriately.
 315             self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
 316         self.decode_block = parser.expandCpuSymbolsToString(decode_block)
 317         self.has_decode_default = has_decode_default
 318
 319     # Override '+' operator: generate a new GenCode object that
 320     # concatenates all the individual strings in the operands.
 321     def __add__(self, other):
 322         exec_output = {}
 323         for cpu in self.parser.cpuModels:
 324             n = cpu.name
 325             exec_output[n] = self.exec_output[n] + other.exec_output[n]
 326         return GenCode(self.parser,
 327                        self.header_output + other.header_output,
 328                        self.decoder_output + other.decoder_output,
 329                        exec_output,
 330                        self.decode_block + other.decode_block,
 331                        self.has_decode_default or other.has_decode_default)
 332
 333     # Prepend a string (typically a comment) to all the strings.
 334     def prepend_all(self, pre):
 335         self.header_output = pre + self.header_output
 336         self.decoder_output  = pre + self.decoder_output
 337         self.decode_block = pre + self.decode_block
 338         for cpu in self.parser.cpuModels:
 339             self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]
 340
 341     # Wrap the decode block in a pair of strings (e.g., 'case foo:'
 342     # and 'break;').  Used to build the big nested switch statement.
 343     def wrap_decode_block(self, pre, post = ''):
 344         self.decode_block = pre + indent(self.decode_block) + post
 345
 346 #####################################################################
 347 #
 348 #                      Bitfield Operator Support
 349 #
 350 #####################################################################
 351
 352 bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')
 353
 354 bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
 355 bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')
 356
 357 def substBitOps(code):
 358     # first convert single-bit selectors to two-index form
 359     # i.e., <n> --> <n:n>
 360     code = bitOp1ArgRE.sub(r'<\1:\1>', code)
 361     # simple case: selector applied to ID (name)
 362     # i.e., foo<a:b> --> bits(foo, a, b)
 363     code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
 364     # if selector is applied to expression (ending in ')'),
 365     # we need to search backward for matching '('
 366     match = bitOpExprRE.search(code)
 367     while match:
 368         exprEnd = match.start()
 369         here = exprEnd - 1
 370         nestLevel = 1
 371         while nestLevel > 0:
 372             if code[here] == '(':
 373                 nestLevel -= 1
 374             elif code[here] == ')':
 375                 nestLevel += 1
 376             here -= 1
 377             if here < 0:
 378                 sys.exit("Didn't find '('!")
 379         exprStart = here+1
 380         newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
 381                                          match.group(1), match.group(2))
 382         code = code[:exprStart] + newExpr + code[match.end():]
 383         match = bitOpExprRE.search(code)
 384     return code
 385
 386
 387 #####################################################################
 388 #
 389 #                             Code Parser
 390 #
 391 # The remaining code is the support for automatically extracting
 392 # instruction characteristics from pseudocode.
 393 #
 394 #####################################################################
 395
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
 399 def makeList(arg):
 400     if isinstance(arg, list):
 401         return arg
 402     elif isinstance(arg, tuple):
 403         return list(arg)
 404     elif not arg:
 405         return []
 406     else:
 407         return [ arg ]
 408
 409 class Operand(object):
 410     '''Base class for operand descriptors.  An instance of this class
 411     (or actually a class derived from this one) represents a specific
 412     operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
 413     derived classes encapsulates the traits of a particular operand
 414     type (e.g., "32-bit integer register").'''
 415
 416     def buildReadCode(self, func = None):
 417         subst_dict = {"name": self.base_name,
 418                       "func": func,
 419                       "reg_idx": self.reg_spec,
 420                       "size": self.size,
 421                       "ctype": self.ctype}
 422         if hasattr(self, 'src_reg_idx'):
 423             subst_dict['op_idx'] = self.src_reg_idx
 424         code = self.read_code % subst_dict
 425         if self.size != self.dflt_size:
 426             return '%s = bits(%s, %d, 0);\n' % \
 427                    (self.base_name, code, self.size-1)
 428         else:
 429             return '%s = %s;\n' % \
 430                    (self.base_name, code)
 431
 432     def buildWriteCode(self, func = None):
 433         if (self.size != self.dflt_size and self.is_signed):
 434             final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 435         else:
 436             final_val = self.base_name
 437         subst_dict = {"name": self.base_name,
 438                       "func": func,
 439                       "reg_idx": self.reg_spec,
 440                       "size": self.size,
 441                       "ctype": self.ctype,
 442                       "final_val": final_val}
 443         if hasattr(self, 'dest_reg_idx'):
 444             subst_dict['op_idx'] = self.dest_reg_idx
 445         code = self.write_code % subst_dict
 446         return '''
 447         {
 448             %s final_val = %s;
 449             %s;
 450             if (traceData) { traceData->setData(final_val); }
 451         }''' % (self.dflt_ctype, final_val, code)
 452
 453     def __init__(self, parser, full_name, ext, is_src, is_dest):
 454         self.full_name = full_name
 455         self.ext = ext
 456         self.is_src = is_src
 457         self.is_dest = is_dest
 458         # The 'effective extension' (eff_ext) is either the actual
 459         # extension, if one was explicitly provided, or the default.
 460         if ext:
 461             self.eff_ext = ext
 462         elif hasattr(self, 'dflt_ext'):
 463             self.eff_ext = self.dflt_ext
 464
 465         if hasattr(self, 'eff_ext'):
 466             self.size, self.ctype, self.is_signed = \
 467                         parser.operandTypeMap[self.eff_ext]
 468
 469         # note that mem_acc_size is undefined for non-mem operands...
 470         # template must be careful not to use it if it doesn't apply.
 471         if self.isMem():
 472             self.mem_acc_size = self.makeAccSize()
 473             if self.ctype in ['Twin32_t', 'Twin64_t']:
 474                 self.mem_acc_type = 'Twin'
 475             else:
 476                 self.mem_acc_type = 'uint'
 477
 478     # Finalize additional fields (primarily code fields).  This step
 479     # is done separately since some of these fields may depend on the
 480     # register index enumeration that hasn't been performed yet at the
 481     # time of __init__().
 482     def finalize(self):
 483         self.flags = self.getFlags()
 484         self.constructor = self.makeConstructor()
 485         self.op_decl = self.makeDecl()
 486
 487         if self.is_src:
 488             self.op_rd = self.makeRead()
 489             self.op_src_decl = self.makeDecl()
 490         else:
 491             self.op_rd = ''
 492             self.op_src_decl = ''
 493
 494         if self.is_dest:
 495             self.op_wb = self.makeWrite()
 496             self.op_dest_decl = self.makeDecl()
 497         else:
 498             self.op_wb = ''
 499             self.op_dest_decl = ''
 500
 501     def isMem(self):
 502         return 0
 503
 504     def isReg(self):
 505         return 0
 506
 507     def isFloatReg(self):
 508         return 0
 509
 510     def isIntReg(self):
 511         return 0
 512
 513     def isControlReg(self):
 514         return 0
 515
 516     def isPCState(self):
 517         return 0
 518
 519     def isPCPart(self):
 520         return self.isPCState() and self.reg_spec
 521
 522     def getFlags(self):
 523         # note the empty slice '[:]' gives us a copy of self.flags[0]
 524         # instead of a reference to it
 525         my_flags = self.flags[0][:]
 526         if self.is_src:
 527             my_flags += self.flags[1]
 528         if self.is_dest:
 529             my_flags += self.flags[2]
 530         return my_flags
 531
 532     def makeDecl(self):
 533         # Note that initializations in the declarations are solely
 534         # to avoid 'uninitialized variable' errors from the compiler.
 535         return self.ctype + ' ' + self.base_name + ' = 0;\n';
 536
 537 class IntRegOperand(Operand):
 538     def isReg(self):
 539         return 1
 540
 541     def isIntReg(self):
 542         return 1
 543
 544     def makeConstructor(self):
 545         c = ''
 546         if self.is_src:
 547             c += '\n\t_srcRegIdx[%d] = %s;' % \
 548                  (self.src_reg_idx, self.reg_spec)
 549         if self.is_dest:
 550             c += '\n\t_destRegIdx[%d] = %s;' % \
 551                  (self.dest_reg_idx, self.reg_spec)
 552         return c
 553
 554     def makeRead(self):
 555         if (self.ctype == 'float' or self.ctype == 'double'):
 556             error('Attempt to read integer register as FP')
 557         if self.read_code != None:
 558             return self.buildReadCode('readIntRegOperand')
 559         if (self.size == self.dflt_size):
 560             return '%s = xc->readIntRegOperand(this, %d);\n' % \
 561                    (self.base_name, self.src_reg_idx)
 562         elif (self.size > self.dflt_size):
 563             int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
 564                           (self.src_reg_idx)
 565             if (self.is_signed):
 566                 int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
 567             return '%s = %s;\n' % (self.base_name, int_reg_val)
 568         else:
 569             return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
 570                    (self.base_name, self.src_reg_idx, self.size-1)
 571
 572     def makeWrite(self):
 573         if (self.ctype == 'float' or self.ctype == 'double'):
 574             error('Attempt to write integer register as FP')
 575         if self.write_code != None:
 576             return self.buildWriteCode('setIntRegOperand')
 577         if (self.size != self.dflt_size and self.is_signed):
 578             final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 579         else:
 580             final_val = self.base_name
 581         wb = '''
 582         {
 583             %s final_val = %s;
 584             xc->setIntRegOperand(this, %d, final_val);\n
 585             if (traceData) { traceData->setData(final_val); }
 586         }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
 587         return wb
 588
 589 class FloatRegOperand(Operand):
 590     def isReg(self):
 591         return 1
 592
 593     def isFloatReg(self):
 594         return 1
 595
 596     def makeConstructor(self):
 597         c = ''
 598         if self.is_src:
 599             c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
 600                  (self.src_reg_idx, self.reg_spec)
 601         if self.is_dest:
 602             c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
 603                  (self.dest_reg_idx, self.reg_spec)
 604         return c
 605
 606     def makeRead(self):
 607         bit_select = 0
 608         if (self.ctype == 'float' or self.ctype == 'double'):
 609             func = 'readFloatRegOperand'
 610         else:
 611             func = 'readFloatRegOperandBits'
 612             if (self.size != self.dflt_size):
 613                 bit_select = 1
 614         base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
 615         if self.read_code != None:
 616             return self.buildReadCode(func)
 617         if bit_select:
 618             return '%s = bits(%s, %d, 0);\n' % \
 619                    (self.base_name, base, self.size-1)
 620         else:
 621             return '%s = %s;\n' % (self.base_name, base)
 622
 623     def makeWrite(self):
 624         final_val = self.base_name
 625         final_ctype = self.ctype
 626         if (self.ctype == 'float' or self.ctype == 'double'):
 627             func = 'setFloatRegOperand'
 628         elif (self.ctype == 'uint32_t' or self.ctype == 'uint64_t'):
 629             func = 'setFloatRegOperandBits'
 630         else:
 631             func = 'setFloatRegOperandBits'
 632             final_ctype = 'uint%d_t' % self.dflt_size
 633             if (self.size != self.dflt_size and self.is_signed):
 634                 final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 635         if self.write_code != None:
 636             return self.buildWriteCode(func)
 637         wb = '''
 638         {
 639             %s final_val = %s;
 640             xc->%s(this, %d, final_val);\n
 641             if (traceData) { traceData->setData(final_val); }
 642         }''' % (final_ctype, final_val, func, self.dest_reg_idx)
 643         return wb
 644
 645 class ControlRegOperand(Operand):
 646     def isReg(self):
 647         return 1
 648
 649     def isControlReg(self):
 650         return 1
 651
 652     def makeConstructor(self):
 653         c = ''
 654         if self.is_src:
 655             c += '\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
 656                  (self.src_reg_idx, self.reg_spec)
 657         if self.is_dest:
 658             c += '\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
 659                  (self.dest_reg_idx, self.reg_spec)
 660         return c
 661
 662     def makeRead(self):
 663         bit_select = 0
 664         if (self.ctype == 'float' or self.ctype == 'double'):
 665             error('Attempt to read control register as FP')
 666         if self.read_code != None:
 667             return self.buildReadCode('readMiscRegOperand')
 668         base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
 669         if self.size == self.dflt_size:
 670             return '%s = %s;\n' % (self.base_name, base)
 671         else:
 672             return '%s = bits(%s, %d, 0);\n' % \
 673                    (self.base_name, base, self.size-1)
 674
 675     def makeWrite(self):
 676         if (self.ctype == 'float' or self.ctype == 'double'):
 677             error('Attempt to write control register as FP')
 678         if self.write_code != None:
 679             return self.buildWriteCode('setMiscRegOperand')
 680         wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
 681              (self.dest_reg_idx, self.base_name)
 682         wb += 'if (traceData) { traceData->setData(%s); }' % \
 683               self.base_name
 684         return wb
 685
 686 class MemOperand(Operand):
 687     def isMem(self):
 688         return 1
 689
 690     def makeConstructor(self):
 691         return ''
 692
 693     def makeDecl(self):
 694         # Note that initializations in the declarations are solely
 695         # to avoid 'uninitialized variable' errors from the compiler.
 696         # Declare memory data variable.
 697         if self.ctype in ['Twin32_t','Twin64_t']:
 698             return "%s %s; %s.a = 0; %s.b = 0;\n" % \
 699                    (self.ctype, self.base_name, self.base_name, self.base_name)
 700         return '%s %s = 0;\n' % (self.ctype, self.base_name)
 701
 702     def makeRead(self):
 703         if self.read_code != None:
 704             return self.buildReadCode()
 705         return ''
 706
 707     def makeWrite(self):
 708         if self.write_code != None:
 709             return self.buildWriteCode()
 710         return ''
 711
 712     # Return the memory access size *in bits*, suitable for
 713     # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
 714     def makeAccSize(self):
 715         return self.size
 716
 717 class PCStateOperand(Operand):
 718     def makeConstructor(self):
 719         return ''
 720
 721     def makeRead(self):
 722         if self.reg_spec:
 723             # A component of the PC state.
 724             return '%s = __parserAutoPCState.%s();\n' % \
 725                 (self.base_name, self.reg_spec)
 726         else:
 727             # The whole PC state itself.
 728             return '%s = xc->pcState();\n' % self.base_name
 729
 730     def makeWrite(self):
 731         if self.reg_spec:
 732             # A component of the PC state.
 733             return '__parserAutoPCState.%s(%s);\n' % \
 734                 (self.reg_spec, self.base_name)
 735         else:
 736             # The whole PC state itself.
 737             return 'xc->pcState(%s);\n' % self.base_name
 738
 739     def makeDecl(self):
 740         ctype = 'TheISA::PCState'
 741         if self.isPCPart():
 742             ctype = self.ctype
 743         return "%s %s;\n" % (ctype, self.base_name)
 744
 745     def isPCState(self):
 746         return 1
 747
 748 class OperandList(object):
 749     '''Find all the operands in the given code block.  Returns an operand
 750     descriptor list (instance of class OperandList).'''
 751     def __init__(self, parser, code):
 752         self.items = []
 753         self.bases = {}
 754         # delete comments so we don't match on reg specifiers inside
 755         code = commentRE.sub('', code)
 756         # search for operands
 757         next_pos = 0
 758         while 1:
 759             match = parser.operandsRE.search(code, next_pos)
 760             if not match:
 761                 # no more matches: we're done
 762                 break
 763             op = match.groups()
 764             # regexp groups are operand full name, base, and extension
 765             (op_full, op_base, op_ext) = op
 766             # if the token following the operand is an assignment, this is
 767             # a destination (LHS), else it's a source (RHS)
 768             is_dest = (assignRE.match(code, match.end()) != None)
 769             is_src = not is_dest
 770             # see if we've already seen this one
 771             op_desc = self.find_base(op_base)
 772             if op_desc:
 773                 if op_desc.ext != op_ext:
 774                     error('Inconsistent extensions for operand %s' % \
 775                           op_base)
 776                 op_desc.is_src = op_desc.is_src or is_src
 777                 op_desc.is_dest = op_desc.is_dest or is_dest
 778             else:
 779                 # new operand: create new descriptor
 780                 op_desc = parser.operandNameMap[op_base](parser,
 781                     op_full, op_ext, is_src, is_dest)
 782                 self.append(op_desc)
 783             # start next search after end of current match
 784             next_pos = match.end()
 785         self.sort()
 786         # enumerate source & dest register operands... used in building
 787         # constructor later
 788         self.numSrcRegs = 0
 789         self.numDestRegs = 0
 790         self.numFPDestRegs = 0
 791         self.numIntDestRegs = 0
 792         self.memOperand = None
 793         for op_desc in self.items:
 794             if op_desc.isReg():
 795                 if op_desc.is_src:
 796                     op_desc.src_reg_idx = self.numSrcRegs
 797                     self.numSrcRegs += 1
 798                 if op_desc.is_dest:
 799                     op_desc.dest_reg_idx = self.numDestRegs
 800                     self.numDestRegs += 1
 801                     if op_desc.isFloatReg():
 802                         self.numFPDestRegs += 1
 803                     elif op_desc.isIntReg():
 804                         self.numIntDestRegs += 1
 805             elif op_desc.isMem():
 806                 if self.memOperand:
 807                     error("Code block has more than one memory operand.")
 808                 self.memOperand = op_desc
 809         if parser.maxInstSrcRegs < self.numSrcRegs:
 810             parser.maxInstSrcRegs = self.numSrcRegs
 811         if parser.maxInstDestRegs < self.numDestRegs:
 812             parser.maxInstDestRegs = self.numDestRegs
 813         # now make a final pass to finalize op_desc fields that may depend
 814         # on the register enumeration
 815         for op_desc in self.items:
 816             op_desc.finalize()
 817
 818     def __len__(self):
 819         return len(self.items)
 820
 821     def __getitem__(self, index):
 822         return self.items[index]
 823
 824     def append(self, op_desc):
 825         self.items.append(op_desc)
 826         self.bases[op_desc.base_name] = op_desc
 827
 828     def find_base(self, base_name):
 829         # like self.bases[base_name], but returns None if not found
 830         # (rather than raising exception)
 831         return self.bases.get(base_name)
 832
 833     # internal helper function for concat[Some]Attr{Strings|Lists}
 834     def __internalConcatAttrs(self, attr_name, filter, result):
 835         for op_desc in self.items:
 836             if filter(op_desc):
 837                 result += getattr(op_desc, attr_name)
 838         return result
 839
 840     # return a single string that is the concatenation of the (string)
 841     # values of the specified attribute for all operands
 842     def concatAttrStrings(self, attr_name):
 843         return self.__internalConcatAttrs(attr_name, lambda x: 1, '')
 844
 845     # like concatAttrStrings, but only include the values for the operands
 846     # for which the provided filter function returns true
 847     def concatSomeAttrStrings(self, filter, attr_name):
 848         return self.__internalConcatAttrs(attr_name, filter, '')
 849
 850     # return a single list that is the concatenation of the (list)
 851     # values of the specified attribute for all operands
 852     def concatAttrLists(self, attr_name):
 853         return self.__internalConcatAttrs(attr_name, lambda x: 1, [])
 854
 855     # like concatAttrLists, but only include the values for the operands
 856     # for which the provided filter function returns true
 857     def concatSomeAttrLists(self, filter, attr_name):
 858         return self.__internalConcatAttrs(attr_name, filter, [])
 859
 860     def sort(self):
 861         self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
 862
 863 class SubOperandList(OperandList):
 864     '''Find all the operands in the given code block.  Returns an operand
 865     descriptor list (instance of class OperandList).'''
 866     def __init__(self, parser, code, master_list):
 867         self.items = []
 868         self.bases = {}
 869         # delete comments so we don't match on reg specifiers inside
 870         code = commentRE.sub('', code)
 871         # search for operands
 872         next_pos = 0
 873         while 1:
 874             match = parser.operandsRE.search(code, next_pos)
 875             if not match:
 876                 # no more matches: we're done
 877                 break
 878             op = match.groups()
 879             # regexp groups are operand full name, base, and extension
 880             (op_full, op_base, op_ext) = op
 881             # find this op in the master list
 882             op_desc = master_list.find_base(op_base)
 883             if not op_desc:
 884                 error('Found operand %s which is not in the master list!' \
 885                       ' This is an internal error' % op_base)
 886             else:
 887                 # See if we've already found this operand
 888                 op_desc = self.find_base(op_base)
 889                 if not op_desc:
 890                     # if not, add a reference to it to this sub list
 891                     self.append(master_list.bases[op_base])
 892
 893             # start next search after end of current match
 894             next_pos = match.end()
 895         self.sort()
 896         self.memOperand = None
 897         # Whether the whole PC needs to be read so parts of it can be accessed
 898         self.readPC = False
 899         # Whether the whole PC needs to be written after parts of it were
 900         # changed
 901         self.setPC = False
 902         # Whether this instruction manipulates the whole PC or parts of it.
 903         # Mixing the two is a bad idea and flagged as an error.
 904         self.pcPart = None
 905         for op_desc in self.items:
 906             if op_desc.isPCPart():
 907                 self.readPC = True
 908                 if op_desc.is_dest:
 909                     self.setPC = True
 910             if op_desc.isPCState():
 911                 if self.pcPart is not None:
 912                     if self.pcPart and not op_desc.isPCPart() or \
 913                             not self.pcPart and op_desc.isPCPart():
 914                         error("Mixed whole and partial PC state operands.")
 915                 self.pcPart = op_desc.isPCPart()
 916             if op_desc.isMem():
 917                 if self.memOperand:
 918                     error("Code block has more than one memory operand.")
 919                 self.memOperand = op_desc
 920
# Regular expression object to match C++ '//' comments.  The match runs
# from the '//' through the newline that ends the line, so substituting
# '' removes the line break as well.  Used by the OperandList and
# SubOperandList constructors to strip comments before operand matching.
commentRE = re.compile(r'//.*\n')

# Regular expression object to match the start of an assignment:
# optional whitespace followed by a '=' that is not immediately followed
# by another '=' (so '==' is not taken for an assignment).  Used by the
# OperandList constructor to decide whether an operand is a destination.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
 928
 929 def makeFlagConstructor(flag_list):
 930     if len(flag_list) == 0:
 931         return ''
 932     # filter out repeated flags
 933     flag_list.sort()
 934     i = 1
 935     while i < len(flag_list):
 936         if flag_list[i] == flag_list[i-1]:
 937             del flag_list[i]
 938         else:
 939             i += 1
 940     pre = '\n\tflags['
 941     post = '] = true;'
 942     code = pre + string.join(flag_list, post + pre) + post
 943     return code
 944
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# applies this with match(), i.e. anchored at the start of the string.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.  Also applied with
# match() by InstObjParams, and only after instFlagRE has failed.
opClassRE = re.compile(r'.*Op|No_OpClass')
 950
 951 class InstObjParams(object):
 952     def __init__(self, parser, mnem, class_name, base_class = '',
 953                  snippets = {}, opt_args = []):
 954         self.mnemonic = mnem
 955         self.class_name = class_name
 956         self.base_class = base_class
 957         if not isinstance(snippets, dict):
 958             snippets = {'code' : snippets}
 959         compositeCode = ' '.join(map(str, snippets.values()))
 960         self.snippets = snippets
 961
 962         self.operands = OperandList(parser, compositeCode)
 963         self.constructor = self.operands.concatAttrStrings('constructor')
 964         self.constructor += \
 965                  '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
 966         self.constructor += \
 967                  '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
 968         self.constructor += \
 969                  '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
 970         self.constructor += \
 971                  '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
 972         self.flags = self.operands.concatAttrLists('flags')
 973
 974         # Make a basic guess on the operand class (function unit type).
 975         # These are good enough for most cases, and can be overridden
 976         # later otherwise.
 977         if 'IsStore' in self.flags:
 978             self.op_class = 'MemWriteOp'
 979         elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
 980             self.op_class = 'MemReadOp'
 981         elif 'IsFloating' in self.flags:
 982             self.op_class = 'FloatAddOp'
 983         else:
 984             self.op_class = 'IntAluOp'
 985
 986         # Optional arguments are assumed to be either StaticInst flags
 987         # or an OpClass value.  To avoid having to import a complete
 988         # list of these values to match against, we do it ad-hoc
 989         # with regexps.
 990         for oa in opt_args:
 991             if instFlagRE.match(oa):
 992                 self.flags.append(oa)
 993             elif opClassRE.match(oa):
 994                 self.op_class = oa
 995             else:
 996                 error('InstObjParams: optional arg "%s" not recognized '
 997                       'as StaticInst::Flag or OpClass.' % oa)
 998
 999         # add flag initialization to contructor here to include
1000         # any flags added via opt_args
1001         self.constructor += makeFlagConstructor(self.flags)
1002
1003         # if 'IsFloating' is set, add call to the FP enable check
1004         # function (which should be provided by isa_desc via a declare)
1005         if 'IsFloating' in self.flags:
1006             self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
1007         else:
1008             self.fp_enable_check = ''
1009
1010 ##############
1011 # Stack: a simple stack object.  Used for both formats (formatStack)
1012 # and default cases (defaultStack).  Simply wraps a list to give more
1013 # stack-like syntax and enable initialization with an argument list
1014 # (as opposed to an argument that's a list).
1015
class Stack(list):
    '''A list with stack-style method names.  The initial contents are
    given as separate arguments rather than as one list argument.'''

    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        '''Put item on top of the stack.'''
        self.append(item)

    def top(self):
        '''Return the top item without removing it.'''
        return self[-1]
1025
1026 #######################
1027 #
1028 # Output file template
1029 #
1030
# %-format template for a generated C++ file.  It expects the named
# keys filename, includes, global_output, namespace, namespace_output
# and decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''

# %-format template for the generated file that defines the
# MaxInstSrcRegs / MaxInstDestRegs constants.  It expects the named keys
# filename and namespace plus the integer keys MaxInstSrcRegs and
# MaxInstDestRegs (formatted with %d).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1066
1067 class ISAParser(Grammar):
1068     def __init__(self, output_dir, cpu_models):
1069         super(ISAParser, self).__init__()
1070         self.output_dir = output_dir
1071
1072         self.cpuModels = cpu_models
1073
1074         # variable to hold templates
1075         self.templateMap = {}
1076
1077         # This dictionary maps format name strings to Format objects.
1078         self.formatMap = {}
1079
1080         # The format stack.
1081         self.formatStack = Stack(NoFormat())
1082
1083         # The default case stack.
1084         self.defaultStack = Stack(None)
1085
1086         # Stack that tracks current file and line number.  Each
1087         # element is a tuple (filename, lineno) that records the
1088         # *current* filename and the line number in the *previous*
1089         # file where it was included.
1090         self.fileNameStack = Stack()
1091
1092         symbols = ('makeList', 're', 'string')
1093         self.exportContext = dict([(s, eval(s)) for s in symbols])
1094
1095         self.maxInstSrcRegs = 0
1096         self.maxInstDestRegs = 0
1097
1098     #####################################################################
1099     #
1100     #                                Lexer
1101     #
1102     # The PLY lexer module takes two things as input:
1103     # - A list of token names (the string list 'tokens')
1104     # - A regular expression describing a match for each token.  The
1105     #   regexp for token FOO can be provided in two ways:
1106     #   - as a string variable named t_FOO
1107     #   - as the doc string for a function named t_FOO.  In this case,
1108     #     the function is also executed, allowing an action to be
1109     #     associated with each token match.
1110     #
1111     #####################################################################
1112
1113     # Reserved words.  These are listed separately as they are matched
1114     # using the same regexp as generic IDs, but distinguished in the
1115     # t_ID() function.  The PLY documentation suggests this approach.
    # Token names of the reserved words.  reserved_map (below) is built
    # from these by lower-casing each name.
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )

    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched but never returned. commented out to
    # suppress PLY warning
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1155
1156     # Regular expressions for token matching
    # Punctuation tokens, each given as a plain regexp string.
    # NOTE(review): ':' is a prefix of '::'; PLY is documented to try
    # string-defined rules longest-regexp-first, which is what would let
    # DBLCOLON win -- confirm against the PLY version in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'

    # Identifiers and reserved words
    # reserved_map maps the lower-case spelling of each reserved word
    # (as it appears in the input) to its token name.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1177
1178     def t_ID(self, t):
1179         r'[A-Za-z_]\w*'
1180         t.type = self.reserved_map.get(t.value, 'ID')
1181         return t
1182
1183     # Integer literal
1184     def t_INTLIT(self, t):
1185         r'-?(0x[\da-fA-F]+)|\d+'
1186         try:
1187             t.value = int(t.value,0)
1188         except ValueError:
1189             error(t, 'Integer value "%s" too large' % t.value)
1190             t.value = 0
1191         return t
1192
1193     # String literal.  Note that these use only single quotes, and
1194     # can span multiple lines.
1195     def t_STRLIT(self, t):
1196         r"(?m)'([^'])+'"
1197         # strip off quotes
1198         t.value = t.value[1:-1]
1199         t.lexer.lineno += t.value.count('\n')
1200         return t
1201
1202
1203     # "Code literal"... like a string literal, but delimiters are
1204     # '{{' and '}}' so they get formatted nicely under emacs c-mode
1205     def t_CODELIT(self, t):
1206         r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1207         # strip off {{ & }}
1208         t.value = t.value[2:-2]
1209         t.lexer.lineno += t.value.count('\n')
1210         return t
1211
1212     def t_CPPDIRECTIVE(self, t):
1213         r'^\#[^\#].*\n'
1214         t.lexer.lineno += t.value.count('\n')
1215         return t
1216
1217     def t_NEWFILE(self, t):
1218         r'^\#\#newfile\s+"[\w/.-]*"'
1219         self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1220         t.lexer.lineno = 0
1221
1222     def t_ENDFILE(self, t):
1223         r'^\#\#endfile'
1224         (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1225
1226     #
1227     # The functions t_NEWLINE, t_ignore, and t_error are
1228     # special for the lex module.
1229     #
1230
1231     # Newlines
1232     def t_NEWLINE(self, t):
1233         r'\n+'
1234         t.lexer.lineno += t.value.count('\n')
1235
1236     # Comments
    def t_comment(self, t):
        r'//.*'
        # The rule has no action and returns nothing, so the text of a
        # '//' comment (up to, but not including, the end of the line)
        # is matched and not handed on as a token.
1239
    # Completely ignored characters: space, tab and form feed (\x0c)
    t_ignore = ' \t\x0c'
1242
1243     # Error handler
1244     def t_error(self, t):
1245         error(t, "illegal character '%s'" % t.value[0])
1246         t.skip(1)
1247
1248     #####################################################################
1249     #
1250     #                                Parser
1251     #
1252     # Every function whose name starts with 'p_' defines a grammar
1253     # rule.  The rule is encoded in the function's doc string, while
1254     # the function body provides the action taken when the rule is
1255     # matched.  The argument to each function is a list of the values
1256     # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1257     # symbols on the RHS.  For tokens, the value is copied from the
1258     # t.value attribute provided by the lexer.  For non-terminals, the
1259     # value is assigned by the producing rule; i.e., the job of the
1260     # grammar rule function is to set the value for the non-terminal
1261     # on the LHS (by assigning to t[0]).
1262     #####################################################################
1263
1264     # The LHS of the first grammar rule is used as the start symbol
1265     # (in this case, 'specification').  Note that this rule enforces
1266     # that there will be exactly one namespace declaration, with 0 or
1267     # more global defs/decls before and after it.  The defs & decls
1268     # before the namespace decl will be outside the namespace; those
1269     # after will be inside.  The decoder function is always inside the
1270     # namespace.
1271     def p_specification(self, t):
1272         'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1273         global_code = t[1]
1274         isa_name = t[2]
1275         namespace = isa_name + "Inst"
1276         # wrap the decode block as a function definition
1277         t[4].wrap_decode_block('''
1278 StaticInstPtr
1279 %(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
1280 {
1281     using namespace %(namespace)s;
1282 ''' % vars(), '}')
1283         # both the latter output blocks and the decode block are in
1284         # the namespace
1285         namespace_code = t[3] + t[4]
1286         # pass it all back to the caller of yacc.parse()
1287         t[0] = (isa_name, namespace, global_code, namespace_code)
1288
1289     # ISA name declaration looks like "namespace <foo>;"
1290     def p_name_decl(self, t):
1291         'name_decl : NAMESPACE ID SEMI'
1292         t[0] = t[2]
1293
1294     # 'opt_defs_and_outputs' is a possibly empty sequence of
1295     # def and/or output statements.
1296     def p_opt_defs_and_outputs_0(self, t):
1297         'opt_defs_and_outputs : empty'
1298         t[0] = GenCode(self)
1299
1300     def p_opt_defs_and_outputs_1(self, t):
1301         'opt_defs_and_outputs : defs_and_outputs'
1302         t[0] = t[1]
1303
1304     def p_defs_and_outputs_0(self, t):
1305         'defs_and_outputs : def_or_output'
1306         t[0] = t[1]
1307
1308     def p_defs_and_outputs_1(self, t):
1309         'defs_and_outputs : defs_and_outputs def_or_output'
1310         t[0] = t[1] + t[2]
1311
1312     # The list of possible definition/output statements.
1313     def p_def_or_output(self, t):
1314         '''def_or_output : def_format
1315                          | def_bitfield
1316                          | def_bitfield_struct
1317                          | def_template
1318                          | def_operand_types
1319                          | def_operands
1320                          | output_header
1321                          | output_decoder
1322                          | output_exec
1323                          | global_let'''
1324         t[0] = t[1]
1325
1326     # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1327     # directly to the appropriate output section.
1328
1329     # Massage output block by substituting in template definitions and
1330     # bit operators.  We handle '%'s embedded in the string that don't
1331     # indicate template substitutions (or CPU-specific symbols, which
1332     # get handled in GenCode) by doubling them first so that the
1333     # format operation will reduce them back to single '%'s.
1334     def process_output(self, s):
1335         s = self.protectNonSubstPercents(s)
1336         # protects cpu-specific symbols too
1337         s = self.protectCpuSymbols(s)
1338         return substBitOps(s % self.templateMap)
1339
1340     def p_output_header(self, t):
1341         'output_header : OUTPUT HEADER CODELIT SEMI'
1342         t[0] = GenCode(self, header_output = self.process_output(t[3]))
1343
1344     def p_output_decoder(self, t):
1345         'output_decoder : OUTPUT DECODER CODELIT SEMI'
1346         t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1347
1348     def p_output_exec(self, t):
1349         'output_exec : OUTPUT EXEC CODELIT SEMI'
1350         t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1351
1352     # global let blocks 'let {{...}}' (Python code blocks) are
1353     # executed directly when seen.  Note that these execute in a
1354     # special variable context 'exportContext' to prevent the code
1355     # from polluting this script's namespace.
    def p_global_let(self, t):
        'global_let : LET CODELIT SEMI'
        self.updateExportContext()
        # Reset the four output variables so that whatever the let block
        # leaves in them afterwards came from this block only.
        self.exportContext["header_output"] = ''
        self.exportContext["decoder_output"] = ''
        self.exportContext["exec_output"] = ''
        self.exportContext["decode_block"] = ''
        try:
            # Run the (re-indented) Python code with exportContext as
            # its namespace.
            exec fixPythonIndentation(t[2]) in self.exportContext
        except Exception, exc:
            # With the module-level 'debug' flag set, let the original
            # exception propagate instead of reporting it via error().
            if debug:
                raise
            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
        # Collect what the block stored in the output variables.
        t[0] = GenCode(self,
                       header_output=self.exportContext["header_output"],
                       decoder_output=self.exportContext["decoder_output"],
                       exec_output=self.exportContext["exec_output"],
                       decode_block=self.exportContext["decode_block"])
1374
1375     # Define the mapping from operand type extensions to C++ types and
1376     # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        try:
            # The code literal is the body of a Python dict display;
            # wrap it in braces and evaluate it.
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            # With the module-level 'debug' flag set, re-raise the
            # original exception rather than reporting it via error().
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        # NOTE(review): if error() ever returns instead of raising,
        # user_dict is unbound at this point -- confirm error() raises.
        self.buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1388
1389     # Define the mapping from operand names to operand classes and
1390     # other traits.  Stored in operandNameMap.
    def p_def_operands(self, t):
        'def_operands : DEF OPERANDS CODELIT SEMI'
        # operandTypeMap is only checked for existence here; this method
        # does not create it.
        if not hasattr(self, 'operandTypeMap'):
            error(t, 'error: operand types must be defined before operands')
        try:
            # The code literal is the body of a Python dict display;
            # it is evaluated with exportContext as its globals.
            user_dict = eval('{' + t[3] + '}', self.exportContext)
        except Exception, exc:
            # With the module-level 'debug' flag set, re-raise the
            # original exception rather than reporting it via error().
            if debug:
                raise
            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
        # NOTE(review): if error() ever returns instead of raising,
        # user_dict is unbound at this point -- confirm error() raises.
        self.buildOperandNameMap(user_dict, t.lexer.lineno)
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1403
1404     # A bitfield definition looks like:
1405     # 'def [signed] bitfield <ID> [<first>:<last>]'
1406     # This generates a preprocessor macro in the output file.
1407     def p_def_bitfield_0(self, t):
1408         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1409         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1410         if (t[2] == 'signed'):
1411             expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1412         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1413         t[0] = GenCode(self, header_output=hash_define)
1414
1415     # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1416     def p_def_bitfield_1(self, t):
1417         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1418         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1419         if (t[2] == 'signed'):
1420             expr = 'sext<%d>(%s)' % (1, expr)
1421         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1422         t[0] = GenCode(self, header_output=hash_define)
1423
1424     # alternate form for structure member: 'def bitfield <ID> <ID>'
1425     def p_def_bitfield_struct(self, t):
1426         'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1427         if (t[2] != ''):
1428             error(t, 'error: structure bitfields are always unsigned.')
1429         expr = 'machInst.%s' % t[5]
1430         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1431         t[0] = GenCode(self, header_output=hash_define)
1432
1433     def p_id_with_dot_0(self, t):
1434         'id_with_dot : ID'
1435         t[0] = t[1]
1436
1437     def p_id_with_dot_1(self, t):
1438         'id_with_dot : ID DOT id_with_dot'
1439         t[0] = t[1] + t[2] + t[3]
1440
1441     def p_opt_signed_0(self, t):
1442         'opt_signed : SIGNED'
1443         t[0] = t[1]
1444
1445     def p_opt_signed_1(self, t):
1446         'opt_signed : empty'
1447         t[0] = ''
1448
1449     def p_def_template(self, t):
1450         'def_template : DEF TEMPLATE ID CODELIT SEMI'
1451         self.templateMap[t[3]] = Template(self, t[4])
1452         t[0] = GenCode(self)
1453
1454     # An instruction format definition looks like
1455     # "def format <fmt>(<params>) {{...}};"
1456     def p_def_format(self, t):
1457         'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1458         (id, params, code) = (t[3], t[5], t[7])
1459         self.defFormat(id, params, code, t.lexer.lineno)
1460         t[0] = GenCode(self)
1461
1462     # The formal parameter list for an instruction format is a
1463     # possibly empty list of comma-separated parameters.  Positional
1464     # (standard, non-keyword) parameters must come first, followed by
1465     # keyword parameters, followed by a '*foo' parameter that gets
1466     # excess positional arguments (as in Python).  Each of these three
1467     # parameter categories is optional.
1468     #
1469     # Note that we do not support the '**foo' parameter for collecting
1470     # otherwise undefined keyword args.  Otherwise the parameter list
1471     # is (I believe) identical to what is supported in Python.
1472     #
    # The param list generates a single flat list of parameter strings:
    # positional names first, then 'name = <repr of default>' entries
    # for keyword params, then the '*name' excess-args entry, if any.
1476     def p_param_list_0(self, t):
1477         'param_list : positional_param_list COMMA nonpositional_param_list'
1478         t[0] = t[1] + t[3]
1479
1480     def p_param_list_1(self, t):
1481         '''param_list : positional_param_list
1482                       | nonpositional_param_list'''
1483         t[0] = t[1]
1484
1485     def p_positional_param_list_0(self, t):
1486         'positional_param_list : empty'
1487         t[0] = []
1488
1489     def p_positional_param_list_1(self, t):
1490         'positional_param_list : ID'
1491         t[0] = [t[1]]
1492
1493     def p_positional_param_list_2(self, t):
1494         'positional_param_list : positional_param_list COMMA ID'
1495         t[0] = t[1] + [t[3]]
1496
1497     def p_nonpositional_param_list_0(self, t):
1498         'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1499         t[0] = t[1] + t[3]
1500
1501     def p_nonpositional_param_list_1(self, t):
1502         '''nonpositional_param_list : keyword_param_list
1503                                     | excess_args_param'''
1504         t[0] = t[1]
1505
1506     def p_keyword_param_list_0(self, t):
1507         'keyword_param_list : keyword_param'
1508         t[0] = [t[1]]
1509
1510     def p_keyword_param_list_1(self, t):
1511         'keyword_param_list : keyword_param_list COMMA keyword_param'
1512         t[0] = t[1] + [t[3]]
1513
1514     def p_keyword_param(self, t):
1515         'keyword_param : ID EQUALS expr'
1516         t[0] = t[1] + ' = ' + t[3].__repr__()
1517
1518     def p_excess_args_param(self, t):
1519         'excess_args_param : ASTERISK ID'
1520         # Just concatenate them: '*ID'.  Wrap in list to be consistent
1521         # with positional_param_list and keyword_param_list.
1522         t[0] = [t[1] + t[2]]
1523
1524     # End of format definition-related rules.
1525     ##############
1526
1527     #
1528     # A decode block looks like:
1529     #       decode <field1> [, <field2>]* [default <inst>] { ... }
1530     #
1531     def p_decode_block(self, t):
1532         'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1533         default_defaults = self.defaultStack.pop()
1534         codeObj = t[5]
1535         # use the "default defaults" only if there was no explicit
1536         # default statement in decode_stmt_list
1537         if not codeObj.has_decode_default:
1538             codeObj += default_defaults
1539         codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1540         t[0] = codeObj
1541
1542     # The opt_default statement serves only to push the "default
1543     # defaults" onto defaultStack.  This value will be used by nested
1544     # decode blocks, and used and popped off when the current
1545     # decode_block is processed (in p_decode_block() above).
1546     def p_opt_default_0(self, t):
1547         'opt_default : empty'
1548         # no default specified: reuse the one currently at the top of
1549         # the stack
1550         self.defaultStack.push(self.defaultStack.top())
1551         # no meaningful value returned
1552         t[0] = None
1553
1554     def p_opt_default_1(self, t):
1555         'opt_default : DEFAULT inst'
1556         # push the new default
1557         codeObj = t[2]
1558         codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1559         self.defaultStack.push(codeObj)
1560         # no meaningful value returned
1561         t[0] = None
1562
1563     def p_decode_stmt_list_0(self, t):
1564         'decode_stmt_list : decode_stmt'
1565         t[0] = t[1]
1566
1567     def p_decode_stmt_list_1(self, t):
1568         'decode_stmt_list : decode_stmt decode_stmt_list'
1569         if (t[1].has_decode_default and t[2].has_decode_default):
1570             error(t, 'Two default cases in decode block')
1571         t[0] = t[1] + t[2]
1572
1573     #
1574     # Decode statement rules
1575     #
1576     # There are four types of statements allowed in a decode block:
1577     # 1. Format blocks 'format <foo> { ... }'
1578     # 2. Nested decode blocks
1579     # 3. Instruction definitions.
1580     # 4. C preprocessor directives.
1581
1582
1583     # Preprocessor directives found in a decode statement list are
1584     # passed through to the output, replicated to all of the output
1585     # code streams.  This works well for ifdefs, so we can ifdef out
1586     # both the declarations and the decode cases generated by an
1587     # instruction definition.  Handling them as part of the grammar
1588     # makes it easy to keep them in the right place with respect to
1589     # the code generated by the other statements.
1590     def p_decode_stmt_cpp(self, t):
1591         'decode_stmt : CPPDIRECTIVE'
1592         t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1593
1594     # A format block 'format <foo> { ... }' sets the default
1595     # instruction format used to handle instruction definitions inside
1596     # the block.  This format can be overridden by using an explicit
1597     # format on the instruction definition or with a nested format
1598     # block.
1599     def p_decode_stmt_format(self, t):
1600         'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1601         # The format will be pushed on the stack when 'push_format_id'
1602         # is processed (see below).  Once the parser has recognized
1603         # the full production (though the right brace), we're done
1604         # with the format, so now we can pop it.
1605         self.formatStack.pop()
1606         t[0] = t[4]
1607
1608     # This rule exists so we can set the current format (& push the
1609     # stack) when we recognize the format name part of the format
1610     # block.
1611     def p_push_format_id(self, t):
1612         'push_format_id : ID'
1613         try:
1614             self.formatStack.push(self.formatMap[t[1]])
1615             t[0] = ('', '// format %s' % t[1])
1616         except KeyError:
1617             error(t, 'instruction format "%s" not defined.' % t[1])
1618
1619     # Nested decode block: if the value of the current field matches
1620     # the specified constant, do a nested decode on some other field.
1621     def p_decode_stmt_decode(self, t):
1622         'decode_stmt : case_label COLON decode_block'
1623         label = t[1]
1624         codeObj = t[3]
1625         # just wrap the decoding code from the block as a case in the
1626         # outer switch statement.
1627         codeObj.wrap_decode_block('\n%s:\n' % label)
1628         codeObj.has_decode_default = (label == 'default')
1629         t[0] = codeObj
1630
1631     # Instruction definition (finally!).
1632     def p_decode_stmt_inst(self, t):
1633         'decode_stmt : case_label COLON inst SEMI'
1634         label = t[1]
1635         codeObj = t[3]
1636         codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1637         codeObj.has_decode_default = (label == 'default')
1638         t[0] = codeObj
1639
1640     # The case label is either a list of one or more constants or
1641     # 'default'
1642     def p_case_label_0(self, t):
1643         'case_label : intlit_list'
1644         def make_case(intlit):
1645             if intlit >= 2**32:
1646                 return 'case ULL(%#x)' % intlit
1647             else:
1648                 return 'case %#x' % intlit
1649         t[0] = ': '.join(map(make_case, t[1]))
1650
1651     def p_case_label_1(self, t):
1652         'case_label : DEFAULT'
1653         t[0] = 'default'
1654
1655     #
1656     # The constant list for a decode case label must be non-empty, but
1657     # may have one or more comma-separated integer literals in it.
1658     #
1659     def p_intlit_list_0(self, t):
1660         'intlit_list : INTLIT'
1661         t[0] = [t[1]]
1662
1663     def p_intlit_list_1(self, t):
1664         'intlit_list : intlit_list COMMA INTLIT'
1665         t[0] = t[1]
1666         t[0].append(t[3])
1667
1668     # Define an instruction using the current instruction format
1669     # (specified by an enclosing format block).
1670     # "<mnemonic>(<args>)"
1671     def p_inst_0(self, t):
1672         'inst : ID LPAREN arg_list RPAREN'
1673         # Pass the ID and arg list to the current format class to deal with.
1674         currentFormat = self.formatStack.top()
1675         codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1676         args = ','.join(map(str, t[3]))
1677         args = re.sub('(?m)^', '//', args)
1678         args = re.sub('^//', '', args)
1679         comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1680         codeObj.prepend_all(comment)
1681         t[0] = codeObj
1682
1683     # Define an instruction using an explicitly specified format:
1684     # "<fmt>::<mnemonic>(<args>)"
1685     def p_inst_1(self, t):
1686         'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1687         try:
1688             format = self.formatMap[t[1]]
1689         except KeyError:
1690             error(t, 'instruction format "%s" not defined.' % t[1])
1691
1692         codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1693         comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1694         codeObj.prepend_all(comment)
1695         t[0] = codeObj
1696
1697     # The arg list generates a tuple, where the first element is a
1698     # list of the positional args and the second element is a dict
1699     # containing the keyword args.
1700     def p_arg_list_0(self, t):
1701         'arg_list : positional_arg_list COMMA keyword_arg_list'
1702         t[0] = ( t[1], t[3] )
1703
1704     def p_arg_list_1(self, t):
1705         'arg_list : positional_arg_list'
1706         t[0] = ( t[1], {} )
1707
1708     def p_arg_list_2(self, t):
1709         'arg_list : keyword_arg_list'
1710         t[0] = ( [], t[1] )
1711
1712     def p_positional_arg_list_0(self, t):
1713         'positional_arg_list : empty'
1714         t[0] = []
1715
1716     def p_positional_arg_list_1(self, t):
1717         'positional_arg_list : expr'
1718         t[0] = [t[1]]
1719
1720     def p_positional_arg_list_2(self, t):
1721         'positional_arg_list : positional_arg_list COMMA expr'
1722         t[0] = t[1] + [t[3]]
1723
1724     def p_keyword_arg_list_0(self, t):
1725         'keyword_arg_list : keyword_arg'
1726         t[0] = t[1]
1727
1728     def p_keyword_arg_list_1(self, t):
1729         'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1730         t[0] = t[1]
1731         t[0].update(t[3])
1732
1733     def p_keyword_arg(self, t):
1734         'keyword_arg : ID EQUALS expr'
1735         t[0] = { t[1] : t[3] }
1736
1737     #
1738     # Basic expressions.  These constitute the argument values of
1739     # "function calls" (i.e. instruction definitions in the decode
1740     # block) and default values for formal parameters of format
1741     # functions.
1742     #
1743     # Right now, these are either strings, integers, or (recursively)
1744     # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1746     # there isn't really a variable namespace to refer to).
1747     #
1748     def p_expr_0(self, t):
1749         '''expr : ID
1750                 | INTLIT
1751                 | STRLIT
1752                 | CODELIT'''
1753         t[0] = t[1]
1754
1755     def p_expr_1(self, t):
1756         '''expr : LBRACKET list_expr RBRACKET'''
1757         t[0] = t[2]
1758
1759     def p_list_expr_0(self, t):
1760         'list_expr : expr'
1761         t[0] = [t[1]]
1762
1763     def p_list_expr_1(self, t):
1764         'list_expr : list_expr COMMA expr'
1765         t[0] = t[1] + [t[3]]
1766
1767     def p_list_expr_2(self, t):
1768         'list_expr : empty'
1769         t[0] = []
1770
1771     #
1772     # Empty production... use in other rules for readability.
1773     #
1774     def p_empty(self, t):
1775         'empty :'
1776         pass
1777
1778     # Parse error handler.  Note that the argument here is the
1779     # offending *token*, not a grammar symbol (hence the need to use
1780     # t.value)
1781     def p_error(self, t):
1782         if t:
1783             error(t, "syntax error at '%s'" % t.value)
1784         else:
1785             error("unknown syntax error")
1786
1787     # END OF GRAMMAR RULES
1788
1789     def updateExportContext(self):
1790
1791         # create a continuation that allows us to grab the current parser
1792         def wrapInstObjParams(*args):
1793             return InstObjParams(self, *args)
1794         self.exportContext['InstObjParams'] = wrapInstObjParams
1795         self.exportContext.update(self.templateMap)
1796
1797     def defFormat(self, id, params, code, lineno):
1798         '''Define a new format'''
1799
1800         # make sure we haven't already defined this one
1801         if id in self.formatMap:
1802             error(lineno, 'format %s redefined.' % id)
1803
1804         # create new object and store in global map
1805         self.formatMap[id] = Format(id, params, code)
1806
1807     def expandCpuSymbolsToDict(self, template):
1808         '''Expand template with CPU-specific references into a
1809         dictionary with an entry for each CPU model name.  The entry
1810         key is the model name and the corresponding value is the
1811         template with the CPU-specific refs substituted for that
1812         model.'''
1813
1814         # Protect '%'s that don't go with CPU-specific terms
1815         t = re.sub(r'%(?!\(CPU_)', '%%', template)
1816         result = {}
1817         for cpu in self.cpuModels:
1818             result[cpu.name] = t % cpu.strings
1819         return result
1820
1821     def expandCpuSymbolsToString(self, template):
1822         '''*If* the template has CPU-specific references, return a
1823         single string containing a copy of the template for each CPU
1824         model with the corresponding values substituted in.  If the
1825         template has no CPU-specific references, it is returned
1826         unmodified.'''
1827
1828         if template.find('%(CPU_') != -1:
1829             return reduce(lambda x,y: x+y,
1830                           self.expandCpuSymbolsToDict(template).values())
1831         else:
1832             return template
1833
1834     def protectCpuSymbols(self, template):
1835         '''Protect CPU-specific references by doubling the
1836         corresponding '%'s (in preparation for substituting a different
1837         set of references into the template).'''
1838
1839         return re.sub(r'%(?=\(CPU_)', '%%', template)
1840
1841     def protectNonSubstPercents(self, s):
1842         '''Protect any non-dict-substitution '%'s in a format string
1843         (i.e. those not followed by '(')'''
1844
1845         return re.sub(r'%(?!\()', '%%', s)
1846
1847     def buildOperandTypeMap(self, user_dict, lineno):
1848         """Generate operandTypeMap from the user's 'def operand_types'
1849         statement."""
1850         operand_type = {}
1851         for (ext, (desc, size)) in user_dict.iteritems():
1852             if desc == 'signed int':
1853                 ctype = 'int%d_t' % size
1854                 is_signed = 1
1855             elif desc == 'unsigned int':
1856                 ctype = 'uint%d_t' % size
1857                 is_signed = 0
1858             elif desc == 'float':
1859                 is_signed = 1       # shouldn't really matter
1860                 if size == 32:
1861                     ctype = 'float'
1862                 elif size == 64:
1863                     ctype = 'double'
1864             elif desc == 'twin64 int':
1865                 is_signed = 0
1866                 ctype = 'Twin64_t'
1867             elif desc == 'twin32 int':
1868                 is_signed = 0
1869                 ctype = 'Twin32_t'
1870             if ctype == '':
1871                 error(parser, lineno,
1872                       'Unrecognized type description "%s" in user_dict')
1873             operand_type[ext] = (size, ctype, is_signed)
1874
1875         self.operandTypeMap = operand_type
1876
1877     def buildOperandNameMap(self, user_dict, lineno):
1878         operand_name = {}
1879         for op_name, val in user_dict.iteritems():
1880             base_cls_name, dflt_ext, reg_spec, flags, sort_pri = val[:5]
1881             if len(val) > 5:
1882                 read_code = val[5]
1883             else:
1884                 read_code = None
1885             if len(val) > 6:
1886                 write_code = val[6]
1887             else:
1888                 write_code = None
1889             if len(val) > 7:
1890                 error(lineno,
1891                       'error: too many attributes for operand "%s"' %
1892                       base_cls_name)
1893
1894             # Canonical flag structure is a triple of lists, where each list
1895             # indicates the set of flags implied by this operand always, when
1896             # used as a source, and when used as a dest, respectively.
1897             # For simplicity this can be initialized using a variety of fairly
1898             # obvious shortcuts; we convert these to canonical form here.
1899             if not flags:
1900                 # no flags specified (e.g., 'None')
1901                 flags = ( [], [], [] )
1902             elif isinstance(flags, str):
1903                 # a single flag: assumed to be unconditional
1904                 flags = ( [ flags ], [], [] )
1905             elif isinstance(flags, list):
1906                 # a list of flags: also assumed to be unconditional
1907                 flags = ( flags, [], [] )
1908             elif isinstance(flags, tuple):
1909                 # it's a tuple: it should be a triple,
1910                 # but each item could be a single string or a list
1911                 (uncond_flags, src_flags, dest_flags) = flags
1912                 flags = (makeList(uncond_flags),
1913                          makeList(src_flags), makeList(dest_flags))
1914             # Accumulate attributes of new operand class in tmp_dict
1915             tmp_dict = {}
1916             attrList = ['reg_spec', 'flags', 'sort_pri',
1917                         'read_code', 'write_code']
1918             if dflt_ext:
1919                 (dflt_size, dflt_ctype, dflt_is_signed) = \
1920                             self.operandTypeMap[dflt_ext]
1921                 attrList.extend(['dflt_size', 'dflt_ctype',
1922                                  'dflt_is_signed', 'dflt_ext'])
1923             for attr in attrList:
1924                 tmp_dict[attr] = eval(attr)
1925             tmp_dict['base_name'] = op_name
1926             # New class name will be e.g. "IntReg_Ra"
1927             cls_name = base_cls_name + '_' + op_name
1928             # Evaluate string arg to get class object.  Note that the
1929             # actual base class for "IntReg" is "IntRegOperand", i.e. we
1930             # have to append "Operand".
1931             try:
1932                 base_cls = eval(base_cls_name + 'Operand')
1933             except NameError:
1934                 error(lineno,
1935                       'error: unknown operand base class "%s"' % base_cls_name)
1936             # The following statement creates a new class called
1937             # <cls_name> as a subclass of <base_cls> with the attributes
1938             # in tmp_dict, just as if we evaluated a class declaration.
1939             operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)
1940
1941         self.operandNameMap = operand_name
1942
1943         # Define operand variables.
1944         operands = user_dict.keys()
1945
1946         operandsREString = (r'''
1947         (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
1948         ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
1949         (?![\w\.])       # neg. lookahead assertion: prevent partial matches
1950         '''
1951                             % string.join(operands, '|'))
1952
1953         self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
1954
1955         # Same as operandsREString, but extension is mandatory, and only two
1956         # groups are returned (base and ext, not full name as above).
1957         # Used for subtituting '_' for '.' to make C++ identifiers.
1958         operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
1959                                    % string.join(operands, '|'))
1960
1961         self.operandsWithExtRE = \
1962             re.compile(operandsWithExtREString, re.MULTILINE)
1963
1964     def substMungedOpNames(self, code):
1965         '''Munge operand names in code string to make legal C++
1966         variable names.  This means getting rid of the type extension
1967         if any.  Will match base_name attribute of Operand object.)'''
1968         return self.operandsWithExtRE.sub(r'\1', code)
1969
1970     def mungeSnippet(self, s):
1971         '''Fix up code snippets for final substitution in templates.'''
1972         if isinstance(s, str):
1973             return self.substMungedOpNames(substBitOps(s))
1974         else:
1975             return s
1976
1977     def update_if_needed(self, file, contents):
1978         '''Update the output file only if the new contents are
1979         different from the current contents.  Minimizes the files that
1980         need to be rebuilt after minor changes.'''
1981
1982         file = os.path.join(self.output_dir, file)
1983         update = False
1984         if os.access(file, os.R_OK):
1985             f = open(file, 'r')
1986             old_contents = f.read()
1987             f.close()
1988             if contents != old_contents:
1989                 os.remove(file) # in case it's write-protected
1990                 update = True
1991             else:
1992                 print 'File', file, 'is unchanged'
1993         else:
1994             update = True
1995         if update:
1996             f = open(file, 'w')
1997             f.write(contents)
1998             f.close()
1999
    # This regular expression matches '##include' directives: the
    # directive must be the first non-whitespace text on its line, the
    # quoted path is captured in the named group 'filename' (word
    # characters, '/', '.' and '-' only), and anything after the closing
    # quote up to the end of the line is part of the match.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
                           re.MULTILINE)
2003
2004     def replace_include(self, matchobj, dirname):
2005         """Function to replace a matched '##include' directive with the
2006         contents of the specified file (with nested ##includes
2007         replaced recursively).  'matchobj' is an re match object
2008         (from a match of includeRE) and 'dirname' is the directory
2009         relative to which the file path should be resolved."""
2010
2011         fname = matchobj.group('filename')
2012         full_fname = os.path.normpath(os.path.join(dirname, fname))
2013         contents = '##newfile "%s"\n%s\n##endfile\n' % \
2014                    (full_fname, self.read_and_flatten(full_fname))
2015         return contents
2016
2017     def read_and_flatten(self, filename):
2018         """Read a file and recursively flatten nested '##include' files."""
2019
2020         current_dir = os.path.dirname(filename)
2021         try:
2022             contents = open(filename).read()
2023         except IOError:
2024             error('Error including file "%s"' % filename)
2025
2026         self.fileNameStack.push((filename, 0))
2027
2028         # Find any includes and include them
2029         def replace(matchobj):
2030             return self.replace_include(matchobj, current_dir)
2031         contents = self.includeRE.sub(replace, contents)
2032
2033         self.fileNameStack.pop()
2034         return contents
2035
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated output files.

        isa_desc_file is the path of the top-level ISA description;
        '##include'd files are flattened into it before parsing.  The
        outputs (decoder.hh, decoder.cc, one exec file per CPU model
        and max_inst_regs.hh) are written through update_if_needed().'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # NOTE: every template below is filled in from vars(), i.e. from
        # this function's local variables.  The templates are defined
        # outside this function; presumably they refer to locals such as
        # includes, global_output, namespace_output and decode_function
        # by name -- verify against the templates before renaming or
        # reordering anything here.

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2087
2088     def parse_isa_desc(self, *args, **kwargs):
2089         try:
2090             self._parse_isa_desc(*args, **kwargs)
2091         except ISAParserError, e:
2092             e.exit(self.fileNameStack)
2093
# Called as script: get args from command line.
# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # The CPU model file is executed in this module's namespace; it is
    # expected to define CpuModel, which is used on the next line.
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Every remaining argument is a key into CpuModel.dict.
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    # The parser is constructed with the output directory and the CPU
    # models, then run on the ISA description file.
    ISAParser(sys.argv[3], cpu_models).parse_isa_desc(sys.argv[2])