src/arch/isa_parser.py

   1 # Copyright (c) 2003-2005 The Regents of The University of Michigan
   2 # All rights reserved.
   3 #
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met: redistributions of source code must retain the above copyright
   7 # notice, this list of conditions and the following disclaimer;
   8 # redistributions in binary form must reproduce the above copyright
   9 # notice, this list of conditions and the following disclaimer in the
  10 # documentation and/or other materials provided with the distribution;
  11 # neither the name of the copyright holders nor the names of its
  12 # contributors may be used to endorse or promote products derived from
  13 # this software without specific prior written permission.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26 #
  27 # Authors: Steve Reinhardt
  28
  29 import os
  30 import sys
  31 import re
  32 import string
  33 import inspect, traceback
  34 # get type names
  35 from types import *
  36
  37 from m5.util.grammar import Grammar
  38
# Global debug switch.  It is the default value of the
# 'print_traceback' argument of ISAParserError.display() and
# ISAParserError.exit(), and when it is true Format.defineInst()
# re-raises exceptions from format code instead of turning them into
# parser errors.
debug=False
  40
  41 ###################
  42 # Utility functions
  43
  44 #
  45 # Indent every line in string 's' by two spaces
  46 # (except preprocessor directives).
  47 # Used to make nested code blocks look pretty.
  48 #
  49 def indent(s):
  50     return re.sub(r'(?m)^(?!#)', '  ', s)
  51
  52 #
  53 # Munge a somewhat arbitrarily formatted piece of Python code
  54 # (e.g. from a format 'let' block) into something whose indentation
  55 # will get by the Python parser.
  56 #
  57 # The two keys here are that Python will give a syntax error if
  58 # there's any whitespace at the beginning of the first line, and that
  59 # all lines at the same lexical nesting level must have identical
  60 # indentation.  Unfortunately the way code literals work, an entire
  61 # let block tends to have some initial indentation.  Rather than
  62 # trying to figure out what that is and strip it off, we prepend 'if
  63 # 1:' to make the let code the nested block inside the if (and have
  64 # the parser automatically deal with the indentation for us).
  65 #
  66 # We don't want to do this if (1) the code block is empty or (2) the
  67 # first line of the block doesn't have any whitespace at the front.
  68
  69 def fixPythonIndentation(s):
  70     # get rid of blank lines first
  71     s = re.sub(r'(?m)^\s*\n', '', s);
  72     if (s != '' and re.match(r'[ \t]', s[0])):
  73         s = 'if 1:\n' + s
  74     return s
  75
  76 class ISAParserError(Exception):
  77     """Error handler for parser errors"""
  78     def __init__(self, first, second=None):
  79         if second is None:
  80             self.lineno = 0
  81             self.string = first
  82         else:
  83             if hasattr(first, 'lexer'):
  84                 first = first.lexer.lineno
  85             self.lineno = first
  86             self.string = second
  87
  88     def display(self, filename_stack, print_traceback=debug):
  89         # Output formatted to work under Emacs compile-mode.  Optional
  90         # 'print_traceback' arg, if set to True, prints a Python stack
  91         # backtrace too (can be handy when trying to debug the parser
  92         # itself).
  93
  94         spaces = ""
  95         for (filename, line) in filename_stack[:-1]:
  96             print "%sIn file included from %s:" % (spaces, filename)
  97             spaces += "  "
  98
  99         # Print a Python stack backtrace if requested.
 100         if print_traceback or not self.lineno:
 101             traceback.print_exc()
 102
 103         line_str = "%s:" % (filename_stack[-1][0], )
 104         if self.lineno:
 105             line_str += "%d:" % (self.lineno, )
 106
 107         return "%s%s %s" % (spaces, line_str, self.string)
 108
 109     def exit(self, filename_stack, print_traceback=debug):
 110         # Just call exit.
 111
 112         sys.exit(self.display(filename_stack, print_traceback))
 113
 114 def error(*args):
 115     raise ISAParserError(*args)
 116
 117 ####################
 118 # Template objects.
 119 #
 120 # Template objects are format strings that allow substitution from
 121 # the attribute spaces of other objects (e.g. InstObjParams instances).
 122
# Matches a '%(label)s' or '%(label)d' substitution field that is not
# preceded by another '%'; group 1 captures the label.  Template.subst()
# uses it to find the labels referenced by a template.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
 124
 125 class Template(object):
 126     def __init__(self, t):
 127         self.template = t
 128
 129     def subst(self, d):
 130         myDict = None
 131
 132         # Protect non-Python-dict substitutions (e.g. if there's a printf
 133         # in the templated C++ code)
 134         template = protect_non_subst_percents(self.template)
 135         # CPU-model-specific substitutions are handled later (in GenCode).
 136         template = protect_cpu_symbols(template)
 137
 138         # Build a dict ('myDict') to use for the template substitution.
 139         # Start with the template namespace.  Make a copy since we're
 140         # going to modify it.
 141         myDict = parser.templateMap.copy()
 142
 143         if isinstance(d, InstObjParams):
 144             # If we're dealing with an InstObjParams object, we need
 145             # to be a little more sophisticated.  The instruction-wide
 146             # parameters are already formed, but the parameters which
 147             # are only function wide still need to be generated.
 148             compositeCode = ''
 149
 150             myDict.update(d.__dict__)
 151             # The "operands" and "snippets" attributes of the InstObjParams
 152             # objects are for internal use and not substitution.
 153             del myDict['operands']
 154             del myDict['snippets']
 155
 156             snippetLabels = [l for l in labelRE.findall(template)
 157                              if d.snippets.has_key(l)]
 158
 159             snippets = dict([(s, mungeSnippet(d.snippets[s]))
 160                              for s in snippetLabels])
 161
 162             myDict.update(snippets)
 163
 164             compositeCode = ' '.join(map(str, snippets.values()))
 165
 166             # Add in template itself in case it references any
 167             # operands explicitly (like Mem)
 168             compositeCode += ' ' + template
 169
 170             operands = SubOperandList(compositeCode, d.operands)
 171
 172             myDict['op_decl'] = operands.concatAttrStrings('op_decl')
 173
 174             is_src = lambda op: op.is_src
 175             is_dest = lambda op: op.is_dest
 176
 177             myDict['op_src_decl'] = \
 178                       operands.concatSomeAttrStrings(is_src, 'op_src_decl')
 179             myDict['op_dest_decl'] = \
 180                       operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
 181
 182             myDict['op_rd'] = operands.concatAttrStrings('op_rd')
 183             myDict['op_wb'] = operands.concatAttrStrings('op_wb')
 184
 185             if d.operands.memOperand:
 186                 myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
 187                 myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type
 188
 189         elif isinstance(d, dict):
 190             # if the argument is a dictionary, we just use it.
 191             myDict.update(d)
 192         elif hasattr(d, '__dict__'):
 193             # if the argument is an object, we use its attribute map.
 194             myDict.update(d.__dict__)
 195         else:
 196             raise TypeError, "Template.subst() arg must be or have dictionary"
 197         return template % myDict
 198
 199     # Convert to string.  This handles the case when a template with a
 200     # CPU-specific term gets interpolated into another template or into
 201     # an output block.
 202     def __str__(self):
 203         return expand_cpu_symbols_to_string(self.template)
 204
 205 ################
 206 # Format object.
 207 #
 208 # A format object encapsulates an instruction format.  It must provide
 209 # a defineInst() method that generates the code for an instruction
 210 # definition.
 211
 212 class Format(object):
 213     def __init__(self, parser, id, params, code):
 214         self.parser = parser
 215         self.id = id
 216         self.params = params
 217         label = 'def format ' + id
 218         self.user_code = compile(fixPythonIndentation(code), label, 'exec')
 219         param_list = string.join(params, ", ")
 220         f = '''def defInst(_code, _context, %s):
 221                 my_locals = vars().copy()
 222                 exec _code in _context, my_locals
 223                 return my_locals\n''' % param_list
 224         c = compile(f, label + ' wrapper', 'exec')
 225         exec c
 226         self.func = defInst
 227
 228     def defineInst(self, name, args, lineno):
 229         self.parser.updateExportContext()
 230         context = self.parser.exportContext.copy()
 231         if len(name):
 232             Name = name[0].upper()
 233             if len(name) > 1:
 234                 Name += name[1:]
 235         context.update({ 'name' : name, 'Name' : Name })
 236         try:
 237             vars = self.func(self.user_code, context, *args[0], **args[1])
 238         except Exception, exc:
 239             if debug:
 240                 raise
 241             error(lineno, 'error defining "%s": %s.' % (name, exc))
 242         for k in vars.keys():
 243             if k not in ('header_output', 'decoder_output',
 244                          'exec_output', 'decode_block'):
 245                 del vars[k]
 246         return GenCode(**vars)
 247
 248 # Special null format to catch an implicit-format instruction
 249 # definition outside of any format block.
 250 class NoFormat(object):
 251     def __init__(self):
 252         self.defaultInst = ''
 253
 254     def defineInst(self, name, args, lineno):
 255         error(lineno,
 256               'instruction definition "%s" with no active format!' % name)
 257
 258 #####################################################################
 259 #
 260 #                           Support Classes
 261 #
 262 #####################################################################
 263
 264 # Expand template with CPU-specific references into a dictionary with
 265 # an entry for each CPU model name.  The entry key is the model name
 266 # and the corresponding value is the template with the CPU-specific
 267 # refs substituted for that model.
 268 def expand_cpu_symbols_to_dict(template):
 269     # Protect '%'s that don't go with CPU-specific terms
 270     t = re.sub(r'%(?!\(CPU_)', '%%', template)
 271     result = {}
 272     for cpu in cpu_models:
 273         result[cpu.name] = t % cpu.strings
 274     return result
 275
 276 # *If* the template has CPU-specific references, return a single
 277 # string containing a copy of the template for each CPU model with the
 278 # corresponding values substituted in.  If the template has no
 279 # CPU-specific references, it is returned unmodified.
 280 def expand_cpu_symbols_to_string(template):
 281     if template.find('%(CPU_') != -1:
 282         return reduce(lambda x,y: x+y,
 283                       expand_cpu_symbols_to_dict(template).values())
 284     else:
 285         return template
 286
 287 # Protect CPU-specific references by doubling the corresponding '%'s
 288 # (in preparation for substituting a different set of references into
 289 # the template).
 290 def protect_cpu_symbols(template):
 291     return re.sub(r'%(?=\(CPU_)', '%%', template)
 292
 293 # Protect any non-dict-substitution '%'s in a format string
 294 # (i.e. those not followed by '(')
 295 def protect_non_subst_percents(s):
 296     return re.sub(r'%(?!\()', '%%', s)
 297
 298 ###############
 299 # GenCode class
 300 #
 301 # The GenCode class encapsulates generated code destined for various
 302 # output files.  The header_output and decoder_output attributes are
 303 # strings containing code destined for decoder.hh and decoder.cc
 304 # respectively.  The decode_block attribute contains code to be
 305 # incorporated in the decode function itself (that will also end up in
 306 # decoder.cc).  The exec_output attribute is a dictionary with a key
 307 # for each CPU model name; the value associated with a particular key
 308 # is the string of code for that CPU model's exec.cc file.  The
 309 # has_decode_default attribute is used in the decode block to allow
 310 # explicit default clauses to override default default clauses.
 311
 312 class GenCode(object):
 313     # Constructor.  At this point we substitute out all CPU-specific
 314     # symbols.  For the exec output, these go into the per-model
 315     # dictionary.  For all other output types they get collapsed into
 316     # a single string.
 317     def __init__(self,
 318                  header_output = '', decoder_output = '', exec_output = '',
 319                  decode_block = '', has_decode_default = False):
 320         self.header_output = expand_cpu_symbols_to_string(header_output)
 321         self.decoder_output = expand_cpu_symbols_to_string(decoder_output)
 322         if isinstance(exec_output, dict):
 323             self.exec_output = exec_output
 324         elif isinstance(exec_output, str):
 325             # If the exec_output arg is a single string, we replicate
 326             # it for each of the CPU models, substituting and
 327             # %(CPU_foo)s params appropriately.
 328             self.exec_output = expand_cpu_symbols_to_dict(exec_output)
 329         self.decode_block = expand_cpu_symbols_to_string(decode_block)
 330         self.has_decode_default = has_decode_default
 331
 332     # Override '+' operator: generate a new GenCode object that
 333     # concatenates all the individual strings in the operands.
 334     def __add__(self, other):
 335         exec_output = {}
 336         for cpu in cpu_models:
 337             n = cpu.name
 338             exec_output[n] = self.exec_output[n] + other.exec_output[n]
 339         return GenCode(self.header_output + other.header_output,
 340                        self.decoder_output + other.decoder_output,
 341                        exec_output,
 342                        self.decode_block + other.decode_block,
 343                        self.has_decode_default or other.has_decode_default)
 344
 345     # Prepend a string (typically a comment) to all the strings.
 346     def prepend_all(self, pre):
 347         self.header_output = pre + self.header_output
 348         self.decoder_output  = pre + self.decoder_output
 349         self.decode_block = pre + self.decode_block
 350         for cpu in cpu_models:
 351             self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]
 352
 353     # Wrap the decode block in a pair of strings (e.g., 'case foo:'
 354     # and 'break;').  Used to build the big nested switch statement.
 355     def wrap_decode_block(self, pre, post = ''):
 356         self.decode_block = pre + indent(self.decode_block) + post
 357
 358 #####################################################################
 359 #
 360 #                      Bitfield Operator Support
 361 #
 362 #####################################################################
 363
 364 bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')
 365
 366 bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
 367 bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')
 368
 369 def substBitOps(code):
 370     # first convert single-bit selectors to two-index form
 371     # i.e., <n> --> <n:n>
 372     code = bitOp1ArgRE.sub(r'<\1:\1>', code)
 373     # simple case: selector applied to ID (name)
 374     # i.e., foo<a:b> --> bits(foo, a, b)
 375     code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
 376     # if selector is applied to expression (ending in ')'),
 377     # we need to search backward for matching '('
 378     match = bitOpExprRE.search(code)
 379     while match:
 380         exprEnd = match.start()
 381         here = exprEnd - 1
 382         nestLevel = 1
 383         while nestLevel > 0:
 384             if code[here] == '(':
 385                 nestLevel -= 1
 386             elif code[here] == ')':
 387                 nestLevel += 1
 388             here -= 1
 389             if here < 0:
 390                 sys.exit("Didn't find '('!")
 391         exprStart = here+1
 392         newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
 393                                          match.group(1), match.group(2))
 394         code = code[:exprStart] + newExpr + code[match.end():]
 395         match = bitOpExprRE.search(code)
 396     return code
 397
 398
 399 #####################################################################
 400 #
 401 #                             Code Parser
 402 #
 403 # The remaining code is the support for automatically extracting
 404 # instruction characteristics from pseudocode.
 405 #
 406 #####################################################################
 407
 408 # Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
 410 # converting tuples to lists so they can be modified.
 411 def makeList(arg):
 412     if isinstance(arg, list):
 413         return arg
 414     elif isinstance(arg, tuple):
 415         return list(arg)
 416     elif not arg:
 417         return []
 418     else:
 419         return [ arg ]
 420
 421 # Generate operandTypeMap from the user's 'def operand_types'
 422 # statement.
 423 def buildOperandTypeMap(user_dict, lineno):
 424     global operandTypeMap
 425     operandTypeMap = {}
 426     for (ext, (desc, size)) in user_dict.iteritems():
 427         if desc == 'signed int':
 428             ctype = 'int%d_t' % size
 429             is_signed = 1
 430         elif desc == 'unsigned int':
 431             ctype = 'uint%d_t' % size
 432             is_signed = 0
 433         elif desc == 'float':
 434             is_signed = 1       # shouldn't really matter
 435             if size == 32:
 436                 ctype = 'float'
 437             elif size == 64:
 438                 ctype = 'double'
 439         elif desc == 'twin64 int':
 440             is_signed = 0
 441             ctype = 'Twin64_t'
 442         elif desc == 'twin32 int':
 443             is_signed = 0
 444             ctype = 'Twin32_t'
 445         if ctype == '':
 446             error(lineno, 'Unrecognized type description "%s" in user_dict')
 447         operandTypeMap[ext] = (size, ctype, is_signed)
 448
 449 class Operand(object):
 450     '''Base class for operand descriptors.  An instance of this class
 451     (or actually a class derived from this one) represents a specific
 452     operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
 453     derived classes encapsulates the traits of a particular operand
 454     type (e.g., "32-bit integer register").'''
 455
 456     def buildReadCode(self, func = None):
 457         code = self.read_code % {"name": self.base_name,
 458                                  "func": func,
 459                                  "op_idx": self.src_reg_idx,
 460                                  "reg_idx": self.reg_spec,
 461                                  "size": self.size,
 462                                  "ctype": self.ctype}
 463         if self.size != self.dflt_size:
 464             return '%s = bits(%s, %d, 0);\n' % \
 465                    (self.base_name, code, self.size-1)
 466         else:
 467             return '%s = %s;\n' % \
 468                    (self.base_name, code)
 469
 470     def buildWriteCode(self, func = None):
 471         if (self.size != self.dflt_size and self.is_signed):
 472             final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 473         else:
 474             final_val = self.base_name
 475         code = self.write_code % {"name": self.base_name,
 476                                   "func": func,
 477                                   "op_idx": self.dest_reg_idx,
 478                                   "reg_idx": self.reg_spec,
 479                                   "size": self.size,
 480                                   "ctype": self.ctype,
 481                                   "final_val": final_val}
 482         return '''
 483         {
 484             %s final_val = %s;
 485             %s;
 486             if (traceData) { traceData->setData(final_val); }
 487         }''' % (self.dflt_ctype, final_val, code)
 488
 489     def __init__(self, full_name, ext, is_src, is_dest):
 490         self.full_name = full_name
 491         self.ext = ext
 492         self.is_src = is_src
 493         self.is_dest = is_dest
 494         # The 'effective extension' (eff_ext) is either the actual
 495         # extension, if one was explicitly provided, or the default.
 496         if ext:
 497             self.eff_ext = ext
 498         else:
 499             self.eff_ext = self.dflt_ext
 500
 501         (self.size, self.ctype, self.is_signed) = operandTypeMap[self.eff_ext]
 502
 503         # note that mem_acc_size is undefined for non-mem operands...
 504         # template must be careful not to use it if it doesn't apply.
 505         if self.isMem():
 506             self.mem_acc_size = self.makeAccSize()
 507             if self.ctype in ['Twin32_t', 'Twin64_t']:
 508                 self.mem_acc_type = 'Twin'
 509             else:
 510                 self.mem_acc_type = 'uint'
 511
 512     # Finalize additional fields (primarily code fields).  This step
 513     # is done separately since some of these fields may depend on the
 514     # register index enumeration that hasn't been performed yet at the
 515     # time of __init__().
 516     def finalize(self):
 517         self.flags = self.getFlags()
 518         self.constructor = self.makeConstructor()
 519         self.op_decl = self.makeDecl()
 520
 521         if self.is_src:
 522             self.op_rd = self.makeRead()
 523             self.op_src_decl = self.makeDecl()
 524         else:
 525             self.op_rd = ''
 526             self.op_src_decl = ''
 527
 528         if self.is_dest:
 529             self.op_wb = self.makeWrite()
 530             self.op_dest_decl = self.makeDecl()
 531         else:
 532             self.op_wb = ''
 533             self.op_dest_decl = ''
 534
 535     def isMem(self):
 536         return 0
 537
 538     def isReg(self):
 539         return 0
 540
 541     def isFloatReg(self):
 542         return 0
 543
 544     def isIntReg(self):
 545         return 0
 546
 547     def isControlReg(self):
 548         return 0
 549
 550     def getFlags(self):
 551         # note the empty slice '[:]' gives us a copy of self.flags[0]
 552         # instead of a reference to it
 553         my_flags = self.flags[0][:]
 554         if self.is_src:
 555             my_flags += self.flags[1]
 556         if self.is_dest:
 557             my_flags += self.flags[2]
 558         return my_flags
 559
 560     def makeDecl(self):
 561         # Note that initializations in the declarations are solely
 562         # to avoid 'uninitialized variable' errors from the compiler.
 563         return self.ctype + ' ' + self.base_name + ' = 0;\n';
 564
 565 class IntRegOperand(Operand):
 566     def isReg(self):
 567         return 1
 568
 569     def isIntReg(self):
 570         return 1
 571
 572     def makeConstructor(self):
 573         c = ''
 574         if self.is_src:
 575             c += '\n\t_srcRegIdx[%d] = %s;' % \
 576                  (self.src_reg_idx, self.reg_spec)
 577         if self.is_dest:
 578             c += '\n\t_destRegIdx[%d] = %s;' % \
 579                  (self.dest_reg_idx, self.reg_spec)
 580         return c
 581
 582     def makeRead(self):
 583         if (self.ctype == 'float' or self.ctype == 'double'):
 584             error('Attempt to read integer register as FP')
 585         if self.read_code != None:
 586             return self.buildReadCode('readIntRegOperand')
 587         if (self.size == self.dflt_size):
 588             return '%s = xc->readIntRegOperand(this, %d);\n' % \
 589                    (self.base_name, self.src_reg_idx)
 590         elif (self.size > self.dflt_size):
 591             int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
 592                           (self.src_reg_idx)
 593             if (self.is_signed):
 594                 int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
 595             return '%s = %s;\n' % (self.base_name, int_reg_val)
 596         else:
 597             return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
 598                    (self.base_name, self.src_reg_idx, self.size-1)
 599
 600     def makeWrite(self):
 601         if (self.ctype == 'float' or self.ctype == 'double'):
 602             error('Attempt to write integer register as FP')
 603         if self.write_code != None:
 604             return self.buildWriteCode('setIntRegOperand')
 605         if (self.size != self.dflt_size and self.is_signed):
 606             final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 607         else:
 608             final_val = self.base_name
 609         wb = '''
 610         {
 611             %s final_val = %s;
 612             xc->setIntRegOperand(this, %d, final_val);\n
 613             if (traceData) { traceData->setData(final_val); }
 614         }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
 615         return wb
 616
 617 class FloatRegOperand(Operand):
 618     def isReg(self):
 619         return 1
 620
 621     def isFloatReg(self):
 622         return 1
 623
 624     def makeConstructor(self):
 625         c = ''
 626         if self.is_src:
 627             c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
 628                  (self.src_reg_idx, self.reg_spec)
 629         if self.is_dest:
 630             c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
 631                  (self.dest_reg_idx, self.reg_spec)
 632         return c
 633
 634     def makeRead(self):
 635         bit_select = 0
 636         if (self.ctype == 'float' or self.ctype == 'double'):
 637             func = 'readFloatRegOperand'
 638         else:
 639             func = 'readFloatRegOperandBits'
 640             if (self.size != self.dflt_size):
 641                 bit_select = 1
 642         base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
 643         if self.read_code != None:
 644             return self.buildReadCode(func)
 645         if bit_select:
 646             return '%s = bits(%s, %d, 0);\n' % \
 647                    (self.base_name, base, self.size-1)
 648         else:
 649             return '%s = %s;\n' % (self.base_name, base)
 650
 651     def makeWrite(self):
 652         final_val = self.base_name
 653         final_ctype = self.ctype
 654         if (self.ctype == 'float' or self.ctype == 'double'):
 655             func = 'setFloatRegOperand'
 656         elif (self.ctype == 'uint32_t' or self.ctype == 'uint64_t'):
 657             func = 'setFloatRegOperandBits'
 658         else:
 659             func = 'setFloatRegOperandBits'
 660             final_ctype = 'uint%d_t' % self.dflt_size
 661             if (self.size != self.dflt_size and self.is_signed):
 662                 final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 663         if self.write_code != None:
 664             return self.buildWriteCode(func)
 665         wb = '''
 666         {
 667             %s final_val = %s;
 668             xc->%s(this, %d, final_val);\n
 669             if (traceData) { traceData->setData(final_val); }
 670         }''' % (final_ctype, final_val, func, self.dest_reg_idx)
 671         return wb
 672
 673 class ControlRegOperand(Operand):
 674     def isReg(self):
 675         return 1
 676
 677     def isControlReg(self):
 678         return 1
 679
 680     def makeConstructor(self):
 681         c = ''
 682         if self.is_src:
 683             c += '\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
 684                  (self.src_reg_idx, self.reg_spec)
 685         if self.is_dest:
 686             c += '\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
 687                  (self.dest_reg_idx, self.reg_spec)
 688         return c
 689
 690     def makeRead(self):
 691         bit_select = 0
 692         if (self.ctype == 'float' or self.ctype == 'double'):
 693             error('Attempt to read control register as FP')
 694         if self.read_code != None:
 695             return self.buildReadCode('readMiscRegOperand')
 696         base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
 697         if self.size == self.dflt_size:
 698             return '%s = %s;\n' % (self.base_name, base)
 699         else:
 700             return '%s = bits(%s, %d, 0);\n' % \
 701                    (self.base_name, base, self.size-1)
 702
 703     def makeWrite(self):
 704         if (self.ctype == 'float' or self.ctype == 'double'):
 705             error('Attempt to write control register as FP')
 706         if self.write_code != None:
 707             return self.buildWriteCode('setMiscRegOperand')
 708         wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
 709              (self.dest_reg_idx, self.base_name)
 710         wb += 'if (traceData) { traceData->setData(%s); }' % \
 711               self.base_name
 712         return wb
 713
 714 class MemOperand(Operand):
 715     def isMem(self):
 716         return 1
 717
 718     def makeConstructor(self):
 719         return ''
 720
 721     def makeDecl(self):
 722         # Note that initializations in the declarations are solely
 723         # to avoid 'uninitialized variable' errors from the compiler.
 724         # Declare memory data variable.
 725         if self.ctype in ['Twin32_t','Twin64_t']:
 726             return "%s %s; %s.a = 0; %s.b = 0;\n" % \
 727                    (self.ctype, self.base_name, self.base_name, self.base_name)
 728         return '%s %s = 0;\n' % (self.ctype, self.base_name)
 729
 730     def makeRead(self):
 731         if self.read_code != None:
 732             return self.buildReadCode()
 733         return ''
 734
 735     def makeWrite(self):
 736         if self.write_code != None:
 737             return self.buildWriteCode()
 738         return ''
 739
 740     # Return the memory access size *in bits*, suitable for
 741     # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
 742     def makeAccSize(self):
 743         return self.size
 744
 745 class PCOperand(Operand):
 746     def makeConstructor(self):
 747         return ''
 748
 749     def makeRead(self):
 750         return '%s = xc->readPC();\n' % self.base_name
 751
 752     def makeWrite(self):
 753         return 'xc->setPC(%s);\n' % self.base_name
 754
 755 class UPCOperand(Operand):
 756     def makeConstructor(self):
 757         return ''
 758
 759     def makeRead(self):
 760         if self.read_code != None:
 761             return self.buildReadCode('readMicroPC')
 762         return '%s = xc->readMicroPC();\n' % self.base_name
 763
 764     def makeWrite(self):
 765         if self.write_code != None:
 766             return self.buildWriteCode('setMicroPC')
 767         return 'xc->setMicroPC(%s);\n' % self.base_name
 768
 769 class NUPCOperand(Operand):
 770     def makeConstructor(self):
 771         return ''
 772
 773     def makeRead(self):
 774         if self.read_code != None:
 775             return self.buildReadCode('readNextMicroPC')
 776         return '%s = xc->readNextMicroPC();\n' % self.base_name
 777
 778     def makeWrite(self):
 779         if self.write_code != None:
 780             return self.buildWriteCode('setNextMicroPC')
 781         return 'xc->setNextMicroPC(%s);\n' % self.base_name
 782
 783 class NPCOperand(Operand):
 784     def makeConstructor(self):
 785         return ''
 786
 787     def makeRead(self):
 788         if self.read_code != None:
 789             return self.buildReadCode('readNextPC')
 790         return '%s = xc->readNextPC();\n' % self.base_name
 791
 792     def makeWrite(self):
 793         if self.write_code != None:
 794             return self.buildWriteCode('setNextPC')
 795         return 'xc->setNextPC(%s);\n' % self.base_name
 796
 797 class NNPCOperand(Operand):
 798     def makeConstructor(self):
 799         return ''
 800
 801     def makeRead(self):
 802         if self.read_code != None:
 803             return self.buildReadCode('readNextNPC')
 804         return '%s = xc->readNextNPC();\n' % self.base_name
 805
 806     def makeWrite(self):
 807         if self.write_code != None:
 808             return self.buildWriteCode('setNextNPC')
 809         return 'xc->setNextNPC(%s);\n' % self.base_name
 810
 811 def buildOperandNameMap(user_dict, lineno):
 812     global operandNameMap
 813     operandNameMap = {}
 814     for (op_name, val) in user_dict.iteritems():
 815         (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val[:5]
 816         if len(val) > 5:
 817             read_code = val[5]
 818         else:
 819             read_code = None
 820         if len(val) > 6:
 821             write_code = val[6]
 822         else:
 823             write_code = None
 824         if len(val) > 7:
 825             error(lineno,
 826                   'error: too many attributes for operand "%s"' %
 827                   base_cls_name)
 828
 829         (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext]
 830         # Canonical flag structure is a triple of lists, where each list
 831         # indicates the set of flags implied by this operand always, when
 832         # used as a source, and when used as a dest, respectively.
 833         # For simplicity this can be initialized using a variety of fairly
 834         # obvious shortcuts; we convert these to canonical form here.
 835         if not flags:
 836             # no flags specified (e.g., 'None')
 837             flags = ( [], [], [] )
 838         elif isinstance(flags, str):
 839             # a single flag: assumed to be unconditional
 840             flags = ( [ flags ], [], [] )
 841         elif isinstance(flags, list):
 842             # a list of flags: also assumed to be unconditional
 843             flags = ( flags, [], [] )
 844         elif isinstance(flags, tuple):
 845             # it's a tuple: it should be a triple,
 846             # but each item could be a single string or a list
 847             (uncond_flags, src_flags, dest_flags) = flags
 848             flags = (makeList(uncond_flags),
 849                      makeList(src_flags), makeList(dest_flags))
 850         # Accumulate attributes of new operand class in tmp_dict
 851         tmp_dict = {}
 852         for attr in ('dflt_ext', 'reg_spec', 'flags', 'sort_pri',
 853                      'dflt_size', 'dflt_ctype', 'dflt_is_signed',
 854                      'read_code', 'write_code'):
 855             tmp_dict[attr] = eval(attr)
 856         tmp_dict['base_name'] = op_name
 857         # New class name will be e.g. "IntReg_Ra"
 858         cls_name = base_cls_name + '_' + op_name
 859         # Evaluate string arg to get class object.  Note that the
 860         # actual base class for "IntReg" is "IntRegOperand", i.e. we
 861         # have to append "Operand".
 862         try:
 863             base_cls = eval(base_cls_name + 'Operand')
 864         except NameError:
 865             if debug:
 866                 raise
 867             error(lineno,
 868                   'error: unknown operand base class "%s"' % base_cls_name)
 869         # The following statement creates a new class called
 870         # <cls_name> as a subclass of <base_cls> with the attributes
 871         # in tmp_dict, just as if we evaluated a class declaration.
 872         operandNameMap[op_name] = type(cls_name, (base_cls,), tmp_dict)
 873
 874     # Define operand variables.
 875     operands = user_dict.keys()
 876
 877     operandsREString = (r'''
 878     (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
 879     ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
 880     (?![\w\.])       # neg. lookahead assertion: prevent partial matches
 881     '''
 882                         % string.join(operands, '|'))
 883
 884     global operandsRE
 885     operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
 886
 887     # Same as operandsREString, but extension is mandatory, and only two
 888     # groups are returned (base and ext, not full name as above).
 889     # Used for subtituting '_' for '.' to make C++ identifiers.
 890     operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
 891                                % string.join(operands, '|'))
 892
 893     global operandsWithExtRE
 894     operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)
 895
 896 class OperandList(object):
 897     '''Find all the operands in the given code block.  Returns an operand
 898     descriptor list (instance of class OperandList).'''
 899     def __init__(self, code):
 900         self.items = []
 901         self.bases = {}
 902         # delete comments so we don't match on reg specifiers inside
 903         code = commentRE.sub('', code)
 904         # search for operands
 905         next_pos = 0
 906         while 1:
 907             match = operandsRE.search(code, next_pos)
 908             if not match:
 909                 # no more matches: we're done
 910                 break
 911             op = match.groups()
 912             # regexp groups are operand full name, base, and extension
 913             (op_full, op_base, op_ext) = op
 914             # if the token following the operand is an assignment, this is
 915             # a destination (LHS), else it's a source (RHS)
 916             is_dest = (assignRE.match(code, match.end()) != None)
 917             is_src = not is_dest
 918             # see if we've already seen this one
 919             op_desc = self.find_base(op_base)
 920             if op_desc:
 921                 if op_desc.ext != op_ext:
 922                     error('Inconsistent extensions for operand %s' % \
 923                           op_base)
 924                 op_desc.is_src = op_desc.is_src or is_src
 925                 op_desc.is_dest = op_desc.is_dest or is_dest
 926             else:
 927                 # new operand: create new descriptor
 928                 op_desc = operandNameMap[op_base](op_full, op_ext,
 929                                                   is_src, is_dest)
 930                 self.append(op_desc)
 931             # start next search after end of current match
 932             next_pos = match.end()
 933         self.sort()
 934         # enumerate source & dest register operands... used in building
 935         # constructor later
 936         self.numSrcRegs = 0
 937         self.numDestRegs = 0
 938         self.numFPDestRegs = 0
 939         self.numIntDestRegs = 0
 940         self.memOperand = None
 941         for op_desc in self.items:
 942             if op_desc.isReg():
 943                 if op_desc.is_src:
 944                     op_desc.src_reg_idx = self.numSrcRegs
 945                     self.numSrcRegs += 1
 946                 if op_desc.is_dest:
 947                     op_desc.dest_reg_idx = self.numDestRegs
 948                     self.numDestRegs += 1
 949                     if op_desc.isFloatReg():
 950                         self.numFPDestRegs += 1
 951                     elif op_desc.isIntReg():
 952                         self.numIntDestRegs += 1
 953             elif op_desc.isMem():
 954                 if self.memOperand:
 955                     error("Code block has more than one memory operand.")
 956                 self.memOperand = op_desc
 957         if parser.maxInstSrcRegs < self.numSrcRegs:
 958             parser.maxInstSrcRegs = self.numSrcRegs
 959         if parser.maxInstDestRegs < self.numDestRegs:
 960             parser.maxInstDestRegs = self.numDestRegs
 961         # now make a final pass to finalize op_desc fields that may depend
 962         # on the register enumeration
 963         for op_desc in self.items:
 964             op_desc.finalize()
 965
 966     def __len__(self):
 967         return len(self.items)
 968
 969     def __getitem__(self, index):
 970         return self.items[index]
 971
 972     def append(self, op_desc):
 973         self.items.append(op_desc)
 974         self.bases[op_desc.base_name] = op_desc
 975
 976     def find_base(self, base_name):
 977         # like self.bases[base_name], but returns None if not found
 978         # (rather than raising exception)
 979         return self.bases.get(base_name)
 980
 981     # internal helper function for concat[Some]Attr{Strings|Lists}
 982     def __internalConcatAttrs(self, attr_name, filter, result):
 983         for op_desc in self.items:
 984             if filter(op_desc):
 985                 result += getattr(op_desc, attr_name)
 986         return result
 987
 988     # return a single string that is the concatenation of the (string)
 989     # values of the specified attribute for all operands
 990     def concatAttrStrings(self, attr_name):
 991         return self.__internalConcatAttrs(attr_name, lambda x: 1, '')
 992
 993     # like concatAttrStrings, but only include the values for the operands
 994     # for which the provided filter function returns true
 995     def concatSomeAttrStrings(self, filter, attr_name):
 996         return self.__internalConcatAttrs(attr_name, filter, '')
 997
 998     # return a single list that is the concatenation of the (list)
 999     # values of the specified attribute for all operands
1000     def concatAttrLists(self, attr_name):
1001         return self.__internalConcatAttrs(attr_name, lambda x: 1, [])
1002
1003     # like concatAttrLists, but only include the values for the operands
1004     # for which the provided filter function returns true
1005     def concatSomeAttrLists(self, filter, attr_name):
1006         return self.__internalConcatAttrs(attr_name, filter, [])
1007
1008     def sort(self):
1009         self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
1010
class SubOperandList(OperandList):
    '''Operand list for one piece of code, made up of references to the
    descriptors of an already-built master OperandList.'''
    def __init__(self, code, master_list):
        self.items = []
        self.bases = {}
        # strip comments first so that operand names inside them are
        # not picked up
        code = commentRE.sub('', code)
        # walk through every operand reference in the code
        next_pos = 0
        while True:
            match = operandsRE.search(code, next_pos)
            if not match:
                # ran out of matches
                break
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            if not master_list.find_base(op_base):
                # every operand found here must be known to the master
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % op_base)
            elif not self.find_base(op_base):
                # first sighting in this sub list: share the master's
                # descriptor rather than creating a new one
                self.append(master_list.bases[op_base])

            # resume searching right after this match
            next_pos = match.end()
        self.sort()
        # at most one memory operand is allowed
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc
1050
# Regular expression object to match C++ '//' comments together with the
# newline that ends them (a '//' comment on a last line that has no
# trailing newline is therefore not matched).
# OperandList and SubOperandList delete these matches before scanning a
# code block for operands.
commentRE = re.compile(r'//.*\n')

# Regular expression object to match the start of an assignment:
# optional whitespace followed by '=' that is not itself followed by a
# second '=' (so '==' does not count).
# OperandList applies it immediately after an operand reference to
# decide whether that operand is a destination.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1058
1059 # Munge operand names in code string to make legal C++ variable names.
1060 # This means getting rid of the type extension if any.
1061 # (Will match base_name attribute of Operand object.)
def substMungedOpNames(code):
    # Replace every "<operand>.<ext>" reference by its bare operand name
    # (group 1 of operandsWithExtRE).
    munged = operandsWithExtRE.sub(r'\1', code)
    return munged
1064
1065 # Fix up code snippets for final substitution in templates.
def mungeSnippet(s):
    # Anything that is not a string is handed back untouched.
    if not isinstance(s, str):
        return s
    # Strings get bit operators substituted first, then operand names
    # munged.
    bit_ops_done = substBitOps(s)
    return substMungedOpNames(bit_ops_done)
1071
def makeFlagConstructor(flag_list):
    # Return the C++ statements that set each flag in flag_list, one
    # '\n\tflags[<flag>] = true;' per distinct flag, or '' when there
    # are no flags.
    #
    # As a side effect a non-empty flag_list is sorted and stripped of
    # duplicates *in place*, so the caller's list is modified.
    if not flag_list:
        return ''
    # filter out repeated flags (slice assignment keeps the update in
    # place on the caller's list)
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post
1087
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# applies this with match(), so any optional argument that starts with
# 'Is' is taken to be a flag.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.  InstObjParams also
# applies this with match(), which anchors only at the start of the
# string: the first alternative accepts any string containing 'Op'.
opClassRE = re.compile(r'.*Op|No_OpClass')
1093
class InstObjParams(object):
    # Collects the values derived from an instruction's code snippets:
    # the operand list, the generated constructor text, the flag list,
    # the op class and the FP-enable check.
    def __init__(self, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        # mnem, class_name, base_class: stored as given.
        # snippets: dict of named code snippets, or a single snippet,
        #           which is stored under the key 'code'.
        # opt_args: strings naming StaticInst flags ('Is...') or an
        #           OpClass.
        #
        # The containers default to None rather than {} / []: default
        # values are created once, so a default dict would have been
        # shared (through self.snippets) by every instance built
        # without snippets.
        if snippets is None:
            snippets = {}
        if opt_args is None:
            opt_args = []
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        # All snippets are scanned together to find the operands.
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(compositeCode)
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
1152
1153 ##############
1154 # Stack: a simple stack object.  Used for both formats (formatStack)
1155 # and default cases (defaultStack).  Simply wraps a list to give more
1156 # stack-like syntax and enable initialization with an argument list
1157 # (as opposed to an argument that's a list).
1158
class Stack(list):
    # A list with push()/top() added, built from its arguments:
    # Stack(a, b) holds [a, b].
    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        # New items go on the end of the list.
        self.append(item)

    def top(self):
        # Look at the most recently pushed item without removing it.
        return self[-1]
1168
# Global stack that tracks current file and line number.
# Each element is a tuple (filename, lineno) that records the
# *current* filename and the line number in the *previous* file where
# it was included.  Entries are pushed by ISAParser.t_NEWFILE and popped
# by ISAParser.t_ENDFILE.
fileNameStack = Stack()
1174
1175
1176 #######################
1177 #
1178 # Output file template
1179 #
1180
# Skeleton of a generated C++ file.  It carries the named placeholders
# %(filename)s, %(includes)s, %(global_output)s, %(namespace)s,
# %(namespace_output)s and %(decode_function)s.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''

# Skeleton of the generated file that defines the MaxInstSrcRegs and
# MaxInstDestRegs constants.  Placeholders: %(filename)s, %(namespace)s,
# and the integer-formatted %(MaxInstSrcRegs)d and %(MaxInstDestRegs)d.
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1216
1217 class ISAParser(Grammar):
1218     def __init__(self, output_dir):
1219         super(ISAParser, self).__init__()
1220         self.output_dir = output_dir
1221
1222         self.templateMap = {}
1223
1224         # This dictionary maps format name strings to Format objects.
1225         self.formatMap = {}
1226
1227         # The format stack.
1228         self.formatStack = Stack(NoFormat())
1229
1230         # The default case stack.
1231         self.defaultStack = Stack(None)
1232
1233         self.exportContext = {}
1234
1235         self.maxInstSrcRegs = 0
1236         self.maxInstDestRegs = 0
1237
1238     #####################################################################
1239     #
1240     #                                Lexer
1241     #
1242     # The PLY lexer module takes two things as input:
1243     # - A list of token names (the string list 'tokens')
1244     # - A regular expression describing a match for each token.  The
1245     #   regexp for token FOO can be provided in two ways:
1246     #   - as a string variable named t_FOO
1247     #   - as the doc string for a function named t_FOO.  In this case,
1248     #     the function is also executed, allowing an action to be
1249     #     associated with each token match.
1250     #
1251     #####################################################################
1252
1253     # Reserved words.  These are listed separately as they are matched
1254     # using the same regexp as generic IDs, but distinguished in the
1255     # t_ID() function.  The PLY documentation suggests this approach.
    # Token names of the reserved words.  reserved_map (below) maps the
    # lower-case spelling of each one to the name listed here.
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )
1261
1262     # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched but never returned (t_NEWFILE and
    # t_ENDFILE return no token), so they are commented out to
    # suppress a PLY warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1295
1296     # Regular expressions for token matching
    # Punctuation tokens, given as plain regexp strings (no action
    # function).
    # NOTE(review): ':' is a prefix of '::'.  PLY is documented to try
    # string-defined patterns in order of decreasing regexp length,
    # which is what would let '::' win over ':' -- confirm against the
    # PLY version in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'
1312
1313     # Identifiers and reserved words
    reserved_map = { }
    # Key: lower-case spelling of a reserved word; value: its token
    # name from 'reserved'.  t_ID looks identifiers up in this map.
    for r in reserved:
        reserved_map[r.lower()] = r
1317
    def t_ID(self, t):
        r'[A-Za-z_]\w*'
        # Reserved words match the same regexp; an identifier found in
        # reserved_map gets that reserved word's token type, anything
        # else is a plain 'ID'.
        t.type = self.reserved_map.get(t.value, 'ID')
        return t
1322
1323     # Integer literal
1324     def t_INTLIT(self, t):
1325         r'-?(0x[\da-fA-F]+)|\d+'
1326         try:
1327             t.value = int(t.value,0)
1328         except ValueError:
1329             error(t, 'Integer value "%s" too large' % t.value)
1330             t.value = 0
1331         return t
1332
1333     # String literal.  Note that these use only single quotes, and
1334     # can span multiple lines.
    def t_STRLIT(self, t):
        r"(?m)'([^'])+'"
        # The regexp requires at least one character between the
        # quotes and has no escape for an embedded quote.
        # strip off quotes
        t.value = t.value[1:-1]
        # the literal may span lines: keep the line count in step
        t.lexer.lineno += t.value.count('\n')
        return t
1341
1342
1343     # "Code literal"... like a string literal, but delimiters are
1344     # '{{' and '}}' so they get formatted nicely under emacs c-mode
    def t_CODELIT(self, t):
        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
        # The body is a non-empty run of characters other than '}', or
        # a '}' that is not followed by another '}'; the literal thus
        # ends at the first '}}'.
        # strip off {{ & }}
        t.value = t.value[2:-2]
        # code literals usually span lines: keep the line count in step
        t.lexer.lineno += t.value.count('\n')
        return t
1351
    def t_CPPDIRECTIVE(self, t):
        r'^\#[^\#].*\n'
        # A '#' followed by a character other than '#', through the end
        # of the line.  Lines starting with '##' are left to t_NEWFILE /
        # t_ENDFILE.
        # NOTE(review): '^' only matches at every line start if the
        # lexer compiles its patterns in multi-line mode -- confirm.
        # The match includes its newline, so count it here.
        t.lexer.lineno += t.value.count('\n')
        return t
1356
1357     def t_NEWFILE(self, t):
1358         r'^\#\#newfile\s+"[\w/.-]*"'
1359         fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1360         t.lexer.lineno = 0
1361
1362     def t_ENDFILE(self, t):
1363         r'^\#\#endfile'
1364         (old_filename, t.lexer.lineno) = fileNameStack.pop()
1365
1366     #
1367     # The functions t_NEWLINE, t_ignore, and t_error are
1368     # special for the lex module.
1369     #
1370
1371     # Newlines
    def t_NEWLINE(self, t):
        r'\n+'
        # Only advances the line counter; returns nothing, so newlines
        # never become tokens.
        t.lexer.lineno += t.value.count('\n')
1375
1376     # Comments
    def t_comment(self, t):
        r'//.*'
        # No action and no return value: a '//' comment (up to, but not
        # including, the end of the line) is matched and dropped.
1379
1380     # Completely ignored characters
    # space, tab and form feed ('\x0c')
    t_ignore = ' \t\x0c'
1382
1383     # Error handler
    def t_error(self, t):
        # Report the first character of the unmatched input ...
        error(t, "illegal character '%s'" % t.value[0])
        # ... and step over that one character.
        t.skip(1)
1387
1388     #####################################################################
1389     #
1390     #                                Parser
1391     #
1392     # Every function whose name starts with 'p_' defines a grammar
1393     # rule.  The rule is encoded in the function's doc string, while
1394     # the function body provides the action taken when the rule is
1395     # matched.  The argument to each function is a list of the values
1396     # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1397     # symbols on the RHS.  For tokens, the value is copied from the
1398     # t.value attribute provided by the lexer.  For non-terminals, the
1399     # value is assigned by the producing rule; i.e., the job of the
1400     # grammar rule function is to set the value for the non-terminal
1401     # on the LHS (by assigning to t[0]).
1402     #####################################################################
1403
1404     # The LHS of the first grammar rule is used as the start symbol
1405     # (in this case, 'specification').  Note that this rule enforces
1406     # that there will be exactly one namespace declaration, with 0 or
1407     # more global defs/decls before and after it.  The defs & decls
1408     # before the namespace decl will be outside the namespace; those
1409     # after will be inside.  The decoder function is always inside the
1410     # namespace.
1411     def p_specification(self, t):
1412         'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1413         global_code = t[1]
1414         isa_name = t[2]
1415         namespace = isa_name + "Inst"
1416         # wrap the decode block as a function definition
1417         t[4].wrap_decode_block('''
1418 StaticInstPtr
1419 %(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
1420 {
1421     using namespace %(namespace)s;
1422 ''' % vars(), '}')
1423         # both the latter output blocks and the decode block are in
1424         # the namespace
1425         namespace_code = t[3] + t[4]
1426         # pass it all back to the caller of yacc.parse()
1427         t[0] = (isa_name, namespace, global_code, namespace_code)
1428
1429     # ISA name declaration looks like "namespace <foo>;"
    def p_name_decl(self, t):
        'name_decl : NAMESPACE ID SEMI'
        # The value of the declaration is the identifier itself.
        t[0] = t[2]
1433
1434     # 'opt_defs_and_outputs' is a possibly empty sequence of
1435     # def and/or output statements.
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        # No statements: the value is a GenCode built with no arguments.
        t[0] = GenCode()
1439
    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        # Pass the value of the non-empty sequence through unchanged.
        t[0] = t[1]
1443
    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        # A single statement: its value is the value of the sequence.
        t[0] = t[1]
1447
    def p_defs_and_outputs_1(self, t):
        'defs_and_outputs : defs_and_outputs def_or_output'
        # Combine what has been accumulated so far with the new
        # statement's value using '+'.
        t[0] = t[1] + t[2]
1451
1452     # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        # Whichever alternative matched, pass its value through.
        t[0] = t[1]
1465
1466     # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1467     # directly to the appropriate output section.
1468
1469     # Massage output block by substituting in template definitions and
1470     # bit operators.  We handle '%'s embedded in the string that don't
1471     # indicate template substitutions (or CPU-specific symbols, which
1472     # get handled in GenCode) by doubling them first so that the
1473     # format operation will reduce them back to single '%'s.
1474     def process_output(self, s):
1475         s = protect_non_subst_percents(s)
1476         # protects cpu-specific symbols too
1477         s = protect_cpu_symbols(s)
1478         return substBitOps(s % self.templateMap)
1479
1480     def p_output_header(self, t):
1481         'output_header : OUTPUT HEADER CODELIT SEMI'
1482         t[0] = GenCode(header_output = self.process_output(t[3]))
1483
1484     def p_output_decoder(self, t):
1485         'output_decoder : OUTPUT DECODER CODELIT SEMI'
1486         t[0] = GenCode(decoder_output = self.process_output(t[3]))
1487
1488     def p_output_exec(self, t):
1489         'output_exec : OUTPUT EXEC CODELIT SEMI'
1490         t[0] = GenCode(exec_output = self.process_output(t[3]))
1491
1492     # global let blocks 'let {{...}}' (Python code blocks) are
1493     # executed directly when seen.  Note that these execute in a
1494     # special variable context 'exportContext' to prevent the code
1495     # from polluting this script's namespace.
    def p_global_let(self, t):
        'global_let : LET CODELIT SEMI'
        self.updateExportContext()
        # Reset the four output variables so that whatever the block
        # assigns to them is attributable to this block alone.
        self.exportContext["header_output"] = ''
        self.exportContext["decoder_output"] = ''
        self.exportContext["exec_output"] = ''
        self.exportContext["decode_block"] = ''
        try:
            # Run the block's Python code with exportContext as its
            # namespace.
            exec fixPythonIndentation(t[2]) in self.exportContext
        except Exception, exc:
            # With debug set, let the original exception (and its
            # traceback) propagate; otherwise report it as a parse error.
            if debug:
                raise
            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
        # Collect whatever the block left in the output variables.
        t[0] = GenCode(header_output=self.exportContext["header_output"],
                       decoder_output=self.exportContext["decoder_output"],
                       exec_output=self.exportContext["exec_output"],
                       decode_block=self.exportContext["decode_block"])
1513
1514     # Define the mapping from operand type extensions to C++ types and
1515     # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        try:
            # The code literal holds the items of a dictionary display;
            # wrap it in braces and evaluate it.
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            # With debug set, let the original exception propagate;
            # otherwise report it as a parse error.
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode() # contributes nothing to the output C++ file
1527
1528     # Define the mapping from operand names to operand classes and
1529     # other traits.  Stored in operandNameMap.
1530     def p_def_operands(self, t):
1531         'def_operands : DEF OPERANDS CODELIT SEMI'
1532         if not globals().has_key('operandTypeMap'):
1533             error(t, 'error: operand types must be defined before operands')
1534         try:
1535             user_dict = eval('{' + t[3] + '}', self.exportContext)
1536         except Exception, exc:
1537             if debug:
1538                 raise
1539             error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1540         buildOperandNameMap(user_dict, t.lexer.lineno)
1541         t[0] = GenCode() # contributes nothing to the output C++ file
1542
1543     # A bitfield definition looks like:
1544     # 'def [signed] bitfield <ID> [<first>:<last>]'
1545     # This generates a preprocessor macro in the output file.
1546     def p_def_bitfield_0(self, t):
1547         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1548         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1549         if (t[2] == 'signed'):
1550             expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1551         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1552         t[0] = GenCode(header_output = hash_define)
1553
1554     # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1555     def p_def_bitfield_1(self, t):
1556         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1557         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1558         if (t[2] == 'signed'):
1559             expr = 'sext<%d>(%s)' % (1, expr)
1560         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1561         t[0] = GenCode(header_output = hash_define)
1562
1563     # alternate form for structure member: 'def bitfield <ID> <ID>'
1564     def p_def_bitfield_struct(self, t):
1565         'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1566         if (t[2] != ''):
1567             error(t, 'error: structure bitfields are always unsigned.')
1568         expr = 'machInst.%s' % t[5]
1569         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1570         t[0] = GenCode(header_output = hash_define)
1571
1572     def p_id_with_dot_0(self, t):
1573         'id_with_dot : ID'
1574         t[0] = t[1]
1575
1576     def p_id_with_dot_1(self, t):
1577         'id_with_dot : ID DOT id_with_dot'
1578         t[0] = t[1] + t[2] + t[3]
1579
1580     def p_opt_signed_0(self, t):
1581         'opt_signed : SIGNED'
1582         t[0] = t[1]
1583
1584     def p_opt_signed_1(self, t):
1585         'opt_signed : empty'
1586         t[0] = ''
1587
1588     def p_def_template(self, t):
1589         'def_template : DEF TEMPLATE ID CODELIT SEMI'
1590         self.templateMap[t[3]] = Template(t[4])
1591         t[0] = GenCode()
1592
1593     # An instruction format definition looks like
1594     # "def format <fmt>(<params>) {{...}};"
1595     def p_def_format(self, t):
1596         'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1597         (id, params, code) = (t[3], t[5], t[7])
1598         self.defFormat(id, params, code, t.lexer.lineno)
1599         t[0] = GenCode()
1600
1601     # The formal parameter list for an instruction format is a
1602     # possibly empty list of comma-separated parameters.  Positional
1603     # (standard, non-keyword) parameters must come first, followed by
1604     # keyword parameters, followed by a '*foo' parameter that gets
1605     # excess positional arguments (as in Python).  Each of these three
1606     # parameter categories is optional.
1607     #
1608     # Note that we do not support the '**foo' parameter for collecting
1609     # otherwise undefined keyword args.  Otherwise the parameter list
1610     # is (I believe) identical to what is supported in Python.
1611     #
    # The param list generates a flat list of strings: the positional
    # parameter names first, then one 'name = <repr of default>' string
    # per keyword param, then '*name' for the excess-args param (if any).
1615     def p_param_list_0(self, t):
1616         'param_list : positional_param_list COMMA nonpositional_param_list'
1617         t[0] = t[1] + t[3]
1618
1619     def p_param_list_1(self, t):
1620         '''param_list : positional_param_list
1621                       | nonpositional_param_list'''
1622         t[0] = t[1]
1623
1624     def p_positional_param_list_0(self, t):
1625         'positional_param_list : empty'
1626         t[0] = []
1627
1628     def p_positional_param_list_1(self, t):
1629         'positional_param_list : ID'
1630         t[0] = [t[1]]
1631
1632     def p_positional_param_list_2(self, t):
1633         'positional_param_list : positional_param_list COMMA ID'
1634         t[0] = t[1] + [t[3]]
1635
1636     def p_nonpositional_param_list_0(self, t):
1637         'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1638         t[0] = t[1] + t[3]
1639
1640     def p_nonpositional_param_list_1(self, t):
1641         '''nonpositional_param_list : keyword_param_list
1642                                     | excess_args_param'''
1643         t[0] = t[1]
1644
1645     def p_keyword_param_list_0(self, t):
1646         'keyword_param_list : keyword_param'
1647         t[0] = [t[1]]
1648
1649     def p_keyword_param_list_1(self, t):
1650         'keyword_param_list : keyword_param_list COMMA keyword_param'
1651         t[0] = t[1] + [t[3]]
1652
1653     def p_keyword_param(self, t):
1654         'keyword_param : ID EQUALS expr'
1655         t[0] = t[1] + ' = ' + t[3].__repr__()
1656
1657     def p_excess_args_param(self, t):
1658         'excess_args_param : ASTERISK ID'
1659         # Just concatenate them: '*ID'.  Wrap in list to be consistent
1660         # with positional_param_list and keyword_param_list.
1661         t[0] = [t[1] + t[2]]
1662
1663     # End of format definition-related rules.
1664     ##############
1665
1666     #
1667     # A decode block looks like:
1668     #       decode <field1> [, <field2>]* [default <inst>] { ... }
1669     #
1670     def p_decode_block(self, t):
1671         'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1672         default_defaults = self.defaultStack.pop()
1673         codeObj = t[5]
1674         # use the "default defaults" only if there was no explicit
1675         # default statement in decode_stmt_list
1676         if not codeObj.has_decode_default:
1677             codeObj += default_defaults
1678         codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1679         t[0] = codeObj
1680
1681     # The opt_default statement serves only to push the "default
1682     # defaults" onto defaultStack.  This value will be used by nested
1683     # decode blocks, and used and popped off when the current
1684     # decode_block is processed (in p_decode_block() above).
1685     def p_opt_default_0(self, t):
1686         'opt_default : empty'
1687         # no default specified: reuse the one currently at the top of
1688         # the stack
1689         self.defaultStack.push(self.defaultStack.top())
1690         # no meaningful value returned
1691         t[0] = None
1692
1693     def p_opt_default_1(self, t):
1694         'opt_default : DEFAULT inst'
1695         # push the new default
1696         codeObj = t[2]
1697         codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1698         self.defaultStack.push(codeObj)
1699         # no meaningful value returned
1700         t[0] = None
1701
1702     def p_decode_stmt_list_0(self, t):
1703         'decode_stmt_list : decode_stmt'
1704         t[0] = t[1]
1705
1706     def p_decode_stmt_list_1(self, t):
1707         'decode_stmt_list : decode_stmt decode_stmt_list'
1708         if (t[1].has_decode_default and t[2].has_decode_default):
1709             error(t, 'Two default cases in decode block')
1710         t[0] = t[1] + t[2]
1711
1712     #
1713     # Decode statement rules
1714     #
1715     # There are four types of statements allowed in a decode block:
1716     # 1. Format blocks 'format <foo> { ... }'
1717     # 2. Nested decode blocks
1718     # 3. Instruction definitions.
1719     # 4. C preprocessor directives.
1720
1721
1722     # Preprocessor directives found in a decode statement list are
1723     # passed through to the output, replicated to all of the output
1724     # code streams.  This works well for ifdefs, so we can ifdef out
1725     # both the declarations and the decode cases generated by an
1726     # instruction definition.  Handling them as part of the grammar
1727     # makes it easy to keep them in the right place with respect to
1728     # the code generated by the other statements.
1729     def p_decode_stmt_cpp(self, t):
1730         'decode_stmt : CPPDIRECTIVE'
1731         t[0] = GenCode(t[1], t[1], t[1], t[1])
1732
1733     # A format block 'format <foo> { ... }' sets the default
1734     # instruction format used to handle instruction definitions inside
1735     # the block.  This format can be overridden by using an explicit
1736     # format on the instruction definition or with a nested format
1737     # block.
1738     def p_decode_stmt_format(self, t):
1739         'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1740         # The format will be pushed on the stack when 'push_format_id'
1741         # is processed (see below).  Once the parser has recognized
1742         # the full production (though the right brace), we're done
1743         # with the format, so now we can pop it.
1744         self.formatStack.pop()
1745         t[0] = t[4]
1746
1747     # This rule exists so we can set the current format (& push the
1748     # stack) when we recognize the format name part of the format
1749     # block.
1750     def p_push_format_id(self, t):
1751         'push_format_id : ID'
1752         try:
1753             self.formatStack.push(self.formatMap[t[1]])
1754             t[0] = ('', '// format %s' % t[1])
1755         except KeyError:
1756             error(t, 'instruction format "%s" not defined.' % t[1])
1757
1758     # Nested decode block: if the value of the current field matches
1759     # the specified constant, do a nested decode on some other field.
1760     def p_decode_stmt_decode(self, t):
1761         'decode_stmt : case_label COLON decode_block'
1762         label = t[1]
1763         codeObj = t[3]
1764         # just wrap the decoding code from the block as a case in the
1765         # outer switch statement.
1766         codeObj.wrap_decode_block('\n%s:\n' % label)
1767         codeObj.has_decode_default = (label == 'default')
1768         t[0] = codeObj
1769
1770     # Instruction definition (finally!).
1771     def p_decode_stmt_inst(self, t):
1772         'decode_stmt : case_label COLON inst SEMI'
1773         label = t[1]
1774         codeObj = t[3]
1775         codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1776         codeObj.has_decode_default = (label == 'default')
1777         t[0] = codeObj
1778
1779     # The case label is either a list of one or more constants or
1780     # 'default'
1781     def p_case_label_0(self, t):
1782         'case_label : intlit_list'
1783         def make_case(intlit):
1784             if intlit >= 2**32:
1785                 return 'case ULL(%#x)' % intlit
1786             else:
1787                 return 'case %#x' % intlit
1788         t[0] = ': '.join(map(make_case, t[1]))
1789
1790     def p_case_label_1(self, t):
1791         'case_label : DEFAULT'
1792         t[0] = 'default'
1793
1794     #
1795     # The constant list for a decode case label must be non-empty, but
1796     # may have one or more comma-separated integer literals in it.
1797     #
1798     def p_intlit_list_0(self, t):
1799         'intlit_list : INTLIT'
1800         t[0] = [t[1]]
1801
1802     def p_intlit_list_1(self, t):
1803         'intlit_list : intlit_list COMMA INTLIT'
1804         t[0] = t[1]
1805         t[0].append(t[3])
1806
1807     # Define an instruction using the current instruction format
1808     # (specified by an enclosing format block).
1809     # "<mnemonic>(<args>)"
1810     def p_inst_0(self, t):
1811         'inst : ID LPAREN arg_list RPAREN'
1812         # Pass the ID and arg list to the current format class to deal with.
1813         currentFormat = self.formatStack.top()
1814         codeObj = currentFormat.defineInst(t[1], t[3], t.lexer.lineno)
1815         args = ','.join(map(str, t[3]))
1816         args = re.sub('(?m)^', '//', args)
1817         args = re.sub('^//', '', args)
1818         comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1819         codeObj.prepend_all(comment)
1820         t[0] = codeObj
1821
1822     # Define an instruction using an explicitly specified format:
1823     # "<fmt>::<mnemonic>(<args>)"
1824     def p_inst_1(self, t):
1825         'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1826         try:
1827             format = self.formatMap[t[1]]
1828         except KeyError:
1829             error(t, 'instruction format "%s" not defined.' % t[1])
1830         codeObj = format.defineInst(t[3], t[5], t.lexer.lineno)
1831         comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1832         codeObj.prepend_all(comment)
1833         t[0] = codeObj
1834
1835     # The arg list generates a tuple, where the first element is a
1836     # list of the positional args and the second element is a dict
1837     # containing the keyword args.
1838     def p_arg_list_0(self, t):
1839         'arg_list : positional_arg_list COMMA keyword_arg_list'
1840         t[0] = ( t[1], t[3] )
1841
1842     def p_arg_list_1(self, t):
1843         'arg_list : positional_arg_list'
1844         t[0] = ( t[1], {} )
1845
1846     def p_arg_list_2(self, t):
1847         'arg_list : keyword_arg_list'
1848         t[0] = ( [], t[1] )
1849
1850     def p_positional_arg_list_0(self, t):
1851         'positional_arg_list : empty'
1852         t[0] = []
1853
1854     def p_positional_arg_list_1(self, t):
1855         'positional_arg_list : expr'
1856         t[0] = [t[1]]
1857
1858     def p_positional_arg_list_2(self, t):
1859         'positional_arg_list : positional_arg_list COMMA expr'
1860         t[0] = t[1] + [t[3]]
1861
1862     def p_keyword_arg_list_0(self, t):
1863         'keyword_arg_list : keyword_arg'
1864         t[0] = t[1]
1865
1866     def p_keyword_arg_list_1(self, t):
1867         'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1868         t[0] = t[1]
1869         t[0].update(t[3])
1870
1871     def p_keyword_arg(self, t):
1872         'keyword_arg : ID EQUALS expr'
1873         t[0] = { t[1] : t[3] }
1874
1875     #
1876     # Basic expressions.  These constitute the argument values of
1877     # "function calls" (i.e. instruction definitions in the decode
1878     # block) and default values for formal parameters of format
1879     # functions.
1880     #
1881     # Right now, these are either strings, integers, or (recursively)
1882     # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1884     # there isn't really a variable namespace to refer to).
1885     #
1886     def p_expr_0(self, t):
1887         '''expr : ID
1888                 | INTLIT
1889                 | STRLIT
1890                 | CODELIT'''
1891         t[0] = t[1]
1892
1893     def p_expr_1(self, t):
1894         '''expr : LBRACKET list_expr RBRACKET'''
1895         t[0] = t[2]
1896
1897     def p_list_expr_0(self, t):
1898         'list_expr : expr'
1899         t[0] = [t[1]]
1900
1901     def p_list_expr_1(self, t):
1902         'list_expr : list_expr COMMA expr'
1903         t[0] = t[1] + [t[3]]
1904
1905     def p_list_expr_2(self, t):
1906         'list_expr : empty'
1907         t[0] = []
1908
1909     #
1910     # Empty production... use in other rules for readability.
1911     #
1912     def p_empty(self, t):
1913         'empty :'
1914         pass
1915
1916     # Parse error handler.  Note that the argument here is the
1917     # offending *token*, not a grammar symbol (hence the need to use
1918     # t.value)
1919     def p_error(self, t):
1920         if t:
1921             error(t, "syntax error at '%s'" % t.value)
1922         else:
1923             error("unknown syntax error")
1924
1925     # END OF GRAMMAR RULES
1926
    # Names that updateExportContext() resolves with eval() and copies
    # into self.exportContext under the same name.
    exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')
1928     def updateExportContext(self):
1929         exportDict = dict([(s, eval(s)) for s in self.exportContextSymbols])
1930         self.exportContext.update(exportDict)
1931         self.exportContext.update(parser.templateMap)
1932
1933     def defFormat(self, id, params, code, lineno):
1934         '''Define a new format'''
1935
1936         # make sure we haven't already defined this one
1937         if id in self.formatMap:
1938             error(lineno, 'format %s redefined.' % id)
1939
1940         # create new object and store in global map
1941         self.formatMap[id] = Format(self, id, params, code)
1942
1943     def update_if_needed(self, file, contents):
1944         '''Update the output file only if the new contents are
1945         different from the current contents.  Minimizes the files that
1946         need to be rebuilt after minor changes.'''
1947
1948         file = os.path.join(self.output_dir, file)
1949         update = False
1950         if os.access(file, os.R_OK):
1951             f = open(file, 'r')
1952             old_contents = f.read()
1953             f.close()
1954             if contents != old_contents:
1955                 print 'Updating', file
1956                 os.remove(file) # in case it's write-protected
1957                 update = True
1958             else:
1959                 print 'File', file, 'is unchanged'
1960         else:
1961             print 'Generating', file
1962             update = True
1963         if update:
1964             f = open(file, 'w')
1965             f.write(contents)
1966             f.close()
1967
    # This regular expression matches '##include "<filename>"'
    # directives.  It is anchored at line boundaries (re.MULTILINE) and
    # captures the quoted file name in the 'filename' group.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
                           re.MULTILINE)
1971
1972     def replace_include(self, matchobj, dirname):
1973         """Function to replace a matched '##include' directive with the
1974         contents of the specified file (with nested ##includes
1975         replaced recursively).  'matchobj' is an re match object
1976         (from a match of includeRE) and 'dirname' is the directory
1977         relative to which the file path should be resolved."""
1978
1979         fname = matchobj.group('filename')
1980         full_fname = os.path.normpath(os.path.join(dirname, fname))
1981         contents = '##newfile "%s"\n%s\n##endfile\n' % \
1982                    (full_fname, self.read_and_flatten(full_fname))
1983         return contents
1984
1985     def read_and_flatten(self, filename):
1986         """Read a file and recursively flatten nested '##include' files."""
1987
1988         current_dir = os.path.dirname(filename)
1989         try:
1990             contents = open(filename).read()
1991         except IOError:
1992             error('Error including file "%s"' % filename)
1993
1994         fileNameStack.push((filename, 0))
1995
1996         # Find any includes and include them
1997         def replace(matchobj):
1998             return self.replace_include(matchobj, current_dir)
1999         contents = self.includeRE.sub(replace, contents)
2000
2001         fileNameStack.pop()
2002         return contents
2003
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated output files (decoder.hh, decoder.cc, one exec file
        per entry in cpu_models, and max_inst_regs.hh) through
        update_if_needed().

        NOTE: the output templates are filled in with "% vars()", i.e.
        from this method's local variables, so the local names used
        below are significant and must not be renamed without also
        changing the templates.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        # (the four locals below are re-bound before each file is
        # written; presumably they are the fields file_template refers
        # to -- the template is defined elsewhere, confirm there)
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        # (exec_output is indexed by CPU model name here)
        for cpu in cpu_models:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables, visible through vars(), that hold the values of the
        # corresponding instance attributes.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2055
2056     def parse_isa_desc(self, *args, **kwargs):
2057         try:
2058             self._parse_isa_desc(*args, **kwargs)
2059         except ISAParserError, e:
2060             e.exit(fileNameStack)
2061
# global list of CpuModel objects (see cpu_models.py); read by
# _parse_isa_desc() when generating the per-cpu exec files
cpu_models = []

# Called as script: get args from command line.
# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # execfile() runs the given file in this module's namespace; it is
    # expected to define CpuModel, which is used on the next line.
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Look up each CPU model named on the command line.
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    # The parser is constructed with the output directory and then run
    # on the ISA description file.
    parser = ISAParser(sys.argv[3])
    parser.parse_isa_desc(sys.argv[2])