src/arch/isa_parser.py

   1 # Copyright (c) 2003-2005 The Regents of The University of Michigan
   2 # All rights reserved.
   3 #
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met: redistributions of source code must retain the above copyright
   7 # notice, this list of conditions and the following disclaimer;
   8 # redistributions in binary form must reproduce the above copyright
   9 # notice, this list of conditions and the following disclaimer in the
  10 # documentation and/or other materials provided with the distribution;
  11 # neither the name of the copyright holders nor the names of its
  12 # contributors may be used to endorse or promote products derived from
  13 # this software without specific prior written permission.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26 #
  27 # Authors: Steve Reinhardt
  28
  29 import os
  30 import sys
  31 import re
  32 import string
  33 import inspect, traceback
  34 # get type names
  35 from types import *
  36
  37 from m5.util.grammar import Grammar
  38
# Module-wide debug switch.  It supplies the default value of the
# 'print_traceback' argument of ISAParserError.display() and exit(),
# and when it is true Format.defineInst() re-raises exceptions coming
# from format code instead of converting them into parser errors.
debug=False
  40
  41 ###################
  42 # Utility functions
  43
  44 #
  45 # Indent every line in string 's' by two spaces
  46 # (except preprocessor directives).
  47 # Used to make nested code blocks look pretty.
  48 #
  49 def indent(s):
  50     return re.sub(r'(?m)^(?!#)', '  ', s)
  51
  52 #
  53 # Munge a somewhat arbitrarily formatted piece of Python code
  54 # (e.g. from a format 'let' block) into something whose indentation
  55 # will get by the Python parser.
  56 #
  57 # The two keys here are that Python will give a syntax error if
  58 # there's any whitespace at the beginning of the first line, and that
  59 # all lines at the same lexical nesting level must have identical
  60 # indentation.  Unfortunately the way code literals work, an entire
  61 # let block tends to have some initial indentation.  Rather than
  62 # trying to figure out what that is and strip it off, we prepend 'if
  63 # 1:' to make the let code the nested block inside the if (and have
  64 # the parser automatically deal with the indentation for us).
  65 #
  66 # We don't want to do this if (1) the code block is empty or (2) the
  67 # first line of the block doesn't have any whitespace at the front.
  68
  69 def fixPythonIndentation(s):
  70     # get rid of blank lines first
  71     s = re.sub(r'(?m)^\s*\n', '', s);
  72     if (s != '' and re.match(r'[ \t]', s[0])):
  73         s = 'if 1:\n' + s
  74     return s
  75
  76 class ISAParserError(Exception):
  77     """Error handler for parser errors"""
  78     def __init__(self, first, second=None):
  79         if second is None:
  80             self.lineno = 0
  81             self.string = first
  82         else:
  83             if hasattr(first, 'lexer'):
  84                 first = first.lexer.lineno
  85             self.lineno = first
  86             self.string = second
  87
  88     def display(self, filename_stack, print_traceback=debug):
  89         # Output formatted to work under Emacs compile-mode.  Optional
  90         # 'print_traceback' arg, if set to True, prints a Python stack
  91         # backtrace too (can be handy when trying to debug the parser
  92         # itself).
  93
  94         spaces = ""
  95         for (filename, line) in filename_stack[:-1]:
  96             print "%sIn file included from %s:" % (spaces, filename)
  97             spaces += "  "
  98
  99         # Print a Python stack backtrace if requested.
 100         if print_traceback or not self.lineno:
 101             traceback.print_exc()
 102
 103         line_str = "%s:" % (filename_stack[-1][0], )
 104         if self.lineno:
 105             line_str += "%d:" % (self.lineno, )
 106
 107         return "%s%s %s" % (spaces, line_str, self.string)
 108
 109     def exit(self, filename_stack, print_traceback=debug):
 110         # Just call exit.
 111
 112         sys.exit(self.display(filename_stack, print_traceback))
 113
 114 def error(*args):
 115     raise ISAParserError(*args)
 116
 117 ####################
 118 # Template objects.
 119 #
 120 # Template objects are format strings that allow substitution from
 121 # the attribute spaces of other objects (e.g. InstObjParams instances).
 122
# Matches a dictionary-style substitution field, "%(label)s" or
# "%(label)d", that is not preceded by another '%'.  Group 1 is the
# label text between the parentheses.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
 124
 125 class Template(object):
 126     def __init__(self, t):
 127         self.template = t
 128
 129     def subst(self, d):
 130         myDict = None
 131
 132         # Protect non-Python-dict substitutions (e.g. if there's a printf
 133         # in the templated C++ code)
 134         template = protect_non_subst_percents(self.template)
 135         # CPU-model-specific substitutions are handled later (in GenCode).
 136         template = protect_cpu_symbols(template)
 137
 138         # Build a dict ('myDict') to use for the template substitution.
 139         # Start with the template namespace.  Make a copy since we're
 140         # going to modify it.
 141         myDict = parser.templateMap.copy()
 142
 143         if isinstance(d, InstObjParams):
 144             # If we're dealing with an InstObjParams object, we need
 145             # to be a little more sophisticated.  The instruction-wide
 146             # parameters are already formed, but the parameters which
 147             # are only function wide still need to be generated.
 148             compositeCode = ''
 149
 150             myDict.update(d.__dict__)
 151             # The "operands" and "snippets" attributes of the InstObjParams
 152             # objects are for internal use and not substitution.
 153             del myDict['operands']
 154             del myDict['snippets']
 155
 156             snippetLabels = [l for l in labelRE.findall(template)
 157                              if d.snippets.has_key(l)]
 158
 159             snippets = dict([(s, mungeSnippet(d.snippets[s]))
 160                              for s in snippetLabels])
 161
 162             myDict.update(snippets)
 163
 164             compositeCode = ' '.join(map(str, snippets.values()))
 165
 166             # Add in template itself in case it references any
 167             # operands explicitly (like Mem)
 168             compositeCode += ' ' + template
 169
 170             operands = SubOperandList(compositeCode, d.operands)
 171
 172             myDict['op_decl'] = operands.concatAttrStrings('op_decl')
 173
 174             is_src = lambda op: op.is_src
 175             is_dest = lambda op: op.is_dest
 176
 177             myDict['op_src_decl'] = \
 178                       operands.concatSomeAttrStrings(is_src, 'op_src_decl')
 179             myDict['op_dest_decl'] = \
 180                       operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
 181
 182             myDict['op_rd'] = operands.concatAttrStrings('op_rd')
 183             myDict['op_wb'] = operands.concatAttrStrings('op_wb')
 184
 185             if d.operands.memOperand:
 186                 myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
 187                 myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type
 188
 189         elif isinstance(d, dict):
 190             # if the argument is a dictionary, we just use it.
 191             myDict.update(d)
 192         elif hasattr(d, '__dict__'):
 193             # if the argument is an object, we use its attribute map.
 194             myDict.update(d.__dict__)
 195         else:
 196             raise TypeError, "Template.subst() arg must be or have dictionary"
 197         return template % myDict
 198
 199     # Convert to string.  This handles the case when a template with a
 200     # CPU-specific term gets interpolated into another template or into
 201     # an output block.
 202     def __str__(self):
 203         return expand_cpu_symbols_to_string(self.template)
 204
 205 ################
 206 # Format object.
 207 #
 208 # A format object encapsulates an instruction format.  It must provide
 209 # a defineInst() method that generates the code for an instruction
 210 # definition.
 211
 212 class Format(object):
 213     def __init__(self, parser, id, params, code):
 214         self.parser = parser
 215         self.id = id
 216         self.params = params
 217         label = 'def format ' + id
 218         self.user_code = compile(fixPythonIndentation(code), label, 'exec')
 219         param_list = string.join(params, ", ")
 220         f = '''def defInst(_code, _context, %s):
 221                 my_locals = vars().copy()
 222                 exec _code in _context, my_locals
 223                 return my_locals\n''' % param_list
 224         c = compile(f, label + ' wrapper', 'exec')
 225         exec c
 226         self.func = defInst
 227
 228     def defineInst(self, name, args, lineno):
 229         self.parser.updateExportContext()
 230         context = self.parser.exportContext.copy()
 231         if len(name):
 232             Name = name[0].upper()
 233             if len(name) > 1:
 234                 Name += name[1:]
 235         context.update({ 'name' : name, 'Name' : Name })
 236         try:
 237             vars = self.func(self.user_code, context, *args[0], **args[1])
 238         except Exception, exc:
 239             if debug:
 240                 raise
 241             error(lineno, 'error defining "%s": %s.' % (name, exc))
 242         for k in vars.keys():
 243             if k not in ('header_output', 'decoder_output',
 244                          'exec_output', 'decode_block'):
 245                 del vars[k]
 246         return GenCode(**vars)
 247
 248 # Special null format to catch an implicit-format instruction
 249 # definition outside of any format block.
 250 class NoFormat(object):
 251     def __init__(self):
 252         self.defaultInst = ''
 253
 254     def defineInst(self, name, args, lineno):
 255         error(lineno,
 256               'instruction definition "%s" with no active format!' % name)
 257
 258 #####################################################################
 259 #
 260 #                           Support Classes
 261 #
 262 #####################################################################
 263
 264 # Expand template with CPU-specific references into a dictionary with
 265 # an entry for each CPU model name.  The entry key is the model name
 266 # and the corresponding value is the template with the CPU-specific
 267 # refs substituted for that model.
 268 def expand_cpu_symbols_to_dict(template):
 269     # Protect '%'s that don't go with CPU-specific terms
 270     t = re.sub(r'%(?!\(CPU_)', '%%', template)
 271     result = {}
 272     for cpu in cpu_models:
 273         result[cpu.name] = t % cpu.strings
 274     return result
 275
 276 # *If* the template has CPU-specific references, return a single
 277 # string containing a copy of the template for each CPU model with the
 278 # corresponding values substituted in.  If the template has no
 279 # CPU-specific references, it is returned unmodified.
 280 def expand_cpu_symbols_to_string(template):
 281     if template.find('%(CPU_') != -1:
 282         return reduce(lambda x,y: x+y,
 283                       expand_cpu_symbols_to_dict(template).values())
 284     else:
 285         return template
 286
 287 # Protect CPU-specific references by doubling the corresponding '%'s
 288 # (in preparation for substituting a different set of references into
 289 # the template).
 290 def protect_cpu_symbols(template):
 291     return re.sub(r'%(?=\(CPU_)', '%%', template)
 292
 293 # Protect any non-dict-substitution '%'s in a format string
 294 # (i.e. those not followed by '(')
 295 def protect_non_subst_percents(s):
 296     return re.sub(r'%(?!\()', '%%', s)
 297
 298 ###############
 299 # GenCode class
 300 #
 301 # The GenCode class encapsulates generated code destined for various
 302 # output files.  The header_output and decoder_output attributes are
 303 # strings containing code destined for decoder.hh and decoder.cc
 304 # respectively.  The decode_block attribute contains code to be
 305 # incorporated in the decode function itself (that will also end up in
 306 # decoder.cc).  The exec_output attribute is a dictionary with a key
 307 # for each CPU model name; the value associated with a particular key
 308 # is the string of code for that CPU model's exec.cc file.  The
 309 # has_decode_default attribute is used in the decode block to allow
 310 # explicit default clauses to override default default clauses.
 311
 312 class GenCode(object):
 313     # Constructor.  At this point we substitute out all CPU-specific
 314     # symbols.  For the exec output, these go into the per-model
 315     # dictionary.  For all other output types they get collapsed into
 316     # a single string.
 317     def __init__(self,
 318                  header_output = '', decoder_output = '', exec_output = '',
 319                  decode_block = '', has_decode_default = False):
 320         self.header_output = expand_cpu_symbols_to_string(header_output)
 321         self.decoder_output = expand_cpu_symbols_to_string(decoder_output)
 322         if isinstance(exec_output, dict):
 323             self.exec_output = exec_output
 324         elif isinstance(exec_output, str):
 325             # If the exec_output arg is a single string, we replicate
 326             # it for each of the CPU models, substituting and
 327             # %(CPU_foo)s params appropriately.
 328             self.exec_output = expand_cpu_symbols_to_dict(exec_output)
 329         self.decode_block = expand_cpu_symbols_to_string(decode_block)
 330         self.has_decode_default = has_decode_default
 331
 332     # Override '+' operator: generate a new GenCode object that
 333     # concatenates all the individual strings in the operands.
 334     def __add__(self, other):
 335         exec_output = {}
 336         for cpu in cpu_models:
 337             n = cpu.name
 338             exec_output[n] = self.exec_output[n] + other.exec_output[n]
 339         return GenCode(self.header_output + other.header_output,
 340                        self.decoder_output + other.decoder_output,
 341                        exec_output,
 342                        self.decode_block + other.decode_block,
 343                        self.has_decode_default or other.has_decode_default)
 344
 345     # Prepend a string (typically a comment) to all the strings.
 346     def prepend_all(self, pre):
 347         self.header_output = pre + self.header_output
 348         self.decoder_output  = pre + self.decoder_output
 349         self.decode_block = pre + self.decode_block
 350         for cpu in cpu_models:
 351             self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]
 352
 353     # Wrap the decode block in a pair of strings (e.g., 'case foo:'
 354     # and 'break;').  Used to build the big nested switch statement.
 355     def wrap_decode_block(self, pre, post = ''):
 356         self.decode_block = pre + indent(self.decode_block) + post
 357
 358 #####################################################################
 359 #
 360 #                      Bitfield Operator Support
 361 #
 362 #####################################################################
 363
 364 bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')
 365
 366 bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
 367 bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')
 368
 369 def substBitOps(code):
 370     # first convert single-bit selectors to two-index form
 371     # i.e., <n> --> <n:n>
 372     code = bitOp1ArgRE.sub(r'<\1:\1>', code)
 373     # simple case: selector applied to ID (name)
 374     # i.e., foo<a:b> --> bits(foo, a, b)
 375     code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
 376     # if selector is applied to expression (ending in ')'),
 377     # we need to search backward for matching '('
 378     match = bitOpExprRE.search(code)
 379     while match:
 380         exprEnd = match.start()
 381         here = exprEnd - 1
 382         nestLevel = 1
 383         while nestLevel > 0:
 384             if code[here] == '(':
 385                 nestLevel -= 1
 386             elif code[here] == ')':
 387                 nestLevel += 1
 388             here -= 1
 389             if here < 0:
 390                 sys.exit("Didn't find '('!")
 391         exprStart = here+1
 392         newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
 393                                          match.group(1), match.group(2))
 394         code = code[:exprStart] + newExpr + code[match.end():]
 395         match = bitOpExprRE.search(code)
 396     return code
 397
 398
 399 #####################################################################
 400 #
 401 #                             Code Parser
 402 #
 403 # The remaining code is the support for automatically extracting
 404 # instruction characteristics from pseudocode.
 405 #
 406 #####################################################################
 407
 408 # Force the argument to be a list.  Useful for flags, where a caller
  409 # can specify a singleton flag or a list of flags.  Also useful for
 410 # converting tuples to lists so they can be modified.
 411 def makeList(arg):
 412     if isinstance(arg, list):
 413         return arg
 414     elif isinstance(arg, tuple):
 415         return list(arg)
 416     elif not arg:
 417         return []
 418     else:
 419         return [ arg ]
 420
 421 # Generate operandTypeMap from the user's 'def operand_types'
 422 # statement.
 423 def buildOperandTypeMap(user_dict, lineno):
 424     global operandTypeMap
 425     operandTypeMap = {}
 426     for (ext, (desc, size)) in user_dict.iteritems():
 427         if desc == 'signed int':
 428             ctype = 'int%d_t' % size
 429             is_signed = 1
 430         elif desc == 'unsigned int':
 431             ctype = 'uint%d_t' % size
 432             is_signed = 0
 433         elif desc == 'float':
 434             is_signed = 1       # shouldn't really matter
 435             if size == 32:
 436                 ctype = 'float'
 437             elif size == 64:
 438                 ctype = 'double'
 439         elif desc == 'twin64 int':
 440             is_signed = 0
 441             ctype = 'Twin64_t'
 442         elif desc == 'twin32 int':
 443             is_signed = 0
 444             ctype = 'Twin32_t'
 445         if ctype == '':
 446             error(lineno, 'Unrecognized type description "%s" in user_dict')
 447         operandTypeMap[ext] = (size, ctype, is_signed)
 448
 449 class Operand(object):
 450     '''Base class for operand descriptors.  An instance of this class
 451     (or actually a class derived from this one) represents a specific
 452     operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
 453     derived classes encapsulates the traits of a particular operand
 454     type (e.g., "32-bit integer register").'''
 455
 456     def buildReadCode(self, func = None):
 457         code = self.read_code % {"name": self.base_name,
 458                                  "func": func,
 459                                  "op_idx": self.src_reg_idx,
 460                                  "reg_idx": self.reg_spec,
 461                                  "size": self.size,
 462                                  "ctype": self.ctype}
 463         if self.size != self.dflt_size:
 464             return '%s = bits(%s, %d, 0);\n' % \
 465                    (self.base_name, code, self.size-1)
 466         else:
 467             return '%s = %s;\n' % \
 468                    (self.base_name, code)
 469
 470     def buildWriteCode(self, func = None):
 471         if (self.size != self.dflt_size and self.is_signed):
 472             final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 473         else:
 474             final_val = self.base_name
 475         code = self.write_code % {"name": self.base_name,
 476                                   "func": func,
 477                                   "op_idx": self.dest_reg_idx,
 478                                   "reg_idx": self.reg_spec,
 479                                   "size": self.size,
 480                                   "ctype": self.ctype,
 481                                   "final_val": final_val}
 482         return '''
 483         {
 484             %s final_val = %s;
 485             %s;
 486             if (traceData) { traceData->setData(final_val); }
 487         }''' % (self.dflt_ctype, final_val, code)
 488
 489     def __init__(self, full_name, ext, is_src, is_dest):
 490         self.full_name = full_name
 491         self.ext = ext
 492         self.is_src = is_src
 493         self.is_dest = is_dest
 494         # The 'effective extension' (eff_ext) is either the actual
 495         # extension, if one was explicitly provided, or the default.
 496         if ext:
 497             self.eff_ext = ext
 498         else:
 499             self.eff_ext = self.dflt_ext
 500
 501         (self.size, self.ctype, self.is_signed) = operandTypeMap[self.eff_ext]
 502
 503         # note that mem_acc_size is undefined for non-mem operands...
 504         # template must be careful not to use it if it doesn't apply.
 505         if self.isMem():
 506             self.mem_acc_size = self.makeAccSize()
 507             if self.ctype in ['Twin32_t', 'Twin64_t']:
 508                 self.mem_acc_type = 'Twin'
 509             else:
 510                 self.mem_acc_type = 'uint'
 511
 512     # Finalize additional fields (primarily code fields).  This step
 513     # is done separately since some of these fields may depend on the
 514     # register index enumeration that hasn't been performed yet at the
 515     # time of __init__().
 516     def finalize(self):
 517         self.flags = self.getFlags()
 518         self.constructor = self.makeConstructor()
 519         self.op_decl = self.makeDecl()
 520
 521         if self.is_src:
 522             self.op_rd = self.makeRead()
 523             self.op_src_decl = self.makeDecl()
 524         else:
 525             self.op_rd = ''
 526             self.op_src_decl = ''
 527
 528         if self.is_dest:
 529             self.op_wb = self.makeWrite()
 530             self.op_dest_decl = self.makeDecl()
 531         else:
 532             self.op_wb = ''
 533             self.op_dest_decl = ''
 534
 535     def isMem(self):
 536         return 0
 537
 538     def isReg(self):
 539         return 0
 540
 541     def isFloatReg(self):
 542         return 0
 543
 544     def isIntReg(self):
 545         return 0
 546
 547     def isControlReg(self):
 548         return 0
 549
 550     def getFlags(self):
 551         # note the empty slice '[:]' gives us a copy of self.flags[0]
 552         # instead of a reference to it
 553         my_flags = self.flags[0][:]
 554         if self.is_src:
 555             my_flags += self.flags[1]
 556         if self.is_dest:
 557             my_flags += self.flags[2]
 558         return my_flags
 559
 560     def makeDecl(self):
 561         # Note that initializations in the declarations are solely
 562         # to avoid 'uninitialized variable' errors from the compiler.
 563         return self.ctype + ' ' + self.base_name + ' = 0;\n';
 564
 565 class IntRegOperand(Operand):
 566     def isReg(self):
 567         return 1
 568
 569     def isIntReg(self):
 570         return 1
 571
 572     def makeConstructor(self):
 573         c = ''
 574         if self.is_src:
 575             c += '\n\t_srcRegIdx[%d] = %s;' % \
 576                  (self.src_reg_idx, self.reg_spec)
 577         if self.is_dest:
 578             c += '\n\t_destRegIdx[%d] = %s;' % \
 579                  (self.dest_reg_idx, self.reg_spec)
 580         return c
 581
 582     def makeRead(self):
 583         if (self.ctype == 'float' or self.ctype == 'double'):
 584             error('Attempt to read integer register as FP')
 585         if self.read_code != None:
 586             return self.buildReadCode('readIntRegOperand')
 587         if (self.size == self.dflt_size):
 588             return '%s = xc->readIntRegOperand(this, %d);\n' % \
 589                    (self.base_name, self.src_reg_idx)
 590         elif (self.size > self.dflt_size):
 591             int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
 592                           (self.src_reg_idx)
 593             if (self.is_signed):
 594                 int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
 595             return '%s = %s;\n' % (self.base_name, int_reg_val)
 596         else:
 597             return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
 598                    (self.base_name, self.src_reg_idx, self.size-1)
 599
 600     def makeWrite(self):
 601         if (self.ctype == 'float' or self.ctype == 'double'):
 602             error('Attempt to write integer register as FP')
 603         if self.write_code != None:
 604             return self.buildWriteCode('setIntRegOperand')
 605         if (self.size != self.dflt_size and self.is_signed):
 606             final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 607         else:
 608             final_val = self.base_name
 609         wb = '''
 610         {
 611             %s final_val = %s;
 612             xc->setIntRegOperand(this, %d, final_val);\n
 613             if (traceData) { traceData->setData(final_val); }
 614         }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
 615         return wb
 616
 617 class FloatRegOperand(Operand):
 618     def isReg(self):
 619         return 1
 620
 621     def isFloatReg(self):
 622         return 1
 623
 624     def makeConstructor(self):
 625         c = ''
 626         if self.is_src:
 627             c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
 628                  (self.src_reg_idx, self.reg_spec)
 629         if self.is_dest:
 630             c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
 631                  (self.dest_reg_idx, self.reg_spec)
 632         return c
 633
 634     def makeRead(self):
 635         bit_select = 0
 636         if (self.ctype == 'float' or self.ctype == 'double'):
 637             func = 'readFloatRegOperand'
 638         else:
 639             func = 'readFloatRegOperandBits'
 640             if (self.size != self.dflt_size):
 641                 bit_select = 1
 642         base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
 643         if self.read_code != None:
 644             return self.buildReadCode(func)
 645         if bit_select:
 646             return '%s = bits(%s, %d, 0);\n' % \
 647                    (self.base_name, base, self.size-1)
 648         else:
 649             return '%s = %s;\n' % (self.base_name, base)
 650
 651     def makeWrite(self):
 652         final_val = self.base_name
 653         final_ctype = self.ctype
 654         if (self.ctype == 'float' or self.ctype == 'double'):
 655             func = 'setFloatRegOperand'
 656         elif (self.ctype == 'uint32_t' or self.ctype == 'uint64_t'):
 657             func = 'setFloatRegOperandBits'
 658         else:
 659             func = 'setFloatRegOperandBits'
 660             final_ctype = 'uint%d_t' % self.dflt_size
 661             if (self.size != self.dflt_size and self.is_signed):
 662                 final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
 663         if self.write_code != None:
 664             return self.buildWriteCode(func)
 665         wb = '''
 666         {
 667             %s final_val = %s;
 668             xc->%s(this, %d, final_val);\n
 669             if (traceData) { traceData->setData(final_val); }
 670         }''' % (final_ctype, final_val, func, self.dest_reg_idx)
 671         return wb
 672
 673 class ControlRegOperand(Operand):
 674     def isReg(self):
 675         return 1
 676
 677     def isControlReg(self):
 678         return 1
 679
 680     def makeConstructor(self):
 681         c = ''
 682         if self.is_src:
 683             c += '\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
 684                  (self.src_reg_idx, self.reg_spec)
 685         if self.is_dest:
 686             c += '\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
 687                  (self.dest_reg_idx, self.reg_spec)
 688         return c
 689
 690     def makeRead(self):
 691         bit_select = 0
 692         if (self.ctype == 'float' or self.ctype == 'double'):
 693             error('Attempt to read control register as FP')
 694         if self.read_code != None:
 695             return self.buildReadCode('readMiscRegOperand')
 696         base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
 697         if self.size == self.dflt_size:
 698             return '%s = %s;\n' % (self.base_name, base)
 699         else:
 700             return '%s = bits(%s, %d, 0);\n' % \
 701                    (self.base_name, base, self.size-1)
 702
 703     def makeWrite(self):
 704         if (self.ctype == 'float' or self.ctype == 'double'):
 705             error('Attempt to write control register as FP')
 706         if self.write_code != None:
 707             return self.buildWriteCode('setMiscRegOperand')
 708         wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
 709              (self.dest_reg_idx, self.base_name)
 710         wb += 'if (traceData) { traceData->setData(%s); }' % \
 711               self.base_name
 712         return wb
 713
 714 class MemOperand(Operand):
 715     def isMem(self):
 716         return 1
 717
 718     def makeConstructor(self):
 719         return ''
 720
 721     def makeDecl(self):
 722         # Note that initializations in the declarations are solely
 723         # to avoid 'uninitialized variable' errors from the compiler.
 724         # Declare memory data variable.
 725         if self.ctype in ['Twin32_t','Twin64_t']:
 726             return "%s %s; %s.a = 0; %s.b = 0;\n" % \
 727                    (self.ctype, self.base_name, self.base_name, self.base_name)
 728         return '%s %s = 0;\n' % (self.ctype, self.base_name)
 729
 730     def makeRead(self):
 731         if self.read_code != None:
 732             return self.buildReadCode()
 733         return ''
 734
 735     def makeWrite(self):
 736         if self.write_code != None:
 737             return self.buildWriteCode()
 738         return ''
 739
 740     # Return the memory access size *in bits*, suitable for
 741     # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
 742     def makeAccSize(self):
 743         return self.size
 744
 745 class PCOperand(Operand):
 746     def makeConstructor(self):
 747         return ''
 748
 749     def makeRead(self):
 750         return '%s = xc->readPC();\n' % self.base_name
 751
 752     def makeWrite(self):
 753         return 'xc->setPC(%s);\n' % self.base_name
 754
 755 class UPCOperand(Operand):
 756     def makeConstructor(self):
 757         return ''
 758
 759     def makeRead(self):
 760         if self.read_code != None:
 761             return self.buildReadCode('readMicroPC')
 762         return '%s = xc->readMicroPC();\n' % self.base_name
 763
 764     def makeWrite(self):
 765         if self.write_code != None:
 766             return self.buildWriteCode('setMicroPC')
 767         return 'xc->setMicroPC(%s);\n' % self.base_name
 768
 769 class NUPCOperand(Operand):
 770     def makeConstructor(self):
 771         return ''
 772
 773     def makeRead(self):
 774         if self.read_code != None:
 775             return self.buildReadCode('readNextMicroPC')
 776         return '%s = xc->readNextMicroPC();\n' % self.base_name
 777
 778     def makeWrite(self):
 779         if self.write_code != None:
 780             return self.buildWriteCode('setNextMicroPC')
 781         return 'xc->setNextMicroPC(%s);\n' % self.base_name
 782
 783 class NPCOperand(Operand):
 784     def makeConstructor(self):
 785         return ''
 786
 787     def makeRead(self):
 788         if self.read_code != None:
 789             return self.buildReadCode('readNextPC')
 790         return '%s = xc->readNextPC();\n' % self.base_name
 791
 792     def makeWrite(self):
 793         if self.write_code != None:
 794             return self.buildWriteCode('setNextPC')
 795         return 'xc->setNextPC(%s);\n' % self.base_name
 796
 797 class NNPCOperand(Operand):
 798     def makeConstructor(self):
 799         return ''
 800
 801     def makeRead(self):
 802         if self.read_code != None:
 803             return self.buildReadCode('readNextNPC')
 804         return '%s = xc->readNextNPC();\n' % self.base_name
 805
 806     def makeWrite(self):
 807         if self.write_code != None:
 808             return self.buildWriteCode('setNextNPC')
 809         return 'xc->setNextNPC(%s);\n' % self.base_name
 810
 811 def buildOperandNameMap(user_dict, lineno):
 812     global operandNameMap
 813     operandNameMap = {}
 814     for (op_name, val) in user_dict.iteritems():
 815         (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val[:5]
 816         if len(val) > 5:
 817             read_code = val[5]
 818         else:
 819             read_code = None
 820         if len(val) > 6:
 821             write_code = val[6]
 822         else:
 823             write_code = None
 824         if len(val) > 7:
 825             error(lineno,
 826                   'error: too many attributes for operand "%s"' %
 827                   base_cls_name)
 828
 829         (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext]
 830         # Canonical flag structure is a triple of lists, where each list
 831         # indicates the set of flags implied by this operand always, when
 832         # used as a source, and when used as a dest, respectively.
 833         # For simplicity this can be initialized using a variety of fairly
 834         # obvious shortcuts; we convert these to canonical form here.
 835         if not flags:
 836             # no flags specified (e.g., 'None')
 837             flags = ( [], [], [] )
 838         elif isinstance(flags, str):
 839             # a single flag: assumed to be unconditional
 840             flags = ( [ flags ], [], [] )
 841         elif isinstance(flags, list):
 842             # a list of flags: also assumed to be unconditional
 843             flags = ( flags, [], [] )
 844         elif isinstance(flags, tuple):
 845             # it's a tuple: it should be a triple,
 846             # but each item could be a single string or a list
 847             (uncond_flags, src_flags, dest_flags) = flags
 848             flags = (makeList(uncond_flags),
 849                      makeList(src_flags), makeList(dest_flags))
 850         # Accumulate attributes of new operand class in tmp_dict
 851         tmp_dict = {}
 852         for attr in ('dflt_ext', 'reg_spec', 'flags', 'sort_pri',
 853                      'dflt_size', 'dflt_ctype', 'dflt_is_signed',
 854                      'read_code', 'write_code'):
 855             tmp_dict[attr] = eval(attr)
 856         tmp_dict['base_name'] = op_name
 857         # New class name will be e.g. "IntReg_Ra"
 858         cls_name = base_cls_name + '_' + op_name
 859         # Evaluate string arg to get class object.  Note that the
 860         # actual base class for "IntReg" is "IntRegOperand", i.e. we
 861         # have to append "Operand".
 862         try:
 863             base_cls = eval(base_cls_name + 'Operand')
 864         except NameError:
 865             if debug:
 866                 raise
 867             error(lineno,
 868                   'error: unknown operand base class "%s"' % base_cls_name)
 869         # The following statement creates a new class called
 870         # <cls_name> as a subclass of <base_cls> with the attributes
 871         # in tmp_dict, just as if we evaluated a class declaration.
 872         operandNameMap[op_name] = type(cls_name, (base_cls,), tmp_dict)
 873
 874     # Define operand variables.
 875     operands = user_dict.keys()
 876
 877     operandsREString = (r'''
 878     (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
 879     ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
 880     (?![\w\.])       # neg. lookahead assertion: prevent partial matches
 881     '''
 882                         % string.join(operands, '|'))
 883
 884     global operandsRE
 885     operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
 886
 887     # Same as operandsREString, but extension is mandatory, and only two
 888     # groups are returned (base and ext, not full name as above).
 889     # Used for subtituting '_' for '.' to make C++ identifiers.
 890     operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
 891                                % string.join(operands, '|'))
 892
 893     global operandsWithExtRE
 894     operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)
 895
 896 maxInstSrcRegs = 0
 897 maxInstDestRegs = 0
 898
 899 class OperandList(object):
 900     '''Find all the operands in the given code block.  Returns an operand
 901     descriptor list (instance of class OperandList).'''
 902     def __init__(self, code):
 903         self.items = []
 904         self.bases = {}
 905         # delete comments so we don't match on reg specifiers inside
 906         code = commentRE.sub('', code)
 907         # search for operands
 908         next_pos = 0
 909         while 1:
 910             match = operandsRE.search(code, next_pos)
 911             if not match:
 912                 # no more matches: we're done
 913                 break
 914             op = match.groups()
 915             # regexp groups are operand full name, base, and extension
 916             (op_full, op_base, op_ext) = op
 917             # if the token following the operand is an assignment, this is
 918             # a destination (LHS), else it's a source (RHS)
 919             is_dest = (assignRE.match(code, match.end()) != None)
 920             is_src = not is_dest
 921             # see if we've already seen this one
 922             op_desc = self.find_base(op_base)
 923             if op_desc:
 924                 if op_desc.ext != op_ext:
 925                     error('Inconsistent extensions for operand %s' % \
 926                           op_base)
 927                 op_desc.is_src = op_desc.is_src or is_src
 928                 op_desc.is_dest = op_desc.is_dest or is_dest
 929             else:
 930                 # new operand: create new descriptor
 931                 op_desc = operandNameMap[op_base](op_full, op_ext,
 932                                                   is_src, is_dest)
 933                 self.append(op_desc)
 934             # start next search after end of current match
 935             next_pos = match.end()
 936         self.sort()
 937         # enumerate source & dest register operands... used in building
 938         # constructor later
 939         self.numSrcRegs = 0
 940         self.numDestRegs = 0
 941         self.numFPDestRegs = 0
 942         self.numIntDestRegs = 0
 943         self.memOperand = None
 944         for op_desc in self.items:
 945             if op_desc.isReg():
 946                 if op_desc.is_src:
 947                     op_desc.src_reg_idx = self.numSrcRegs
 948                     self.numSrcRegs += 1
 949                 if op_desc.is_dest:
 950                     op_desc.dest_reg_idx = self.numDestRegs
 951                     self.numDestRegs += 1
 952                     if op_desc.isFloatReg():
 953                         self.numFPDestRegs += 1
 954                     elif op_desc.isIntReg():
 955                         self.numIntDestRegs += 1
 956             elif op_desc.isMem():
 957                 if self.memOperand:
 958                     error("Code block has more than one memory operand.")
 959                 self.memOperand = op_desc
 960         global maxInstSrcRegs
 961         global maxInstDestRegs
 962         if maxInstSrcRegs < self.numSrcRegs:
 963             maxInstSrcRegs = self.numSrcRegs
 964         if maxInstDestRegs < self.numDestRegs:
 965             maxInstDestRegs = self.numDestRegs
 966         # now make a final pass to finalize op_desc fields that may depend
 967         # on the register enumeration
 968         for op_desc in self.items:
 969             op_desc.finalize()
 970
 971     def __len__(self):
 972         return len(self.items)
 973
 974     def __getitem__(self, index):
 975         return self.items[index]
 976
 977     def append(self, op_desc):
 978         self.items.append(op_desc)
 979         self.bases[op_desc.base_name] = op_desc
 980
 981     def find_base(self, base_name):
 982         # like self.bases[base_name], but returns None if not found
 983         # (rather than raising exception)
 984         return self.bases.get(base_name)
 985
 986     # internal helper function for concat[Some]Attr{Strings|Lists}
 987     def __internalConcatAttrs(self, attr_name, filter, result):
 988         for op_desc in self.items:
 989             if filter(op_desc):
 990                 result += getattr(op_desc, attr_name)
 991         return result
 992
 993     # return a single string that is the concatenation of the (string)
 994     # values of the specified attribute for all operands
 995     def concatAttrStrings(self, attr_name):
 996         return self.__internalConcatAttrs(attr_name, lambda x: 1, '')
 997
 998     # like concatAttrStrings, but only include the values for the operands
 999     # for which the provided filter function returns true
1000     def concatSomeAttrStrings(self, filter, attr_name):
1001         return self.__internalConcatAttrs(attr_name, filter, '')
1002
1003     # return a single list that is the concatenation of the (list)
1004     # values of the specified attribute for all operands
1005     def concatAttrLists(self, attr_name):
1006         return self.__internalConcatAttrs(attr_name, lambda x: 1, [])
1007
1008     # like concatAttrLists, but only include the values for the operands
1009     # for which the provided filter function returns true
1010     def concatSomeAttrLists(self, filter, attr_name):
1011         return self.__internalConcatAttrs(attr_name, filter, [])
1012
1013     def sort(self):
1014         self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
1015
class SubOperandList(OperandList):
    '''The subset of a master OperandList that a given code block uses.

    The entries are the master list's own descriptor objects (shared,
    not copied), ordered by sort_pri.'''
    def __init__(self, code, master_list):
        self.items = []
        self.bases = {}
        # strip comments first so that register specifiers mentioned
        # inside them are not picked up
        code = commentRE.sub('', code)
        # walk over every operand reference in the code
        for match in operandsRE.finditer(code):
            # regexp groups are operand full name, base, and extension;
            # only the base name is needed here
            op_base = match.group(2)
            if not master_list.find_base(op_base):
                # every operand must already be known to the master list
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % op_base)
            elif not self.find_base(op_base):
                # first sighting in this block: share the master's
                # descriptor
                self.append(master_list.bases[op_base])
        self.sort()
        # at most one memory operand is allowed per code block
        self.memOperand = None
        for op_desc in self.items:
            if not op_desc.isMem():
                continue
            if self.memOperand:
                error("Code block has more than one memory operand.")
            self.memOperand = op_desc
1055
# Regular expression object to match C++ '//' comments (used by
# OperandList and SubOperandList to strip comments before searching for
# operands).  The trailing newline is optional so that a comment ending
# the snippet without a newline is stripped too; otherwise words in it
# could be mistaken for operands.
commentRE = re.compile(r'//.*\n?')

# Regular expression object to match the start of an assignment: optional
# whitespace, then '=' not followed by another '=' (so '==' does not
# count).  Used by OperandList to tell destinations from sources.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1063
1064 # Munge operand names in code string to make legal C++ variable names.
1065 # This means getting rid of the type extension if any.
1066 # (Will match base_name attribute of Operand object.)
def substMungedOpNames(code):
    '''Return code with every "operand.ext" reference cut down to the
    bare operand name (regex group 1 of operandsWithExtRE).'''
    strip_ext = operandsWithExtRE.sub
    return strip_ext(r'\1', code)
1069
1070 # Fix up code snippets for final substitution in templates.
def mungeSnippet(s):
    '''Prepare a snippet for template substitution.

    Strings get bit operators and operand names rewritten; any other
    value is handed back untouched.'''
    if not isinstance(s, str):
        return s
    with_bit_ops = substBitOps(s)
    return substMungedOpNames(with_bit_ops)
1076
def makeFlagConstructor(flag_list):
    '''Return the C++ statements that set every flag in flag_list.

    flag_list is sorted and stripped of duplicates in place, so the
    caller sees the canonical list afterwards.  The result is the empty
    string for an empty list, otherwise one 'flags[<flag>] = true;'
    statement per distinct flag, each preceded by a newline and a tab.
    '''
    if not flag_list:
        return ''
    # Canonicalize in place: unique flags in sorted order.  Slice
    # assignment keeps the caller's list object, as the original
    # sort-and-delete loop did.
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post
1092
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# applies this with match(), so any string starting with 'Is' qualifies.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.
# NOTE(review): applied with match() and not anchored at the end, so any
# string that merely contains 'Op' is accepted as well.
opClassRE = re.compile(r'.*Op|No_OpClass')
1098
class InstObjParams(object):
    '''Parameters describing one instruction object.

    Besides the mnemonic, class names and code snippets passed in, the
    constructor derives: operands (an OperandList over all snippets),
    constructor (C++ constructor body text), flags, op_class and
    fp_enable_check.'''
    # NOTE(review): the mutable defaults are shared between calls; the
    # default dict is stored as self.snippets, so it must not be
    # modified through an instance.
    def __init__(self, mnem, class_name, base_class = '',
                 snippets = {}, opt_args = []):
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        # a bare snippet is shorthand for a dict holding it under 'code'
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        self.snippets = snippets

        # operands are collected over all of the snippets at once
        compositeCode = ' '.join(map(str, snippets.values()))
        operands = OperandList(compositeCode)
        self.operands = operands

        # constructor text: per-operand code, then the register counts
        ctor_parts = [
            operands.concatAttrStrings('constructor'),
            '\n\t_numSrcRegs = %d;' % operands.numSrcRegs,
            '\n\t_numDestRegs = %d;' % operands.numDestRegs,
            '\n\t_numFPDestRegs = %d;' % operands.numFPDestRegs,
            '\n\t_numIntDestRegs = %d;' % operands.numIntDestRegs,
        ]
        self.constructor = ''.join(ctor_parts)
        self.flags = operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        flags = self.flags
        if 'IsStore' in flags:
            guess = 'MemWriteOp'
        elif 'IsLoad' in flags or 'IsPrefetch' in flags:
            guess = 'MemReadOp'
        elif 'IsFloating' in flags:
            guess = 'FloatAddOp'
        else:
            guess = 'IntAluOp'
        self.op_class = guess

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
                continue
            if opClassRE.match(oa):
                self.op_class = oa
                continue
            error('InstObjParams: optional arg "%s" not recognized '
                  'as StaticInst::Flag or OpClass.' % oa)

        # flag initialization is added to the constructor only now so
        # that it covers any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        self.fp_enable_check = ''
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
1157
1158 ##############
1159 # Stack: a simple stack object.  Used for both formats (formatStack)
1160 # and default cases (defaultStack).  Simply wraps a list to give more
1161 # stack-like syntax and enable initialization with an argument list
1162 # (as opposed to an argument that's a list).
1163
class Stack(list):
    '''A list with stack-style helpers, seeded from its arguments.'''

    def __init__(self, *items):
        # the positional arguments become the initial contents,
        # bottom of the stack first
        super(Stack, self).__init__(items)

    def push(self, item):
        # place item on top of the stack
        self.append(item)

    def top(self):
        # peek at the topmost item without removing it
        return self[-1]
1173
# Global stack that tracks current file and line number.
# Each element is a tuple (filename, lineno) that records the
# *current* filename and the line number in the *previous* file where
# it was included.  Entries are pushed by t_NEWFILE and popped by
# t_ENDFILE, which restores the saved line number.
fileNameStack = Stack()
1179
1180
1181 #######################
1182 #
1183 # Output file template
1184 #
1185
# Skeleton of a generated C++ output file.  Filled in with the
# %-operator from a dict providing the keys: filename, includes,
# global_output, namespace, namespace_output and decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''

# Skeleton of the generated file that defines the MaxInstSrcRegs and
# MaxInstDestRegs constants.  Expects the keys filename, namespace,
# MaxInstSrcRegs and MaxInstDestRegs (the last two formatted as
# integers).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1221
1222 class ISAParser(Grammar):
1223     def __init__(self, output_dir):
1224         super(ISAParser, self).__init__()
1225         self.output_dir = output_dir
1226
1227         self.templateMap = {}
1228
1229         # This dictionary maps format name strings to Format objects.
1230         self.formatMap = {}
1231
1232         # The format stack.
1233         self.formatStack = Stack(NoFormat())
1234
1235         # The default case stack.
1236         self.defaultStack = Stack(None)
1237
1238         self.exportContext = {}
1239
1240     #####################################################################
1241     #
1242     #                                Lexer
1243     #
1244     # The PLY lexer module takes two things as input:
1245     # - A list of token names (the string list 'tokens')
1246     # - A regular expression describing a match for each token.  The
1247     #   regexp for token FOO can be provided in two ways:
1248     #   - as a string variable named t_FOO
1249     #   - as the doc string for a function named t_FOO.  In this case,
1250     #     the function is also executed, allowing an action to be
1251     #     associated with each token match.
1252     #
1253     #####################################################################
1254
    # Reserved words.  These are listed separately as they are matched
    # using the same regexp as generic IDs, but distinguished in the
    # t_ID() function.  The PLY documentation suggests this approach.
    # Each entry doubles as the token name; the keyword as written in
    # the ISA description is its lower-case form (see reserved_map).
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )

    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched (by t_NEWFILE / t_ENDFILE) but never
    # returned to the parser; they are left out of the token list to
    # suppress a PLY warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )

    # Regular expressions for token matching
    # NOTE(review): ':' is a prefix of '::'; this relies on PLY trying
    # the longer string pattern first -- confirm against the PLY
    # version in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'

    # Identifiers and reserved words: reserved_map takes the lower-case
    # keyword text to its token name and is consulted by t_ID().
    # (The loop variable r stays behind as a class attribute.)
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1319
1320     def t_ID(self, t):
1321         r'[A-Za-z_]\w*'
1322         t.type = self.reserved_map.get(t.value, 'ID')
1323         return t
1324
1325     # Integer literal
1326     def t_INTLIT(self, t):
1327         r'-?(0x[\da-fA-F]+)|\d+'
1328         try:
1329             t.value = int(t.value,0)
1330         except ValueError:
1331             error(t, 'Integer value "%s" too large' % t.value)
1332             t.value = 0
1333         return t
1334
1335     # String literal.  Note that these use only single quotes, and
1336     # can span multiple lines.
1337     def t_STRLIT(self, t):
1338         r"(?m)'([^'])+'"
1339         # strip off quotes
1340         t.value = t.value[1:-1]
1341         t.lexer.lineno += t.value.count('\n')
1342         return t
1343
1344
1345     # "Code literal"... like a string literal, but delimiters are
1346     # '{{' and '}}' so they get formatted nicely under emacs c-mode
1347     def t_CODELIT(self, t):
1348         r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1349         # strip off {{ & }}
1350         t.value = t.value[2:-2]
1351         t.lexer.lineno += t.value.count('\n')
1352         return t
1353
1354     def t_CPPDIRECTIVE(self, t):
1355         r'^\#[^\#].*\n'
1356         t.lexer.lineno += t.value.count('\n')
1357         return t
1358
1359     def t_NEWFILE(self, t):
1360         r'^\#\#newfile\s+"[\w/.-]*"'
1361         fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1362         t.lexer.lineno = 0
1363
1364     def t_ENDFILE(self, t):
1365         r'^\#\#endfile'
1366         (old_filename, t.lexer.lineno) = fileNameStack.pop()
1367
1368     #
1369     # The functions t_NEWLINE, t_ignore, and t_error are
1370     # special for the lex module.
1371     #
1372
1373     # Newlines
1374     def t_NEWLINE(self, t):
1375         r'\n+'
1376         t.lexer.lineno += t.value.count('\n')
1377
1378     # Comments
    def t_comment(self, t):
        r'//.*'
        # The docstring above is the token regexp: '//' through the end
        # of the line.  There is no action and nothing is returned, so
        # no token is produced for the comment text.
1381
    # Completely ignored characters: space, tab and form feed (\x0c)
    t_ignore = ' \t\x0c'
1384
1385     # Error handler
    def t_error(self, t):
        # Report the first character of the unmatched input.
        error(t, "illegal character '%s'" % t.value[0])
        # Step over the offending character (only reached if error()
        # returns).
        # NOTE(review): PLY documents t.lexer.skip(1) for this; confirm
        # that t.skip exists in the PLY version in use.
        t.skip(1)
1389
1390     #####################################################################
1391     #
1392     #                                Parser
1393     #
1394     # Every function whose name starts with 'p_' defines a grammar
1395     # rule.  The rule is encoded in the function's doc string, while
1396     # the function body provides the action taken when the rule is
1397     # matched.  The argument to each function is a list of the values
1398     # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1399     # symbols on the RHS.  For tokens, the value is copied from the
1400     # t.value attribute provided by the lexer.  For non-terminals, the
1401     # value is assigned by the producing rule; i.e., the job of the
1402     # grammar rule function is to set the value for the non-terminal
1403     # on the LHS (by assigning to t[0]).
1404     #####################################################################
1405
1406     # The LHS of the first grammar rule is used as the start symbol
1407     # (in this case, 'specification').  Note that this rule enforces
1408     # that there will be exactly one namespace declaration, with 0 or
1409     # more global defs/decls before and after it.  The defs & decls
1410     # before the namespace decl will be outside the namespace; those
1411     # after will be inside.  The decoder function is always inside the
1412     # namespace.
    def p_specification(self, t):
        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
        # t[1]: defs/outputs before the namespace declaration
        # t[2]: the ISA name, t[3]: defs/outputs after it
        # t[4]: the decode block
        global_code = t[1]
        isa_name = t[2]
        namespace = isa_name + "Inst"
        # wrap the decode block as a function definition
        # (the template below is filled in from vars(), i.e. from the
        # local names isa_name and namespace -- do not rename them)
        t[4].wrap_decode_block('''
StaticInstPtr
%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
{
    using namespace %(namespace)s;
''' % vars(), '}')
        # both the latter output blocks and the decode block are in
        # the namespace
        namespace_code = t[3] + t[4]
        # pass it all back to the caller of yacc.parse()
        t[0] = (isa_name, namespace, global_code, namespace_code)
1430
1431     # ISA name declaration looks like "namespace <foo>;"
1432     def p_name_decl(self, t):
1433         'name_decl : NAMESPACE ID SEMI'
1434         t[0] = t[2]
1435
1436     # 'opt_defs_and_outputs' is a possibly empty sequence of
1437     # def and/or output statements.
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        # no statements at all: the result is a GenCode built with no
        # arguments
        t[0] = GenCode()
1441
    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        # non-empty sequence: pass its value through unchanged
        t[0] = t[1]
1445
    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        # a sequence of one statement: its value is that statement's value
        t[0] = t[1]
1449
1450     def p_defs_and_outputs_1(self, t):
1451         'defs_and_outputs : defs_and_outputs def_or_output'
1452         t[0] = t[1] + t[2]
1453
1454     # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        # whichever alternative matched, pass its value through unchanged
        t[0] = t[1]
1467
1468     # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1469     # directly to the appropriate output section.
1470
1471     # Massage output block by substituting in template definitions and
1472     # bit operators.  We handle '%'s embedded in the string that don't
1473     # indicate template substitutions (or CPU-specific symbols, which
1474     # get handled in GenCode) by doubling them first so that the
1475     # format operation will reduce them back to single '%'s.
1476     def process_output(self, s):
1477         s = protect_non_subst_percents(s)
1478         # protects cpu-specific symbols too
1479         s = protect_cpu_symbols(s)
1480         return substBitOps(s % self.templateMap)
1481
1482     def p_output_header(self, t):
1483         'output_header : OUTPUT HEADER CODELIT SEMI'
1484         t[0] = GenCode(header_output = self.process_output(t[3]))
1485
1486     def p_output_decoder(self, t):
1487         'output_decoder : OUTPUT DECODER CODELIT SEMI'
1488         t[0] = GenCode(decoder_output = self.process_output(t[3]))
1489
1490     def p_output_exec(self, t):
1491         'output_exec : OUTPUT EXEC CODELIT SEMI'
1492         t[0] = GenCode(exec_output = self.process_output(t[3]))
1493
1494     # global let blocks 'let {{...}}' (Python code blocks) are
1495     # executed directly when seen.  Note that these execute in a
1496     # special variable context 'exportContext' to prevent the code
1497     # from polluting this script's namespace.
1498     def p_global_let(self, t):
1499         'global_let : LET CODELIT SEMI'
1500         self.updateExportContext()
1501         self.exportContext["header_output"] = ''
1502         self.exportContext["decoder_output"] = ''
1503         self.exportContext["exec_output"] = ''
1504         self.exportContext["decode_block"] = ''
1505         try:
1506             exec fixPythonIndentation(t[2]) in self.exportContext
1507         except Exception, exc:
1508             if debug:
1509                 raise
1510             error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1511         t[0] = GenCode(header_output=self.exportContext["header_output"],
1512                        decoder_output=self.exportContext["decoder_output"],
1513                        exec_output=self.exportContext["exec_output"],
1514                        decode_block=self.exportContext["decode_block"])
1515
1516     # Define the mapping from operand type extensions to C++ types and
1517     # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        # The code literal holds the body of a Python dict display;
        # wrap it in braces and evaluate it.  No namespace is passed,
        # so eval() sees this function's own globals and locals.
        try:
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            # with debug set, let the original exception propagate
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode() # contributes nothing to the output C++ file
1529
1530     # Define the mapping from operand names to operand classes and
1531     # other traits.  Stored in operandNameMap.
1532     def p_def_operands(self, t):
1533         'def_operands : DEF OPERANDS CODELIT SEMI'
1534         if not globals().has_key('operandTypeMap'):
1535             error(t, 'error: operand types must be defined before operands')
1536         try:
1537             user_dict = eval('{' + t[3] + '}', self.exportContext)
1538         except Exception, exc:
1539             if debug:
1540                 raise
1541             error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1542         buildOperandNameMap(user_dict, t.lexer.lineno)
1543         t[0] = GenCode() # contributes nothing to the output C++ file
1544
1545     # A bitfield definition looks like:
1546     # 'def [signed] bitfield <ID> [<first>:<last>]'
1547     # This generates a preprocessor macro in the output file.
1548     def p_def_bitfield_0(self, t):
1549         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1550         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1551         if (t[2] == 'signed'):
1552             expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1553         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1554         t[0] = GenCode(header_output = hash_define)
1555
1556     # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1557     def p_def_bitfield_1(self, t):
1558         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1559         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1560         if (t[2] == 'signed'):
1561             expr = 'sext<%d>(%s)' % (1, expr)
1562         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1563         t[0] = GenCode(header_output = hash_define)
1564
1565     # alternate form for structure member: 'def bitfield <ID> <ID>'
1566     def p_def_bitfield_struct(self, t):
1567         'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1568         if (t[2] != ''):
1569             error(t, 'error: structure bitfields are always unsigned.')
1570         expr = 'machInst.%s' % t[5]
1571         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1572         t[0] = GenCode(header_output = hash_define)
1573
1574     def p_id_with_dot_0(self, t):
1575         'id_with_dot : ID'
1576         t[0] = t[1]
1577
1578     def p_id_with_dot_1(self, t):
1579         'id_with_dot : ID DOT id_with_dot'
1580         t[0] = t[1] + t[2] + t[3]
1581
1582     def p_opt_signed_0(self, t):
1583         'opt_signed : SIGNED'
1584         t[0] = t[1]
1585
1586     def p_opt_signed_1(self, t):
1587         'opt_signed : empty'
1588         t[0] = ''
1589
1590     def p_def_template(self, t):
1591         'def_template : DEF TEMPLATE ID CODELIT SEMI'
1592         self.templateMap[t[3]] = Template(t[4])
1593         t[0] = GenCode()
1594
1595     # An instruction format definition looks like
1596     # "def format <fmt>(<params>) {{...}};"
1597     def p_def_format(self, t):
1598         'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1599         (id, params, code) = (t[3], t[5], t[7])
1600         self.defFormat(id, params, code, t.lexer.lineno)
1601         t[0] = GenCode()
1602
1603     # The formal parameter list for an instruction format is a
1604     # possibly empty list of comma-separated parameters.  Positional
1605     # (standard, non-keyword) parameters must come first, followed by
1606     # keyword parameters, followed by a '*foo' parameter that gets
1607     # excess positional arguments (as in Python).  Each of these three
1608     # parameter categories is optional.
1609     #
1610     # Note that we do not support the '**foo' parameter for collecting
1611     # otherwise undefined keyword args.  Otherwise the parameter list
1612     # is (I believe) identical to what is supported in Python.
1613     #
    # The param list generates a flat list of strings, one per formal
    # parameter: a bare name for each positional param, 'name = <repr
    # of default>' for each keyword param, and '*name' for the
    # excess-args param.
1617     def p_param_list_0(self, t):
1618         'param_list : positional_param_list COMMA nonpositional_param_list'
1619         t[0] = t[1] + t[3]
1620
1621     def p_param_list_1(self, t):
1622         '''param_list : positional_param_list
1623                       | nonpositional_param_list'''
1624         t[0] = t[1]
1625
1626     def p_positional_param_list_0(self, t):
1627         'positional_param_list : empty'
1628         t[0] = []
1629
1630     def p_positional_param_list_1(self, t):
1631         'positional_param_list : ID'
1632         t[0] = [t[1]]
1633
1634     def p_positional_param_list_2(self, t):
1635         'positional_param_list : positional_param_list COMMA ID'
1636         t[0] = t[1] + [t[3]]
1637
1638     def p_nonpositional_param_list_0(self, t):
1639         'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1640         t[0] = t[1] + t[3]
1641
1642     def p_nonpositional_param_list_1(self, t):
1643         '''nonpositional_param_list : keyword_param_list
1644                                     | excess_args_param'''
1645         t[0] = t[1]
1646
1647     def p_keyword_param_list_0(self, t):
1648         'keyword_param_list : keyword_param'
1649         t[0] = [t[1]]
1650
1651     def p_keyword_param_list_1(self, t):
1652         'keyword_param_list : keyword_param_list COMMA keyword_param'
1653         t[0] = t[1] + [t[3]]
1654
1655     def p_keyword_param(self, t):
1656         'keyword_param : ID EQUALS expr'
1657         t[0] = t[1] + ' = ' + t[3].__repr__()
1658
1659     def p_excess_args_param(self, t):
1660         'excess_args_param : ASTERISK ID'
1661         # Just concatenate them: '*ID'.  Wrap in list to be consistent
1662         # with positional_param_list and keyword_param_list.
1663         t[0] = [t[1] + t[2]]
1664
1665     # End of format definition-related rules.
1666     ##############
1667
1668     #
1669     # A decode block looks like:
1670     #       decode <field1> [, <field2>]* [default <inst>] { ... }
1671     #
1672     def p_decode_block(self, t):
1673         'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1674         default_defaults = self.defaultStack.pop()
1675         codeObj = t[5]
1676         # use the "default defaults" only if there was no explicit
1677         # default statement in decode_stmt_list
1678         if not codeObj.has_decode_default:
1679             codeObj += default_defaults
1680         codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1681         t[0] = codeObj
1682
1683     # The opt_default statement serves only to push the "default
1684     # defaults" onto defaultStack.  This value will be used by nested
1685     # decode blocks, and used and popped off when the current
1686     # decode_block is processed (in p_decode_block() above).
1687     def p_opt_default_0(self, t):
1688         'opt_default : empty'
1689         # no default specified: reuse the one currently at the top of
1690         # the stack
1691         self.defaultStack.push(self.defaultStack.top())
1692         # no meaningful value returned
1693         t[0] = None
1694
1695     def p_opt_default_1(self, t):
1696         'opt_default : DEFAULT inst'
1697         # push the new default
1698         codeObj = t[2]
1699         codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1700         self.defaultStack.push(codeObj)
1701         # no meaningful value returned
1702         t[0] = None
1703
1704     def p_decode_stmt_list_0(self, t):
1705         'decode_stmt_list : decode_stmt'
1706         t[0] = t[1]
1707
1708     def p_decode_stmt_list_1(self, t):
1709         'decode_stmt_list : decode_stmt decode_stmt_list'
1710         if (t[1].has_decode_default and t[2].has_decode_default):
1711             error(t, 'Two default cases in decode block')
1712         t[0] = t[1] + t[2]
1713
1714     #
1715     # Decode statement rules
1716     #
1717     # There are four types of statements allowed in a decode block:
1718     # 1. Format blocks 'format <foo> { ... }'
1719     # 2. Nested decode blocks
1720     # 3. Instruction definitions.
1721     # 4. C preprocessor directives.
1722
1723
1724     # Preprocessor directives found in a decode statement list are
1725     # passed through to the output, replicated to all of the output
1726     # code streams.  This works well for ifdefs, so we can ifdef out
1727     # both the declarations and the decode cases generated by an
1728     # instruction definition.  Handling them as part of the grammar
1729     # makes it easy to keep them in the right place with respect to
1730     # the code generated by the other statements.
1731     def p_decode_stmt_cpp(self, t):
1732         'decode_stmt : CPPDIRECTIVE'
1733         t[0] = GenCode(t[1], t[1], t[1], t[1])
1734
1735     # A format block 'format <foo> { ... }' sets the default
1736     # instruction format used to handle instruction definitions inside
1737     # the block.  This format can be overridden by using an explicit
1738     # format on the instruction definition or with a nested format
1739     # block.
1740     def p_decode_stmt_format(self, t):
1741         'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1742         # The format will be pushed on the stack when 'push_format_id'
1743         # is processed (see below).  Once the parser has recognized
1744         # the full production (though the right brace), we're done
1745         # with the format, so now we can pop it.
1746         self.formatStack.pop()
1747         t[0] = t[4]
1748
1749     # This rule exists so we can set the current format (& push the
1750     # stack) when we recognize the format name part of the format
1751     # block.
1752     def p_push_format_id(self, t):
1753         'push_format_id : ID'
1754         try:
1755             self.formatStack.push(self.formatMap[t[1]])
1756             t[0] = ('', '// format %s' % t[1])
1757         except KeyError:
1758             error(t, 'instruction format "%s" not defined.' % t[1])
1759
1760     # Nested decode block: if the value of the current field matches
1761     # the specified constant, do a nested decode on some other field.
1762     def p_decode_stmt_decode(self, t):
1763         'decode_stmt : case_label COLON decode_block'
1764         label = t[1]
1765         codeObj = t[3]
1766         # just wrap the decoding code from the block as a case in the
1767         # outer switch statement.
1768         codeObj.wrap_decode_block('\n%s:\n' % label)
1769         codeObj.has_decode_default = (label == 'default')
1770         t[0] = codeObj
1771
1772     # Instruction definition (finally!).
1773     def p_decode_stmt_inst(self, t):
1774         'decode_stmt : case_label COLON inst SEMI'
1775         label = t[1]
1776         codeObj = t[3]
1777         codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1778         codeObj.has_decode_default = (label == 'default')
1779         t[0] = codeObj
1780
1781     # The case label is either a list of one or more constants or
1782     # 'default'
1783     def p_case_label_0(self, t):
1784         'case_label : intlit_list'
1785         def make_case(intlit):
1786             if intlit >= 2**32:
1787                 return 'case ULL(%#x)' % intlit
1788             else:
1789                 return 'case %#x' % intlit
1790         t[0] = ': '.join(map(make_case, t[1]))
1791
1792     def p_case_label_1(self, t):
1793         'case_label : DEFAULT'
1794         t[0] = 'default'
1795
1796     #
1797     # The constant list for a decode case label must be non-empty, but
1798     # may have one or more comma-separated integer literals in it.
1799     #
1800     def p_intlit_list_0(self, t):
1801         'intlit_list : INTLIT'
1802         t[0] = [t[1]]
1803
1804     def p_intlit_list_1(self, t):
1805         'intlit_list : intlit_list COMMA INTLIT'
1806         t[0] = t[1]
1807         t[0].append(t[3])
1808
1809     # Define an instruction using the current instruction format
1810     # (specified by an enclosing format block).
1811     # "<mnemonic>(<args>)"
1812     def p_inst_0(self, t):
1813         'inst : ID LPAREN arg_list RPAREN'
1814         # Pass the ID and arg list to the current format class to deal with.
1815         currentFormat = self.formatStack.top()
1816         codeObj = currentFormat.defineInst(t[1], t[3], t.lexer.lineno)
1817         args = ','.join(map(str, t[3]))
1818         args = re.sub('(?m)^', '//', args)
1819         args = re.sub('^//', '', args)
1820         comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1821         codeObj.prepend_all(comment)
1822         t[0] = codeObj
1823
1824     # Define an instruction using an explicitly specified format:
1825     # "<fmt>::<mnemonic>(<args>)"
1826     def p_inst_1(self, t):
1827         'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1828         try:
1829             format = self.formatMap[t[1]]
1830         except KeyError:
1831             error(t, 'instruction format "%s" not defined.' % t[1])
1832         codeObj = format.defineInst(t[3], t[5], t.lexer.lineno)
1833         comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1834         codeObj.prepend_all(comment)
1835         t[0] = codeObj
1836
1837     # The arg list generates a tuple, where the first element is a
1838     # list of the positional args and the second element is a dict
1839     # containing the keyword args.
1840     def p_arg_list_0(self, t):
1841         'arg_list : positional_arg_list COMMA keyword_arg_list'
1842         t[0] = ( t[1], t[3] )
1843
1844     def p_arg_list_1(self, t):
1845         'arg_list : positional_arg_list'
1846         t[0] = ( t[1], {} )
1847
1848     def p_arg_list_2(self, t):
1849         'arg_list : keyword_arg_list'
1850         t[0] = ( [], t[1] )
1851
1852     def p_positional_arg_list_0(self, t):
1853         'positional_arg_list : empty'
1854         t[0] = []
1855
1856     def p_positional_arg_list_1(self, t):
1857         'positional_arg_list : expr'
1858         t[0] = [t[1]]
1859
1860     def p_positional_arg_list_2(self, t):
1861         'positional_arg_list : positional_arg_list COMMA expr'
1862         t[0] = t[1] + [t[3]]
1863
1864     def p_keyword_arg_list_0(self, t):
1865         'keyword_arg_list : keyword_arg'
1866         t[0] = t[1]
1867
1868     def p_keyword_arg_list_1(self, t):
1869         'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1870         t[0] = t[1]
1871         t[0].update(t[3])
1872
1873     def p_keyword_arg(self, t):
1874         'keyword_arg : ID EQUALS expr'
1875         t[0] = { t[1] : t[3] }
1876
1877     #
1878     # Basic expressions.  These constitute the argument values of
1879     # "function calls" (i.e. instruction definitions in the decode
1880     # block) and default values for formal parameters of format
1881     # functions.
1882     #
1883     # Right now, these are either strings, integers, or (recursively)
1884     # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1886     # there isn't really a variable namespace to refer to).
1887     #
1888     def p_expr_0(self, t):
1889         '''expr : ID
1890                 | INTLIT
1891                 | STRLIT
1892                 | CODELIT'''
1893         t[0] = t[1]
1894
1895     def p_expr_1(self, t):
1896         '''expr : LBRACKET list_expr RBRACKET'''
1897         t[0] = t[2]
1898
1899     def p_list_expr_0(self, t):
1900         'list_expr : expr'
1901         t[0] = [t[1]]
1902
1903     def p_list_expr_1(self, t):
1904         'list_expr : list_expr COMMA expr'
1905         t[0] = t[1] + [t[3]]
1906
1907     def p_list_expr_2(self, t):
1908         'list_expr : empty'
1909         t[0] = []
1910
1911     #
1912     # Empty production... use in other rules for readability.
1913     #
1914     def p_empty(self, t):
1915         'empty :'
1916         pass
1917
1918     # Parse error handler.  Note that the argument here is the
1919     # offending *token*, not a grammar symbol (hence the need to use
1920     # t.value)
1921     def p_error(self, t):
1922         if t:
1923             error(t, "syntax error at '%s'" % t.value)
1924         else:
1925             error("unknown syntax error")
1926
1927     # END OF GRAMMAR RULES
1928
    # Names that updateExportContext() evaluates and copies into
    # self.exportContext.
    exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')
1930     def updateExportContext(self):
1931         exportDict = dict([(s, eval(s)) for s in self.exportContextSymbols])
1932         self.exportContext.update(exportDict)
1933         self.exportContext.update(parser.templateMap)
1934
1935     def defFormat(self, id, params, code, lineno):
1936         '''Define a new format'''
1937
1938         # make sure we haven't already defined this one
1939         if id in self.formatMap:
1940             error(lineno, 'format %s redefined.' % id)
1941
1942         # create new object and store in global map
1943         self.formatMap[id] = Format(self, id, params, code)
1944
1945     def update_if_needed(self, file, contents):
1946         '''Update the output file only if the new contents are
1947         different from the current contents.  Minimizes the files that
1948         need to be rebuilt after minor changes.'''
1949
1950         file = os.path.join(self.output_dir, file)
1951         update = False
1952         if os.access(file, os.R_OK):
1953             f = open(file, 'r')
1954             old_contents = f.read()
1955             f.close()
1956             if contents != old_contents:
1957                 print 'Updating', file
1958                 os.remove(file) # in case it's write-protected
1959                 update = True
1960             else:
1961                 print 'File', file, 'is unchanged'
1962         else:
1963             print 'Generating', file
1964             update = True
1965         if update:
1966             f = open(file, 'w')
1967             f.write(contents)
1968             f.close()
1969
1970     # This regular expression matches '##include' directives
    # The quoted path is captured in the named group 'filename'.  The
    # pattern is compiled with re.MULTILINE so that '^' and '$' match
    # at every line of the text being searched.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
                           re.MULTILINE)
1973
1974     def replace_include(self, matchobj, dirname):
1975         """Function to replace a matched '##include' directive with the
1976         contents of the specified file (with nested ##includes
1977         replaced recursively).  'matchobj' is an re match object
1978         (from a match of includeRE) and 'dirname' is the directory
1979         relative to which the file path should be resolved."""
1980
1981         fname = matchobj.group('filename')
1982         full_fname = os.path.normpath(os.path.join(dirname, fname))
1983         contents = '##newfile "%s"\n%s\n##endfile\n' % \
1984                    (full_fname, self.read_and_flatten(full_fname))
1985         return contents
1986
1987     def read_and_flatten(self, filename):
1988         """Read a file and recursively flatten nested '##include' files."""
1989
1990         current_dir = os.path.dirname(filename)
1991         try:
1992             contents = open(filename).read()
1993         except IOError:
1994             error('Error including file "%s"' % filename)
1995
1996         fileNameStack.push((filename, 0))
1997
1998         # Find any includes and include them
1999         def replace(matchobj):
2000             return self.replace_include(matchobj, current_dir)
2001         contents = self.includeRE.sub(replace, contents)
2002
2003         fileNameStack.pop()
2004         return contents
2005
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the output
        files (decoder.hh, decoder.cc, one exec file per CPU model and
        max_inst_regs.hh) via update_if_needed().

        NOTE: the output templates are filled in with vars(), so the
        names of the local variables in this method are significant.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in cpu_models:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            # exec_output is indexed by CPU model name
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        global maxInstSrcRegs
        MaxInstSrcRegs = maxInstSrcRegs
        global maxInstDestRegs
        MaxInstDestRegs = maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2059
2060     def parse_isa_desc(self, *args, **kwargs):
2061         try:
2062             self._parse_isa_desc(*args, **kwargs)
2063         except ISAParserError, e:
2064             e.exit(fileNameStack)
2065
2066 # global list of CpuModel objects (see cpu_models.py)
cpu_models = []

# Called as script: get args from command line.
# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Look up a CpuModel object for every model name given on the
    # command line (all arguments from the fourth onwards).
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    # Build the parser for the output directory, then process the ISA
    # description file.
    parser = ISAParser(sys.argv[3])
    parser.parse_isa_desc(sys.argv[2])