ARM: Implement fault classes.

[gem5.git] / src / arch / isa_parser.py
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py

index 39500df36082a4950cfba1947a021ede7d983105..2db7c6aa6d134590286fa5d22143d3718b35e708 100755 (executable)
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -25,7 +25,6 @@
  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  #
  # Authors: Steve Reinhardt
-#          Korey Sewell
  
  import os
  import sys
@@ -35,676 +34,704 @@ import traceback
  # get type names
  from types import *
  
-# Prepend the directory where the PLY lex & yacc modules are found
-# to the search path.  Assumes we're compiling in a subdirectory
-# of 'build' in the current tree.
-sys.path[0:0] = [os.environ['M5_PLY']]
-
-import lex
-import yacc
-
-#####################################################################
-#
-#                                Lexer
-#
-# The PLY lexer module takes two things as input:
-# - A list of token names (the string list 'tokens')
-# - A regular expression describing a match for each token.  The
-#   regexp for token FOO can be provided in two ways:
-#   - as a string variable named t_FOO
-#   - as the doc string for a function named t_FOO.  In this case,
-#     the function is also executed, allowing an action to be
-#     associated with each token match.
-#
-#####################################################################
-
-# Reserved words.  These are listed separately as they are matched
-# using the same regexp as generic IDs, but distinguished in the
-# t_ID() function.  The PLY documentation suggests this approach.
-reserved = (
-    'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
-    'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
-    'OUTPUT', 'SIGNED', 'TEMPLATE'
+from m5.util.grammar import Grammar
+
+class ISAParser(Grammar):
+    def __init__(self, *args, **kwargs):
+        super(ISAParser, self).__init__(*args, **kwargs)
+        self.templateMap = {}
+
+    #####################################################################
+    #
+    #                                Lexer
+    #
+    # The PLY lexer module takes two things as input:
+    # - A list of token names (the string list 'tokens')
+    # - A regular expression describing a match for each token.  The
+    #   regexp for token FOO can be provided in two ways:
+    #   - as a string variable named t_FOO
+    #   - as the doc string for a function named t_FOO.  In this case,
+    #     the function is also executed, allowing an action to be
+    #     associated with each token match.
+    #
+    #####################################################################
+
+    # Reserved words.  These are listed separately as they are matched
+    # using the same regexp as generic IDs, but distinguished in the
+    # t_ID() function.  The PLY documentation suggests this approach.
+    reserved = (
+        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
+        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
+        'OUTPUT', 'SIGNED', 'TEMPLATE'
+        )
+
+    # List of tokens.  The lex module requires this.
+    tokens = reserved + (
+        # identifier
+        'ID',
+
+        # integer literal
+        'INTLIT',
+
+        # string literal
+        'STRLIT',
+
+        # code literal
+        'CODELIT',
+
+        # ( ) [ ] { } < > = , ; . : :: *
+        'LPAREN', 'RPAREN',
+        'LBRACKET', 'RBRACKET',
+        'LBRACE', 'RBRACE',
+        'LESS', 'GREATER', 'EQUALS',
+        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
+        'ASTERISK',
+
+        # C preprocessor directives
+        'CPPDIRECTIVE'
+
+    # The following are matched but never returned.  Commented out to
+    # suppress a PLY warning.
+        # newfile directive
+    #    'NEWFILE',
+
+        # endfile directive
+    #    'ENDFILE'
      )
  
-# List of tokens.  The lex module requires this.
-tokens = reserved + (
-    # identifier
-    'ID',
-
-    # integer literal
-    'INTLIT',
-
-    # string literal
-    'STRLIT',
-
-    # code literal
-    'CODELIT',
-
-    # ( ) [ ] { } < > , ; : :: *
-    'LPAREN', 'RPAREN',
-    'LBRACKET', 'RBRACKET',
-    'LBRACE', 'RBRACE',
-    'LESS', 'GREATER', 'EQUALS',
-    'COMMA', 'SEMI', 'COLON', 'DBLCOLON',
-    'ASTERISK',
-
-    # C preprocessor directives
-    'CPPDIRECTIVE'
-
-# The following are matched but never returned. commented out to
-# suppress PLY warning
-    # newfile directive
-#    'NEWFILE',
-
-    # endfile directive
-#    'ENDFILE'
-)
-
-# Regular expressions for token matching
-t_LPAREN           = r'\('
-t_RPAREN           = r'\)'
-t_LBRACKET         = r'\['
-t_RBRACKET         = r'\]'
-t_LBRACE           = r'\{'
-t_RBRACE           = r'\}'
-t_LESS             = r'\<'
-t_GREATER          = r'\>'
-t_EQUALS           = r'='
-t_COMMA            = r','
-t_SEMI             = r';'
-t_COLON            = r':'
-t_DBLCOLON         = r'::'
-t_ASTERISK        = r'\*'
-
-# Identifiers and reserved words
-reserved_map = { }
-for r in reserved:
-    reserved_map[r.lower()] = r
-
-def t_ID(t):
-    r'[A-Za-z_]\w*'
-    t.type = reserved_map.get(t.value,'ID')
-    return t
-
-# Integer literal
-def t_INTLIT(t):
-    r'(0x[\da-fA-F]+)|\d+'
-    try:
-        t.value = int(t.value,0)
-    except ValueError:
-        error(t.lineno, 'Integer value "%s" too large' % t.value)
-        t.value = 0
-    return t
-
-# String literal.  Note that these use only single quotes, and
-# can span multiple lines.
-def t_STRLIT(t):
-    r"(?m)'([^'])+'"
-    # strip off quotes
-    t.value = t.value[1:-1]
-    t.lineno += t.value.count('\n')
-    return t
-
-
-# "Code literal"... like a string literal, but delimiters are
-# '{{' and '}}' so they get formatted nicely under emacs c-mode
-def t_CODELIT(t):
-    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
-    # strip off {{ & }}
-    t.value = t.value[2:-2]
-    t.lineno += t.value.count('\n')
-    return t
-
-def t_CPPDIRECTIVE(t):
-    r'^\#[^\#].*\n'
-    t.lineno += t.value.count('\n')
-    return t
-
-def t_NEWFILE(t):
-    r'^\#\#newfile\s+"[\w/.-]*"'
-    fileNameStack.push((t.value[11:-1], t.lineno))
-    t.lineno = 0
-
-def t_ENDFILE(t):
-    r'^\#\#endfile'
-    (old_filename, t.lineno) = fileNameStack.pop()
-
-#
-# The functions t_NEWLINE, t_ignore, and t_error are
-# special for the lex module.
-#
-
-# Newlines
-def t_NEWLINE(t):
-    r'\n+'
-    t.lineno += t.value.count('\n')
-
-# Comments
-def t_comment(t):
-    r'//.*'
-
-# Completely ignored characters
-t_ignore           = ' \t\x0c'
-
-# Error handler
-def t_error(t):
-    error(t.lineno, "illegal character '%s'" % t.value[0])
-    t.skip(1)
-
-# Build the lexer
-lex.lex()
-
-#####################################################################
-#
-#                                Parser
-#
-# Every function whose name starts with 'p_' defines a grammar rule.
-# The rule is encoded in the function's doc string, while the
-# function body provides the action taken when the rule is matched.
-# The argument to each function is a list of the values of the
-# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols
-# on the RHS.  For tokens, the value is copied from the t.value
-# attribute provided by the lexer.  For non-terminals, the value
-# is assigned by the producing rule; i.e., the job of the grammar
-# rule function is to set the value for the non-terminal on the LHS
-# (by assigning to t[0]).
-#####################################################################
-
-# The LHS of the first grammar rule is used as the start symbol
-# (in this case, 'specification').  Note that this rule enforces
-# that there will be exactly one namespace declaration, with 0 or more
-# global defs/decls before and after it.  The defs & decls before
-# the namespace decl will be outside the namespace; those after
-# will be inside.  The decoder function is always inside the namespace.
-def p_specification(t):
-    'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
-    global_code = t[1]
-    isa_name = t[2]
-    namespace = isa_name + "Inst"
-    # wrap the decode block as a function definition
-    t[4].wrap_decode_block('''
+    # Regular expressions for token matching
+    t_LPAREN           = r'\('
+    t_RPAREN           = r'\)'
+    t_LBRACKET         = r'\['
+    t_RBRACKET         = r'\]'
+    t_LBRACE           = r'\{'
+    t_RBRACE           = r'\}'
+    t_LESS             = r'\<'
+    t_GREATER          = r'\>'
+    t_EQUALS           = r'='
+    t_COMMA            = r','
+    t_SEMI             = r';'
+    t_DOT              = r'\.'
+    t_COLON            = r':'
+    t_DBLCOLON         = r'::'
+    t_ASTERISK         = r'\*'
+
+    # Identifiers and reserved words
+    reserved_map = { }
+    for r in reserved:
+        reserved_map[r.lower()] = r
+
+    def t_ID(self, t):
+        r'[A-Za-z_]\w*'
+        t.type = self.reserved_map.get(t.value, 'ID')
+        return t
+
+    # Integer literal
+    def t_INTLIT(self, t):
+        r'(0x[\da-fA-F]+)|\d+'
+        try:
+            t.value = int(t.value,0)
+        except ValueError:
+            error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
+            t.value = 0
+        return t
+
+    # String literal.  Note that these use only single quotes, and
+    # can span multiple lines.
+    def t_STRLIT(self, t):
+        r"(?m)'([^'])+'"
+        # strip off quotes
+        t.value = t.value[1:-1]
+        t.lexer.lineno += t.value.count('\n')
+        return t
+
+
+    # "Code literal"... like a string literal, but delimiters are
+    # '{{' and '}}' so they get formatted nicely under emacs c-mode
+    def t_CODELIT(self, t):
+        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
+        # strip off {{ & }}
+        t.value = t.value[2:-2]
+        t.lexer.lineno += t.value.count('\n')
+        return t
+
+    def t_CPPDIRECTIVE(self, t):
+        r'^\#[^\#].*\n'
+        t.lexer.lineno += t.value.count('\n')
+        return t
+
+    def t_NEWFILE(self, t):
+        r'^\#\#newfile\s+"[\w/.-]*"'
+        fileNameStack.push((t.value[11:-1], t.lexer.lineno))
+        t.lexer.lineno = 0
+
+    def t_ENDFILE(self, t):
+        r'^\#\#endfile'
+        (old_filename, t.lexer.lineno) = fileNameStack.pop()
+
+    #
+    # The functions t_NEWLINE, t_ignore, and t_error are
+    # special for the lex module.
+    #
+
+    # Newlines
+    def t_NEWLINE(self, t):
+        r'\n+'
+        t.lexer.lineno += t.value.count('\n')
+
+    # Comments
+    def t_comment(self, t):
+        r'//.*'
+
+    # Completely ignored characters
+    t_ignore = ' \t\x0c'
+
+    # Error handler
+    def t_error(self, t):
+        error(t.lexer.lineno, "illegal character '%s'" % t.value[0])
+        t.skip(1)
+
+    #####################################################################
+    #
+    #                                Parser
+    #
+    # Every function whose name starts with 'p_' defines a grammar
+    # rule.  The rule is encoded in the function's doc string, while
+    # the function body provides the action taken when the rule is
+    # matched.  The argument to each function is a list of the values
+    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
+    # symbols on the RHS.  For tokens, the value is copied from the
+    # t.value attribute provided by the lexer.  For non-terminals, the
+    # value is assigned by the producing rule; i.e., the job of the
+    # grammar rule function is to set the value for the non-terminal
+    # on the LHS (by assigning to t[0]).
+    #####################################################################
+
+    # The LHS of the first grammar rule is used as the start symbol
+    # (in this case, 'specification').  Note that this rule enforces
+    # that there will be exactly one namespace declaration, with 0 or
+    # more global defs/decls before and after it.  The defs & decls
+    # before the namespace decl will be outside the namespace; those
+    # after will be inside.  The decoder function is always inside the
+    # namespace.
+    def p_specification(self, t):
+        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
+        global_code = t[1]
+        isa_name = t[2]
+        namespace = isa_name + "Inst"
+        # wrap the decode block as a function definition
+        t[4].wrap_decode_block('''
  StaticInstPtr
  %(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
  {
      using namespace %(namespace)s;
  ''' % vars(), '}')
-    # both the latter output blocks and the decode block are in the namespace
-    namespace_code = t[3] + t[4]
-    # pass it all back to the caller of yacc.parse()
-    t[0] = (isa_name, namespace, global_code, namespace_code)
-
-# ISA name declaration looks like "namespace <foo>;"
-def p_name_decl(t):
-    'name_decl : NAMESPACE ID SEMI'
-    t[0] = t[2]
-
-# 'opt_defs_and_outputs' is a possibly empty sequence of
-# def and/or output statements.
-def p_opt_defs_and_outputs_0(t):
-    'opt_defs_and_outputs : empty'
-    t[0] = GenCode()
-
-def p_opt_defs_and_outputs_1(t):
-    'opt_defs_and_outputs : defs_and_outputs'
-    t[0] = t[1]
-
-def p_defs_and_outputs_0(t):
-    'defs_and_outputs : def_or_output'
-    t[0] = t[1]
-
-def p_defs_and_outputs_1(t):
-    'defs_and_outputs : defs_and_outputs def_or_output'
-    t[0] = t[1] + t[2]
-
-# The list of possible definition/output statements.
-def p_def_or_output(t):
-    '''def_or_output : def_format
-                     | def_bitfield
-                     | def_template
-                     | def_operand_types
-                     | def_operands
-                     | output_header
-                     | output_decoder
-                     | output_exec
-                     | global_let'''
-    t[0] = t[1]
-
-# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
-# directly to the appropriate output section.
-
-
-# Protect any non-dict-substitution '%'s in a format string
-# (i.e. those not followed by '(')
-def protect_non_subst_percents(s):
-    return re.sub(r'%(?!\()', '%%', s)
-
-# Massage output block by substituting in template definitions and bit
-# operators.  We handle '%'s embedded in the string that don't
-# indicate template substitutions (or CPU-specific symbols, which get
-# handled in GenCode) by doubling them first so that the format
-# operation will reduce them back to single '%'s.
-def process_output(s):
-    s = protect_non_subst_percents(s)
-    # protects cpu-specific symbols too
-    s = protect_cpu_symbols(s)
-    return substBitOps(s % templateMap)
-
-def p_output_header(t):
-    'output_header : OUTPUT HEADER CODELIT SEMI'
-    t[0] = GenCode(header_output = process_output(t[3]))
-
-def p_output_decoder(t):
-    'output_decoder : OUTPUT DECODER CODELIT SEMI'
-    t[0] = GenCode(decoder_output = process_output(t[3]))
-
-def p_output_exec(t):
-    'output_exec : OUTPUT EXEC CODELIT SEMI'
-    t[0] = GenCode(exec_output = process_output(t[3]))
-
-# global let blocks 'let {{...}}' (Python code blocks) are executed
-# directly when seen.  Note that these execute in a special variable
-# context 'exportContext' to prevent the code from polluting this
-# script's namespace.
-def p_global_let(t):
-    'global_let : LET CODELIT SEMI'
-    updateExportContext()
-    try:
-        exec fixPythonIndentation(t[2]) in exportContext
-    except Exception, exc:
-        error(t.lineno(1),
-              'error: %s in global let block "%s".' % (exc, t[2]))
-    t[0] = GenCode() # contributes nothing to the output C++ file
-
-# Define the mapping from operand type extensions to C++ types and bit
-# widths (stored in operandTypeMap).
-def p_def_operand_types(t):
-    'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
-    try:
-        userDict = eval('{' + t[3] + '}')
-    except Exception, exc:
-        error(t.lineno(1),
-              'error: %s in def operand_types block "%s".' % (exc, t[3]))
-    buildOperandTypeMap(userDict, t.lineno(1))
-    t[0] = GenCode() # contributes nothing to the output C++ file
-
-# Define the mapping from operand names to operand classes and other
-# traits.  Stored in operandNameMap.
-def p_def_operands(t):
-    'def_operands : DEF OPERANDS CODELIT SEMI'
-    if not globals().has_key('operandTypeMap'):
-        error(t.lineno(1),
-              'error: operand types must be defined before operands')
-    try:
-        userDict = eval('{' + t[3] + '}')
-    except Exception, exc:
-        error(t.lineno(1),
-              'error: %s in def operands block "%s".' % (exc, t[3]))
-    buildOperandNameMap(userDict, t.lineno(1))
-    t[0] = GenCode() # contributes nothing to the output C++ file
-
-# A bitfield definition looks like:
-# 'def [signed] bitfield <ID> [<first>:<last>]'
-# This generates a preprocessor macro in the output file.
-def p_def_bitfield_0(t):
-    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
-    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
-    if (t[2] == 'signed'):
-        expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
-    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
-    t[0] = GenCode(header_output = hash_define)
-
-# alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
-def p_def_bitfield_1(t):
-    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
-    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
-    if (t[2] == 'signed'):
-        expr = 'sext<%d>(%s)' % (1, expr)
-    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
-    t[0] = GenCode(header_output = hash_define)
-
-def p_opt_signed_0(t):
-    'opt_signed : SIGNED'
-    t[0] = t[1]
-
-def p_opt_signed_1(t):
-    'opt_signed : empty'
-    t[0] = ''
-
-# Global map variable to hold templates
-templateMap = {}
-
-def p_def_template(t):
-    'def_template : DEF TEMPLATE ID CODELIT SEMI'
-    templateMap[t[3]] = Template(t[4])
-    t[0] = GenCode()
-
-# An instruction format definition looks like
-# "def format <fmt>(<params>) {{...}};"
-def p_def_format(t):
-    'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
-    (id, params, code) = (t[3], t[5], t[7])
-    defFormat(id, params, code, t.lineno(1))
-    t[0] = GenCode()
-
-# The formal parameter list for an instruction format is a possibly
-# empty list of comma-separated parameters.  Positional (standard,
-# non-keyword) parameters must come first, followed by keyword
-# parameters, followed by a '*foo' parameter that gets excess
-# positional arguments (as in Python).  Each of these three parameter
-# categories is optional.
-#
-# Note that we do not support the '**foo' parameter for collecting
-# otherwise undefined keyword args.  Otherwise the parameter list is
-# (I believe) identical to what is supported in Python.
-#
-# The param list generates a tuple, where the first element is a list of
-# the positional params and the second element is a dict containing the
-# keyword params.
-def p_param_list_0(t):
-    'param_list : positional_param_list COMMA nonpositional_param_list'
-    t[0] = t[1] + t[3]
-
-def p_param_list_1(t):
-    '''param_list : positional_param_list
-                  | nonpositional_param_list'''
-    t[0] = t[1]
-
-def p_positional_param_list_0(t):
-    'positional_param_list : empty'
-    t[0] = []
-
-def p_positional_param_list_1(t):
-    'positional_param_list : ID'
-    t[0] = [t[1]]
-
-def p_positional_param_list_2(t):
-    'positional_param_list : positional_param_list COMMA ID'
-    t[0] = t[1] + [t[3]]
-
-def p_nonpositional_param_list_0(t):
-    'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
-    t[0] = t[1] + t[3]
-
-def p_nonpositional_param_list_1(t):
-    '''nonpositional_param_list : keyword_param_list
-                                | excess_args_param'''
-    t[0] = t[1]
-
-def p_keyword_param_list_0(t):
-    'keyword_param_list : keyword_param'
-    t[0] = [t[1]]
-
-def p_keyword_param_list_1(t):
-    'keyword_param_list : keyword_param_list COMMA keyword_param'
-    t[0] = t[1] + [t[3]]
-
-def p_keyword_param(t):
-    'keyword_param : ID EQUALS expr'
-    t[0] = t[1] + ' = ' + t[3].__repr__()
-
-def p_excess_args_param(t):
-    'excess_args_param : ASTERISK ID'
-    # Just concatenate them: '*ID'.  Wrap in list to be consistent
-    # with positional_param_list and keyword_param_list.
-    t[0] = [t[1] + t[2]]
-
-# End of format definition-related rules.
-##############
-
-#
-# A decode block looks like:
-#      decode <field1> [, <field2>]* [default <inst>] { ... }
-#
-def p_decode_block(t):
-    'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
-    default_defaults = defaultStack.pop()
-    codeObj = t[5]
-    # use the "default defaults" only if there was no explicit
-    # default statement in decode_stmt_list
-    if not codeObj.has_decode_default:
-        codeObj += default_defaults
-    codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
-    t[0] = codeObj
-
-# The opt_default statement serves only to push the "default defaults"
-# onto defaultStack.  This value will be used by nested decode blocks,
-# and used and popped off when the current decode_block is processed
-# (in p_decode_block() above).
-def p_opt_default_0(t):
-    'opt_default : empty'
-    # no default specified: reuse the one currently at the top of the stack
-    defaultStack.push(defaultStack.top())
-    # no meaningful value returned
-    t[0] = None
-
-def p_opt_default_1(t):
-    'opt_default : DEFAULT inst'
-    # push the new default
-    codeObj = t[2]
-    codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
-    defaultStack.push(codeObj)
-    # no meaningful value returned
-    t[0] = None
-
-def p_decode_stmt_list_0(t):
-    'decode_stmt_list : decode_stmt'
-    t[0] = t[1]
-
-def p_decode_stmt_list_1(t):
-    'decode_stmt_list : decode_stmt decode_stmt_list'
-    if (t[1].has_decode_default and t[2].has_decode_default):
-        error(t.lineno(1), 'Two default cases in decode block')
-    t[0] = t[1] + t[2]
-
-#
-# Decode statement rules
-#
-# There are four types of statements allowed in a decode block:
-# 1. Format blocks 'format <foo> { ... }'
-# 2. Nested decode blocks
-# 3. Instruction definitions.
-# 4. C preprocessor directives.
-
-
-# Preprocessor directives found in a decode statement list are passed
-# through to the output, replicated to all of the output code
-# streams.  This works well for ifdefs, so we can ifdef out both the
-# declarations and the decode cases generated by an instruction
-# definition.  Handling them as part of the grammar makes it easy to
-# keep them in the right place with respect to the code generated by
-# the other statements.
-def p_decode_stmt_cpp(t):
-    'decode_stmt : CPPDIRECTIVE'
-    t[0] = GenCode(t[1], t[1], t[1], t[1])
-
-# A format block 'format <foo> { ... }' sets the default instruction
-# format used to handle instruction definitions inside the block.
-# This format can be overridden by using an explicit format on the
-# instruction definition or with a nested format block.
-def p_decode_stmt_format(t):
-    'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
-    # The format will be pushed on the stack when 'push_format_id' is
-    # processed (see below).  Once the parser has recognized the full
-    # production (though the right brace), we're done with the format,
-    # so now we can pop it.
-    formatStack.pop()
-    t[0] = t[4]
-
-# This rule exists so we can set the current format (& push the stack)
-# when we recognize the format name part of the format block.
-def p_push_format_id(t):
-    'push_format_id : ID'
-    try:
-        formatStack.push(formatMap[t[1]])
-        t[0] = ('', '// format %s' % t[1])
-    except KeyError:
-        error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
-
-# Nested decode block: if the value of the current field matches the
-# specified constant, do a nested decode on some other field.
-def p_decode_stmt_decode(t):
-    'decode_stmt : case_label COLON decode_block'
-    label = t[1]
-    codeObj = t[3]
-    # just wrap the decoding code from the block as a case in the
-    # outer switch statement.
-    codeObj.wrap_decode_block('\n%s:\n' % label)
-    codeObj.has_decode_default = (label == 'default')
-    t[0] = codeObj
-
-# Instruction definition (finally!).
-def p_decode_stmt_inst(t):
-    'decode_stmt : case_label COLON inst SEMI'
-    label = t[1]
-    codeObj = t[3]
-    codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
-    codeObj.has_decode_default = (label == 'default')
-    t[0] = codeObj
-
-# The case label is either a list of one or more constants or 'default'
-def p_case_label_0(t):
-    'case_label : intlit_list'
-    t[0] = ': '.join(map(lambda a: 'case %#x' % a, t[1]))
-
-def p_case_label_1(t):
-    'case_label : DEFAULT'
-    t[0] = 'default'
-
-#
-# The constant list for a decode case label must be non-empty, but may have
-# one or more comma-separated integer literals in it.
-#
-def p_intlit_list_0(t):
-    'intlit_list : INTLIT'
-    t[0] = [t[1]]
-
-def p_intlit_list_1(t):
-    'intlit_list : intlit_list COMMA INTLIT'
-    t[0] = t[1]
-    t[0].append(t[3])
-
-# Define an instruction using the current instruction format (specified
-# by an enclosing format block).
-# "<mnemonic>(<args>)"
-def p_inst_0(t):
-    'inst : ID LPAREN arg_list RPAREN'
-    # Pass the ID and arg list to the current format class to deal with.
-    currentFormat = formatStack.top()
-    codeObj = currentFormat.defineInst(t[1], t[3], t.lineno(1))
-    args = ','.join(map(str, t[3]))
-    args = re.sub('(?m)^', '//', args)
-    args = re.sub('^//', '', args)
-    comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
-    codeObj.prepend_all(comment)
-    t[0] = codeObj
-
-# Define an instruction using an explicitly specified format:
-# "<fmt>::<mnemonic>(<args>)"
-def p_inst_1(t):
-    'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
-    try:
-        format = formatMap[t[1]]
-    except KeyError:
-        error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
-    codeObj = format.defineInst(t[3], t[5], t.lineno(1))
-    comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
-    codeObj.prepend_all(comment)
-    t[0] = codeObj
-
-# The arg list generates a tuple, where the first element is a list of
-# the positional args and the second element is a dict containing the
-# keyword args.
-def p_arg_list_0(t):
-    'arg_list : positional_arg_list COMMA keyword_arg_list'
-    t[0] = ( t[1], t[3] )
-
-def p_arg_list_1(t):
-    'arg_list : positional_arg_list'
-    t[0] = ( t[1], {} )
-
-def p_arg_list_2(t):
-    'arg_list : keyword_arg_list'
-    t[0] = ( [], t[1] )
-
-def p_positional_arg_list_0(t):
-    'positional_arg_list : empty'
-    t[0] = []
-
-def p_positional_arg_list_1(t):
-    'positional_arg_list : expr'
-    t[0] = [t[1]]
-
-def p_positional_arg_list_2(t):
-    'positional_arg_list : positional_arg_list COMMA expr'
-    t[0] = t[1] + [t[3]]
-
-def p_keyword_arg_list_0(t):
-    'keyword_arg_list : keyword_arg'
-    t[0] = t[1]
-
-def p_keyword_arg_list_1(t):
-    'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
-    t[0] = t[1]
-    t[0].update(t[3])
-
-def p_keyword_arg(t):
-    'keyword_arg : ID EQUALS expr'
-    t[0] = { t[1] : t[3] }
-
-#
-# Basic expressions.  These constitute the argument values of
-# "function calls" (i.e. instruction definitions in the decode block)
-# and default values for formal parameters of format functions.
-#
-# Right now, these are either strings, integers, or (recursively)
-# lists of exprs (using Python square-bracket list syntax).  Note that
-# bare identifiers are trated as string constants here (since there
-# isn't really a variable namespace to refer to).
-#
-def p_expr_0(t):
-    '''expr : ID
-            | INTLIT
-            | STRLIT
-            | CODELIT'''
-    t[0] = t[1]
-
-def p_expr_1(t):
-    '''expr : LBRACKET list_expr RBRACKET'''
-    t[0] = t[2]
-
-def p_list_expr_0(t):
-    'list_expr : expr'
-    t[0] = [t[1]]
-
-def p_list_expr_1(t):
-    'list_expr : list_expr COMMA expr'
-    t[0] = t[1] + [t[3]]
-
-def p_list_expr_2(t):
-    'list_expr : empty'
-    t[0] = []
+        # both the latter output blocks and the decode block are in
+        # the namespace
+        namespace_code = t[3] + t[4]
+        # pass it all back to the caller of yacc.parse()
+        t[0] = (isa_name, namespace, global_code, namespace_code)
+
+    # ISA name declaration looks like "namespace <foo>;"
+    def p_name_decl(self, t):
+        'name_decl : NAMESPACE ID SEMI'
+        t[0] = t[2]
+
+    # 'opt_defs_and_outputs' is a possibly empty sequence of
+    # def and/or output statements.
+    def p_opt_defs_and_outputs_0(self, t):
+        'opt_defs_and_outputs : empty'
+        t[0] = GenCode()
+
+    def p_opt_defs_and_outputs_1(self, t):
+        'opt_defs_and_outputs : defs_and_outputs'
+        t[0] = t[1]
+
+    def p_defs_and_outputs_0(self, t):
+        'defs_and_outputs : def_or_output'
+        t[0] = t[1]
+
+    def p_defs_and_outputs_1(self, t):
+        'defs_and_outputs : defs_and_outputs def_or_output'
+        t[0] = t[1] + t[2]
+
+    # The list of possible definition/output statements.
+    def p_def_or_output(self, t):
+        '''def_or_output : def_format
+                         | def_bitfield
+                         | def_bitfield_struct
+                         | def_template
+                         | def_operand_types
+                         | def_operands
+                         | output_header
+                         | output_decoder
+                         | output_exec
+                         | global_let'''
+        t[0] = t[1]
+
+    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
+    # directly to the appropriate output section.
+
+    # Massage output block by substituting in template definitions and
+    # bit operators.  We handle '%'s embedded in the string that don't
+    # indicate template substitutions (or CPU-specific symbols, which
+    # get handled in GenCode) by doubling them first so that the
+    # format operation will reduce them back to single '%'s.
+    def process_output(self, s):
+        s = protect_non_subst_percents(s)
+        # protects cpu-specific symbols too
+        s = protect_cpu_symbols(s)
+        return substBitOps(s % self.templateMap)
+
+    def p_output_header(self, t):
+        'output_header : OUTPUT HEADER CODELIT SEMI'
+        t[0] = GenCode(header_output = self.process_output(t[3]))
+
+    def p_output_decoder(self, t):
+        'output_decoder : OUTPUT DECODER CODELIT SEMI'
+        t[0] = GenCode(decoder_output = self.process_output(t[3]))
+
+    def p_output_exec(self, t):
+        'output_exec : OUTPUT EXEC CODELIT SEMI'
+        t[0] = GenCode(exec_output = self.process_output(t[3]))
+
+    # global let blocks 'let {{...}}' (Python code blocks) are
+    # executed directly when seen.  Note that these execute in a
+    # special variable context 'exportContext' to prevent the code
+    # from polluting this script's namespace.
+    def p_global_let(self, t):
+        'global_let : LET CODELIT SEMI'
+        updateExportContext()
+        exportContext["header_output"] = ''
+        exportContext["decoder_output"] = ''
+        exportContext["exec_output"] = ''
+        exportContext["decode_block"] = ''
+        try:
+            exec fixPythonIndentation(t[2]) in exportContext
+        except Exception, exc:
+            error(t.lexer.lineno,
+                  'error: %s in global let block "%s".' % (exc, t[2]))
+        t[0] = GenCode(header_output = exportContext["header_output"],
+                       decoder_output = exportContext["decoder_output"],
+                       exec_output = exportContext["exec_output"],
+                       decode_block = exportContext["decode_block"])
+
+    # Define the mapping from operand type extensions to C++ types and
+    # bit widths (stored in operandTypeMap).
+    def p_def_operand_types(self, t):
+        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
+        try:
+            userDict = eval('{' + t[3] + '}')
+        except Exception, exc:
+            error(t.lexer.lineno,
+                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
+        buildOperandTypeMap(userDict, t.lexer.lineno)
+        t[0] = GenCode() # contributes nothing to the output C++ file
+
+    # Define the mapping from operand names to operand classes and
+    # other traits.  Stored in operandNameMap.
+    def p_def_operands(self, t):
+        'def_operands : DEF OPERANDS CODELIT SEMI'
+        # operandTypeMap is looked up as a module-level global; its
+        # absence means no operand types have been defined yet.
+        if 'operandTypeMap' not in globals():
+            error(t.lexer.lineno,
+                  'error: operand types must be defined before operands')
+        try:
+            # The block body is a dict literal without its braces; it
+            # is evaluated with exportContext as the global namespace.
+            userDict = eval('{' + t[3] + '}', exportContext)
+        except Exception, exc:
+            error(t.lexer.lineno,
+                  'error: %s in def operands block "%s".' % (exc, t[3]))
+        buildOperandNameMap(userDict, t.lexer.lineno)
+        t[0] = GenCode() # contributes nothing to the output C++ file
+
+    # A bitfield definition looks like:
+    # 'def [signed] bitfield <ID> [<first>:<last>]'
+    # This generates a preprocessor macro in the output file.
+    def p_def_bitfield_0(self, t):
+        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
+        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
+        if (t[2] == 'signed'):
+            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
+        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
+        t[0] = GenCode(header_output = hash_define)
+
+    # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
+    def p_def_bitfield_1(self, t):
+        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
+        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
+        if (t[2] == 'signed'):
+            expr = 'sext<%d>(%s)' % (1, expr)
+        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
+        t[0] = GenCode(header_output = hash_define)
+
+    # alternate form for structure member: 'def bitfield <ID> <ID>'
+    def p_def_bitfield_struct(self, t):
+        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
+        if (t[2] != ''):
+            error(t.lexer.lineno,
+                  'error: structure bitfields are always unsigned.')
+        expr = 'machInst.%s' % t[5]
+        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
+        t[0] = GenCode(header_output = hash_define)
+
+    def p_id_with_dot_0(self, t):
+        'id_with_dot : ID'
+        t[0] = t[1]
+
+    def p_id_with_dot_1(self, t):
+        'id_with_dot : ID DOT id_with_dot'
+        t[0] = t[1] + t[2] + t[3]
+
+    def p_opt_signed_0(self, t):
+        'opt_signed : SIGNED'
+        t[0] = t[1]
+
+    def p_opt_signed_1(self, t):
+        'opt_signed : empty'
+        t[0] = ''
+
+    def p_def_template(self, t):
+        'def_template : DEF TEMPLATE ID CODELIT SEMI'
+        self.templateMap[t[3]] = Template(t[4])
+        t[0] = GenCode()
+
+    # An instruction format definition looks like
+    # "def format <fmt>(<params>) {{...}};"
+    def p_def_format(self, t):
+        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
+        (id, params, code) = (t[3], t[5], t[7])
+        defFormat(id, params, code, t.lexer.lineno)
+        t[0] = GenCode()
+
+    # The formal parameter list for an instruction format is a
+    # possibly empty list of comma-separated parameters.  Positional
+    # (standard, non-keyword) parameters must come first, followed by
+    # keyword parameters, followed by a '*foo' parameter that gets
+    # excess positional arguments (as in Python).  Each of these three
+    # parameter categories is optional.
+    #
+    # Note that we do not support the '**foo' parameter for collecting
+    # otherwise undefined keyword args.  Otherwise the parameter list
+    # is (I believe) identical to what is supported in Python.
+    #
+    # The param list generates a single flat list of strings: the
+    # positional param names, then 'name = <repr of default>' strings
+    # for the keyword params, then '*name' for the excess-args param.
+    def p_param_list_0(self, t):
+        'param_list : positional_param_list COMMA nonpositional_param_list'
+        t[0] = t[1] + t[3]
+
+    def p_param_list_1(self, t):
+        '''param_list : positional_param_list
+                      | nonpositional_param_list'''
+        t[0] = t[1]
+
+    def p_positional_param_list_0(self, t):
+        'positional_param_list : empty'
+        t[0] = []
+
+    def p_positional_param_list_1(self, t):
+        'positional_param_list : ID'
+        t[0] = [t[1]]
+
+    def p_positional_param_list_2(self, t):
+        'positional_param_list : positional_param_list COMMA ID'
+        t[0] = t[1] + [t[3]]
+
+    def p_nonpositional_param_list_0(self, t):
+        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
+        t[0] = t[1] + t[3]
+
+    def p_nonpositional_param_list_1(self, t):
+        '''nonpositional_param_list : keyword_param_list
+                                    | excess_args_param'''
+        t[0] = t[1]
+
+    def p_keyword_param_list_0(self, t):
+        'keyword_param_list : keyword_param'
+        t[0] = [t[1]]
+
+    def p_keyword_param_list_1(self, t):
+        'keyword_param_list : keyword_param_list COMMA keyword_param'
+        t[0] = t[1] + [t[3]]
+
+    def p_keyword_param(self, t):
+        'keyword_param : ID EQUALS expr'
+        # Rendered as source text: 'name = <repr of the default value>'.
+        default_src = repr(t[3])
+        t[0] = '%s = %s' % (t[1], default_src)
+
+    def p_excess_args_param(self, t):
+        'excess_args_param : ASTERISK ID'
+        # Just concatenate them: '*ID'.  Wrap in list to be consistent
+        # with positional_param_list and keyword_param_list.
+        t[0] = [t[1] + t[2]]
+
+    # End of format definition-related rules.
+    ##############
+
+    #
+    # A decode block looks like:
+    #       decode <field1> [, <field2>]* [default <inst>] { ... }
+    #
+    def p_decode_block(self, t):
+        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
+        # opt_default pushed an entry for this block; take it back off.
+        inherited_default = defaultStack.pop()
+        field, body = t[2], t[5]
+        # Fall back on the inherited default only when the statement
+        # list did not supply a default case of its own.
+        if not body.has_decode_default:
+            body += inherited_default
+        body.wrap_decode_block('switch (%s) {\n' % field, '}\n')
+        t[0] = body
+
+    # The opt_default statement serves only to push the "default
+    # defaults" onto defaultStack.  This value will be used by nested
+    # decode blocks, and used and popped off when the current
+    # decode_block is processed (in p_decode_block() above).
+    def p_opt_default_0(self, t):
+        'opt_default : empty'
+        # no default specified: reuse the one currently at the top of
+        # the stack
+        defaultStack.push(defaultStack.top())
+        # no meaningful value returned
+        t[0] = None
+
+    def p_opt_default_1(self, t):
+        'opt_default : DEFAULT inst'
+        # push the new default
+        codeObj = t[2]
+        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
+        defaultStack.push(codeObj)
+        # no meaningful value returned
+        t[0] = None
+
+    def p_decode_stmt_list_0(self, t):
+        'decode_stmt_list : decode_stmt'
+        t[0] = t[1]
+
+    def p_decode_stmt_list_1(self, t):
+        'decode_stmt_list : decode_stmt decode_stmt_list'
+        first, rest = t[1], t[2]
+        # The leading statement and the rest of the list may not both
+        # carry a default case.
+        if first.has_decode_default and rest.has_decode_default:
+            error(t.lexer.lineno, 'Two default cases in decode block')
+        t[0] = first + rest
+
+    #
+    # Decode statement rules
+    #
+    # There are four types of statements allowed in a decode block:
+    # 1. Format blocks 'format <foo> { ... }'
+    # 2. Nested decode blocks
+    # 3. Instruction definitions.
+    # 4. C preprocessor directives.
+
+
+    # Preprocessor directives found in a decode statement list are
+    # passed through to the output, replicated to all of the output
+    # code streams.  This works well for ifdefs, so we can ifdef out
+    # both the declarations and the decode cases generated by an
+    # instruction definition.  Handling them as part of the grammar
+    # makes it easy to keep them in the right place with respect to
+    # the code generated by the other statements.
+    def p_decode_stmt_cpp(self, t):
+        'decode_stmt : CPPDIRECTIVE'
+        t[0] = GenCode(t[1], t[1], t[1], t[1])
+
+    # A format block 'format <foo> { ... }' sets the default
+    # instruction format used to handle instruction definitions inside
+    # the block.  This format can be overridden by using an explicit
+    # format on the instruction definition or with a nested format
+    # block.
+    def p_decode_stmt_format(self, t):
+        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
+        # The format will be pushed on the stack when 'push_format_id'
+        # is processed (see below).  Once the parser has recognized
+        # the full production (through the right brace), we're done
+        # with the format, so now we can pop it.
+        formatStack.pop()
+        t[0] = t[4]
+
+    # This rule exists so we can set the current format (& push the
+    # stack) when we recognize the format name part of the format
+    # block.
+    def p_push_format_id(self, t):
+        'push_format_id : ID'
+        try:
+            formatStack.push(formatMap[t[1]])
+            t[0] = ('', '// format %s' % t[1])
+        except KeyError:
+            error(t.lexer.lineno,
+                  'instruction format "%s" not defined.' % t[1])
+
+    # Nested decode block: if the value of the current field matches
+    # the specified constant, do a nested decode on some other field.
+    def p_decode_stmt_decode(self, t):
+        'decode_stmt : case_label COLON decode_block'
+        label = t[1]
+        codeObj = t[3]
+        # just wrap the decoding code from the block as a case in the
+        # outer switch statement.
+        codeObj.wrap_decode_block('\n%s:\n' % label)
+        codeObj.has_decode_default = (label == 'default')
+        t[0] = codeObj
+
+    # Instruction definition (finally!).
+    def p_decode_stmt_inst(self, t):
+        'decode_stmt : case_label COLON inst SEMI'
+        label = t[1]
+        codeObj = t[3]
+        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
+        codeObj.has_decode_default = (label == 'default')
+        t[0] = codeObj
+
+    # The case label is either a list of one or more constants or
+    # 'default'
+    def p_case_label_0(self, t):
+        'case_label : intlit_list'
+        labels = []
+        for value in t[1]:
+            # Constants of 2**32 and above are wrapped in ULL().
+            if value >= 2**32:
+                labels.append('case ULL(%#x)' % value)
+            else:
+                labels.append('case %#x' % value)
+        # Several constants sharing one body become chained case labels.
+        t[0] = ': '.join(labels)
+
+    def p_case_label_1(self, t):
+        'case_label : DEFAULT'
+        t[0] = 'default'
+
+    #
+    # The constant list for a decode case label must be non-empty, but
+    # may have one or more comma-separated integer literals in it.
+    #
+    def p_intlit_list_0(self, t):
+        'intlit_list : INTLIT'
+        t[0] = [t[1]]
+
+    def p_intlit_list_1(self, t):
+        'intlit_list : intlit_list COMMA INTLIT'
+        t[0] = t[1]
+        t[0].append(t[3])
+
+    # Define an instruction using the current instruction format
+    # (specified by an enclosing format block).
+    # "<mnemonic>(<args>)"
+    def p_inst_0(self, t):
+        'inst : ID LPAREN arg_list RPAREN'
+        # Pass the ID and arg list to the current format class to deal with.
+        currentFormat = formatStack.top()
+        codeObj = currentFormat.defineInst(t[1], t[3], t.lexer.lineno)
+        # t[3] is the (positional list, keyword dict) pair built by the
+        # arg_list rules; render both halves for the descriptive comment.
+        args = ','.join(map(str, t[3]))
+        # Prefix every line with '//' so multi-line text stays commented
+        # out, then strip the prefix from the very first line, which is
+        # placed after the '// ' written in the template below.
+        args = re.sub('(?m)^', '//', args)
+        args = re.sub('^//', '', args)
+        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
+        codeObj.prepend_all(comment)
+        t[0] = codeObj
+
+    # Define an instruction using an explicitly specified format:
+    # "<fmt>::<mnemonic>(<args>)"
+    def p_inst_1(self, t):
+        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
+        # The format is named explicitly, so look it up in formatMap
+        # rather than taking the top of the format stack.
+        try:
+            inst_format = formatMap[t[1]]
+        except KeyError:
+            error(t.lexer.lineno,
+                  'instruction format "%s" not defined.' % t[1])
+        codeObj = inst_format.defineInst(t[3], t[5], t.lexer.lineno)
+        # Render the arguments the same way p_inst_0 does: join the
+        # (positional, keyword) pair and keep any continuation line
+        # inside the '//' comment.
+        args = ','.join(map(str, t[5]))
+        args = re.sub('(?m)^', '//', args)
+        args = re.sub('^//', '', args)
+        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], args)
+        codeObj.prepend_all(comment)
+        t[0] = codeObj
+
+    # The arg list generates a tuple, where the first element is a
+    # list of the positional args and the second element is a dict
+    # containing the keyword args.
+    def p_arg_list_0(self, t):
+        'arg_list : positional_arg_list COMMA keyword_arg_list'
+        t[0] = ( t[1], t[3] )
+
+    def p_arg_list_1(self, t):
+        'arg_list : positional_arg_list'
+        t[0] = ( t[1], {} )
+
+    def p_arg_list_2(self, t):
+        'arg_list : keyword_arg_list'
+        t[0] = ( [], t[1] )
+
+    def p_positional_arg_list_0(self, t):
+        'positional_arg_list : empty'
+        t[0] = []
+
+    def p_positional_arg_list_1(self, t):
+        'positional_arg_list : expr'
+        t[0] = [t[1]]
+
+    def p_positional_arg_list_2(self, t):
+        'positional_arg_list : positional_arg_list COMMA expr'
+        t[0] = t[1] + [t[3]]
+
+    def p_keyword_arg_list_0(self, t):
+        'keyword_arg_list : keyword_arg'
+        t[0] = t[1]
+
+    def p_keyword_arg_list_1(self, t):
+        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
+        t[0] = t[1]
+        t[0].update(t[3])
+
+    def p_keyword_arg(self, t):
+        'keyword_arg : ID EQUALS expr'
+        t[0] = { t[1] : t[3] }
+
+    #
+    # Basic expressions.  These constitute the argument values of
+    # "function calls" (i.e. instruction definitions in the decode
+    # block) and default values for formal parameters of format
+    # functions.
+    #
+    # Right now, these are either strings, integers, or (recursively)
+    # lists of exprs (using Python square-bracket list syntax).  Note
+    # that bare identifiers are treated as string constants here (since
+    # there isn't really a variable namespace to refer to).
+    #
+    def p_expr_0(self, t):
+        '''expr : ID
+                | INTLIT
+                | STRLIT
+                | CODELIT'''
+        t[0] = t[1]
+
+    def p_expr_1(self, t):
+        '''expr : LBRACKET list_expr RBRACKET'''
+        t[0] = t[2]
+
+    def p_list_expr_0(self, t):
+        'list_expr : expr'
+        t[0] = [t[1]]
+
+    def p_list_expr_1(self, t):
+        'list_expr : list_expr COMMA expr'
+        t[0] = t[1] + [t[3]]
+
+    def p_list_expr_2(self, t):
+        'list_expr : empty'
+        t[0] = []
+
+    #
+    # Empty production... use in other rules for readability.
+    #
+    def p_empty(self, t):
+        'empty :'
+        pass
+
+    # Parse error handler.  Note that the argument here is the
+    # offending *token*, not a grammar symbol (hence the need to use
+    # t.value)
+    def p_error(self, t):
+        if t:
+            error(t.lexer.lineno, "syntax error at '%s'" % t.value)
+        else:
+            error(0, "unknown syntax error", True)
  
-#
-# Empty production... use in other rules for readability.
-#
-def p_empty(t):
-    'empty :'
-    pass
-
-# Parse error handler.  Note that the argument here is the offending
-# *token*, not a grammar symbol (hence the need to use t.value)
-def p_error(t):
-    if t:
-        error(t.lineno, "syntax error at '%s'" % t.value)
-    else:
-        error(0, "unknown syntax error", True)
+    # END OF GRAMMAR RULES
  
-# END OF GRAMMAR RULES
-#
  # Now build the parser.
-yacc.yacc()
-
+parser = ISAParser()
  
  #####################################################################
  #
@@ -741,6 +768,11 @@ def expand_cpu_symbols_to_string(template):
  def protect_cpu_symbols(template):
      return re.sub(r'%(?=\(CPU_)', '%%', template)
  
+# Protect any non-dict-substitution '%'s in a format string
+# (i.e. those not followed by '(')
+def protect_non_subst_percents(s):
+    return re.sub(r'%(?!\()', '%%', s)
+
  ###############
  # GenCode class
  #
@@ -814,7 +846,7 @@ exportContext = {}
  
  def updateExportContext():
      exportContext.update(exportDict(*exportContextSymbols))
-    exportContext.update(templateMap)
+    exportContext.update(parser.templateMap)
  
  def exportDict(*symNames):
      return dict([(s, eval(s)) for s in symNames])
@@ -840,7 +872,11 @@ class Format:
          context = {}
          updateExportContext()
          context.update(exportContext)
-        context.update({ 'name': name, 'Name': string.capitalize(name) })
+        if len(name):
+            Name = name[0].upper()
+            if len(name) > 1:
+                Name += name[1:]
+        context.update({ 'name': name, 'Name': Name })
          try:
              vars = self.func(self.user_code, context, *args[0], **args[1])
          except Exception, exc:
@@ -1002,7 +1038,7 @@ def substBitOps(code):
  # Template objects are format strings that allow substitution from
  # the attribute spaces of other objects (e.g. InstObjParams instances).
  
-labelRE = re.compile(r'[^%]%\(([^\)]+)\)[sd]')
+labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
  
  class Template:
      def __init__(self, t):
@@ -1020,7 +1056,7 @@ class Template:
          # Build a dict ('myDict') to use for the template substitution.
          # Start with the template namespace.  Make a copy since we're
          # going to modify it.
-        myDict = templateMap.copy()
+        myDict = parser.templateMap.copy()
  
          if isinstance(d, InstObjParams):
              # If we're dealing with an InstObjParams object, we need
@@ -1119,14 +1155,17 @@ def buildOperandTypeMap(userDict, lineno):
              ctype = 'uint%d_t' % size
              is_signed = 0
          elif desc == 'float':
-            is_signed = 1      # shouldn't really matter
+            is_signed = 1       # shouldn't really matter
              if size == 32:
                  ctype = 'float'
              elif size == 64:
                  ctype = 'double'
-        elif desc == 'twin int':
+        elif desc == 'twin64 int':
              is_signed = 0
              ctype = 'Twin64_t'
+        elif desc == 'twin32 int':
+            is_signed = 0
+            ctype = 'Twin32_t'
          if ctype == '':
              error(lineno, 'Unrecognized type description "%s" in userDict')
          operandTypeMap[ext] = (size, ctype, is_signed)
@@ -1141,6 +1180,39 @@ def buildOperandTypeMap(userDict, lineno):
  # (e.g., "32-bit integer register").
  #
  class Operand(object):
+    def buildReadCode(self, func = None):
+        # Names that the read_code template may reference.
+        subst = {"name": self.base_name,
+                 "func": func,
+                 "op_idx": self.src_reg_idx,
+                 "reg_idx": self.reg_spec,
+                 "size": self.size,
+                 "ctype": self.ctype}
+        code = self.read_code % subst
+        if self.size == self.dflt_size:
+            return '%s = %s;\n' % \
+                   (self.base_name, code)
+        # Size differs from the default: select bits size-1 down to 0.
+        return '%s = bits(%s, %d, 0);\n' % \
+               (self.base_name, code, self.size-1)
+
+    def buildWriteCode(self, func = None):
+        # Sign-extend only a signed operand whose size is not the default.
+        needs_sext = self.size != self.dflt_size and self.is_signed
+        if needs_sext:
+            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
+        else:
+            final_val = self.base_name
+        # Names that the write_code template may reference.
+        subst = {"name": self.base_name,
+                 "func": func,
+                 "op_idx": self.dest_reg_idx,
+                 "reg_idx": self.reg_spec,
+                 "size": self.size,
+                 "ctype": self.ctype,
+                 "final_val": final_val}
+        code = self.write_code % subst
+        return '''
+        {
+            %s final_val = %s;
+            %s;
+            if (traceData) { traceData->setData(final_val); }
+        }''' % (self.dflt_ctype, final_val, code)
+
      def __init__(self, full_name, ext, is_src, is_dest):
          self.full_name = full_name
          self.ext = ext
@@ -1159,7 +1231,7 @@ class Operand(object):
          # template must be careful not to use it if it doesn't apply.
          if self.isMem():
              self.mem_acc_size = self.makeAccSize()
-            if self.ctype == 'Twin64_t':
+            if self.ctype in ['Twin32_t', 'Twin64_t']:
                  self.mem_acc_type = 'Twin'
              else:
                  self.mem_acc_type = 'uint'
@@ -1237,6 +1309,8 @@ class IntRegOperand(Operand):
      def makeRead(self):
          if (self.ctype == 'float' or self.ctype == 'double'):
              error(0, 'Attempt to read integer register as FP')
+        if self.read_code != None:
+            return self.buildReadCode('readIntRegOperand')
          if (self.size == self.dflt_size):
              return '%s = xc->readIntRegOperand(this, %d);\n' % \
                     (self.base_name, self.src_reg_idx)
@@ -1253,6 +1327,8 @@ class IntRegOperand(Operand):
      def makeWrite(self):
          if (self.ctype == 'float' or self.ctype == 'double'):
              error(0, 'Attempt to write integer register as FP')
+        if self.write_code != None:
+            return self.buildWriteCode('setIntRegOperand')
          if (self.size != self.dflt_size and self.is_signed):
              final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
          else:
@@ -1284,27 +1360,15 @@ class FloatRegOperand(Operand):
  
      def makeRead(self):
          bit_select = 0
-        width = 0;
-        if (self.ctype == 'float'):
-            func = 'readFloatRegOperand'
-            width = 32;
-        elif (self.ctype == 'double'):
+        if (self.ctype == 'float' or self.ctype == 'double'):
              func = 'readFloatRegOperand'
-            width = 64;
          else:
              func = 'readFloatRegOperandBits'
-            if (self.ctype == 'uint32_t'):
-                width = 32;
-            elif (self.ctype == 'uint64_t'):
-                width = 64;
              if (self.size != self.dflt_size):
                  bit_select = 1
-        if width:
-            base = 'xc->%s(this, %d, %d)' % \
-                   (func, self.src_reg_idx, width)
-        else:
-            base = 'xc->%s(this, %d)' % \
-                   (func, self.src_reg_idx)
+        base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
+        if self.read_code != None:
+            return self.buildReadCode(func)
          if bit_select:
              return '%s = bits(%s, %d, 0);\n' % \
                     (self.base_name, base, self.size-1)
@@ -1314,34 +1378,23 @@ class FloatRegOperand(Operand):
      def makeWrite(self):
          final_val = self.base_name
          final_ctype = self.ctype
-        widthSpecifier = ''
-        width = 0
-        if (self.ctype == 'float'):
-            width = 32
-            func = 'setFloatRegOperand'
-        elif (self.ctype == 'double'):
-            width = 64
+        if (self.ctype == 'float' or self.ctype == 'double'):
              func = 'setFloatRegOperand'
-        elif (self.ctype == 'uint32_t'):
-            func = 'setFloatRegOperandBits'
-            width = 32
-        elif (self.ctype == 'uint64_t'):
+        elif (self.ctype == 'uint32_t' or self.ctype == 'uint64_t'):
              func = 'setFloatRegOperandBits'
-            width = 64
          else:
              func = 'setFloatRegOperandBits'
              final_ctype = 'uint%d_t' % self.dflt_size
              if (self.size != self.dflt_size and self.is_signed):
                  final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
-        if width:
-            widthSpecifier = ', %d' % width
+        if self.write_code != None:
+            return self.buildWriteCode(func)
          wb = '''
          {
              %s final_val = %s;
-            xc->%s(this, %d, final_val%s);\n
+            xc->%s(this, %d, final_val);\n
              if (traceData) { traceData->setData(final_val); }
-        }''' % (final_ctype, final_val, func, self.dest_reg_idx,
-                widthSpecifier)
+        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
          return wb
  
  class ControlRegOperand(Operand):
@@ -1365,7 +1418,9 @@ class ControlRegOperand(Operand):
          bit_select = 0
          if (self.ctype == 'float' or self.ctype == 'double'):
              error(0, 'Attempt to read control register as FP')
-        base = 'xc->readMiscRegOperandWithEffect(this, %s)' % self.src_reg_idx
+        if self.read_code != None:
+            return self.buildReadCode('readMiscRegOperand')
+        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
          if self.size == self.dflt_size:
              return '%s = %s;\n' % (self.base_name, base)
          else:
@@ -1375,7 +1430,9 @@ class ControlRegOperand(Operand):
      def makeWrite(self):
          if (self.ctype == 'float' or self.ctype == 'double'):
              error(0, 'Attempt to write control register as FP')
-        wb = 'xc->setMiscRegOperandWithEffect(this, %s, %s);\n' % \
+        if self.write_code != None:
+            return self.buildWriteCode('setMiscRegOperand')
+        wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
               (self.dest_reg_idx, self.base_name)
          wb += 'if (traceData) { traceData->setData(%s); }' % \
                self.base_name
@@ -1392,16 +1449,20 @@ class MemOperand(Operand):
          # Note that initializations in the declarations are solely
          # to avoid 'uninitialized variable' errors from the compiler.
          # Declare memory data variable.
-        if self.ctype == 'Twin64_t':
+        if self.ctype in ['Twin32_t','Twin64_t']:
              return "%s %s; %s.a = 0; %s.b = 0;\n" % (self.ctype, self.base_name,
                      self.base_name, self.base_name)
          c = '%s %s = 0;\n' % (self.ctype, self.base_name)
          return c
  
      def makeRead(self):
+        if self.read_code != None:
+            return self.buildReadCode()
          return ''
  
      def makeWrite(self):
+        if self.write_code != None:
+            return self.buildWriteCode()
          return ''
  
      # Return the memory access size *in bits*, suitable for
@@ -1409,15 +1470,56 @@ class MemOperand(Operand):
      def makeAccSize(self):
          return self.size
  
+class PCOperand(Operand):
+    # Operand read with xc->readPC() and written with xc->setPC().
+    # Unlike the UPC/NUPC/NPC operand classes, makeRead()/makeWrite()
+    # here do not consult self.read_code / self.write_code.
+    def makeConstructor(self):
+        # Contributes no code to the constructor.
+        return ''
+
+    def makeRead(self):
+        # C++ statement assigning the PC to the operand's variable.
+        return '%s = xc->readPC();\n' % self.base_name
+
+    def makeWrite(self):
+        # C++ statement storing the operand's variable as the PC.
+        return 'xc->setPC(%s);\n' % self.base_name
+
+class UPCOperand(Operand):
+    # Operand read with xc->readMicroPC() and written with
+    # xc->setMicroPC(), unless read_code / write_code override that.
+    def makeConstructor(self):
+        # Contributes no code to the constructor.
+        return ''
+
+    def makeRead(self):
+        # A read_code template, when given, replaces the default read.
+        if self.read_code is not None:
+            return self.buildReadCode('readMicroPC')
+        return '%s = xc->readMicroPC();\n' % self.base_name
+
+    def makeWrite(self):
+        # A write_code template, when given, replaces the default write.
+        if self.write_code is not None:
+            return self.buildWriteCode('setMicroPC')
+        return 'xc->setMicroPC(%s);\n' % self.base_name
+
+class NUPCOperand(Operand):
+    # Operand read with xc->readNextMicroPC() and written with
+    # xc->setNextMicroPC(), unless read_code / write_code override that.
+    def makeConstructor(self):
+        # Contributes no code to the constructor.
+        return ''
+
+    def makeRead(self):
+        # A read_code template, when given, replaces the default read.
+        if self.read_code is not None:
+            return self.buildReadCode('readNextMicroPC')
+        return '%s = xc->readNextMicroPC();\n' % self.base_name
+
+    def makeWrite(self):
+        # A write_code template, when given, replaces the default write.
+        if self.write_code is not None:
+            return self.buildWriteCode('setNextMicroPC')
+        return 'xc->setNextMicroPC(%s);\n' % self.base_name
  
  class NPCOperand(Operand):
      def makeConstructor(self):
          return ''
  
      def makeRead(self):
+        if self.read_code != None:
+            return self.buildReadCode('readNextPC')
          return '%s = xc->readNextPC();\n' % self.base_name
  
      def makeWrite(self):
+        if self.write_code != None:
+            return self.buildWriteCode('setNextPC')
          return 'xc->setNextPC(%s);\n' % self.base_name
  
  class NNPCOperand(Operand):
@@ -1425,16 +1527,33 @@ class NNPCOperand(Operand):
          return ''
  
      def makeRead(self):
+        if self.read_code != None:
+            return self.buildReadCode('readNextNPC')
          return '%s = xc->readNextNPC();\n' % self.base_name
  
      def makeWrite(self):
+        if self.write_code != None:
+            return self.buildWriteCode('setNextNPC')
          return 'xc->setNextNPC(%s);\n' % self.base_name
  
  def buildOperandNameMap(userDict, lineno):
      global operandNameMap
      operandNameMap = {}
      for (op_name, val) in userDict.iteritems():
-        (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val
+        (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val[:5]
+        if len(val) > 5:
+            read_code = val[5]
+        else:
+            read_code = None
+        if len(val) > 6:
+            write_code = val[6]
+        else:
+            write_code = None
+        if len(val) > 7:
+            error(lineno,
+                  'error: too many attributes for operand "%s"' %
+                  base_cls_name)
+            
          (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext]
          # Canonical flag structure is a triple of lists, where each list
          # indicates the set of flags implied by this operand always, when
@@ -1459,7 +1578,8 @@ def buildOperandNameMap(userDict, lineno):
          # Accumulate attributes of new operand class in tmp_dict
          tmp_dict = {}
          for attr in ('dflt_ext', 'reg_spec', 'flags', 'sort_pri',
-                     'dflt_size', 'dflt_ctype', 'dflt_is_signed'):
+                     'dflt_size', 'dflt_ctype', 'dflt_is_signed',
+                     'read_code', 'write_code'):
              tmp_dict[attr] = eval(attr)
          tmp_dict['base_name'] = op_name
          # New class name will be e.g. "IntReg_Ra"
@@ -1481,9 +1601,9 @@ def buildOperandNameMap(userDict, lineno):
      operands = userDict.keys()
  
      operandsREString = (r'''
-    (?<![\w\.])             # neg. lookbehind assertion: prevent partial matches
+    (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
      ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
-    (?![\w\.])      # neg. lookahead assertion: prevent partial matches
+    (?![\w\.])       # neg. lookahead assertion: prevent partial matches
      '''
                          % string.join(operands, '|'))
  
@@ -1499,6 +1619,8 @@ def buildOperandNameMap(userDict, lineno):
      global operandsWithExtRE
      operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)
  
+maxInstSrcRegs = 0
+maxInstDestRegs = 0
  
  class OperandList:
  
@@ -1562,6 +1684,12 @@ class OperandList:
                  if self.memOperand:
                      error(0, "Code block has more than one memory operand.")
                  self.memOperand = op_desc
+        global maxInstSrcRegs
+        global maxInstDestRegs
+        if maxInstSrcRegs < self.numSrcRegs:
+            maxInstSrcRegs = self.numSrcRegs
+        if maxInstDestRegs < self.numDestRegs:
+            maxInstDestRegs = self.numDestRegs
          # now make a final pass to finalize op_desc fields that may depend
          # on the register enumeration
          for op_desc in self.items:
@@ -1781,6 +1909,22 @@ namespace %(namespace)s {
  %(decode_function)s
  '''
  
+max_inst_regs_template = '''
+/*
+ * DO NOT EDIT THIS FILE!!!
+ *
+ * It was automatically generated from the ISA description in %(filename)s
+ */
+
+namespace %(namespace)s {
+
+    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
+    const int MaxInstDestRegs = %(MaxInstDestRegs)d;
+
+} // namespace %(namespace)s
+
+'''
+
  
  # Update the output file only if the new contents are different from
  # the current contents.  Minimizes the files that need to be rebuilt
@@ -1848,7 +1992,7 @@ def parse_isa_desc(isa_desc_file, output_dir):
      fileNameStack.push((isa_desc_file, 0))
  
      # Parse it.
-    (isa_name, namespace, global_code, namespace_code) = yacc.parse(isa_desc)
+    (isa_name, namespace, global_code, namespace_code) = parser.parse(isa_desc)
  
      # grab the last three path components of isa_desc_file to put in
      # the output
@@ -1879,6 +2023,16 @@ def parse_isa_desc(isa_desc_file, output_dir):
          update_if_needed(output_dir + '/' + cpu.filename,
                            file_template % vars())
  
+    # The variable names here are hacky, but this will create local variables
+    # which will be referenced in vars() which have the value of the globals.
+    global maxInstSrcRegs
+    MaxInstSrcRegs = maxInstSrcRegs
+    global maxInstDestRegs
+    MaxInstDestRegs = maxInstDestRegs
+    # max_inst_regs.hh
+    update_if_needed(output_dir + '/max_inst_regs.hh', \
+            max_inst_regs_template % vars())
+
  # global list of CpuModel objects (see cpu_models.py)
  cpu_models = []