arch/isa_parser.py

   1 #! /usr/bin/env python
   2
   3 # $Id$
   4
   5 # Copyright (c) 2003 The Regents of The University of Michigan
   6 # All rights reserved.
   7 #
   8 # Redistribution and use in source and binary forms, with or without
   9 # modification, are permitted provided that the following conditions are
  10 # met: redistributions of source code must retain the above copyright
  11 # notice, this list of conditions and the following disclaimer;
  12 # redistributions in binary form must reproduce the above copyright
  13 # notice, this list of conditions and the following disclaimer in the
  14 # documentation and/or other materials provided with the distribution;
  15 # neither the name of the copyright holders nor the names of its
  16 # contributors may be used to endorse or promote products derived from
  17 # this software without specific prior written permission.
  18 #
  19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 import os
  32 import sys
  33 import re
  34 import string
  35 import traceback
  36 # get type names
  37 from types import *
  38
  39 # Prepend the directory where the PLY lex & yacc modules are found
  40 # to the search path.  Assumes we're compiling in a subdirectory
  41 # of 'build' in the current tree.
  42 sys.path[0:0] = [os.environ['M5_EXT'] + '/ply']
  43
  44 import lex
  45 import yacc
  46
  47 #####################################################################
  48 #
  49 #                                Lexer
  50 #
  51 # The PLY lexer module takes two things as input:
  52 # - A list of token names (the string list 'tokens')
  53 # - A regular expression describing a match for each token.  The
  54 #   regexp for token FOO can be provided in two ways:
  55 #   - as a string variable named t_FOO
  56 #   - as the doc string for a function named t_FOO.  In this case,
  57 #     the function is also executed, allowing an action to be
  58 #     associated with each token match.
  59 #
  60 #####################################################################
  61
  62 # Reserved words.  These are listed separately as they are matched
  63 # using the same regexp as generic IDs, but distinguished in the
  64 # t_ID() function.  The PLY documentation suggests this approach.
# Token type names for the keywords.  The keyword text in an ISA
# description is the lowercase form of each name (see reserved_map).
reserved = (
    'BITFIELD', 'DECLARE', 'DECODE', 'DEFAULT', 'DEF', 'FORMAT',
    'LET', 'NAMESPACE', 'SIGNED', 'TEMPLATE'
    )

# List of tokens.  The lex module requires this.
# The reserved words are included so that t_ID() may assign them
# as token types.
tokens = reserved + (
    # identifier
    'ID',

    # integer literal
    'INTLIT',

    # string literal
    'STRLIT',

    # code literal
    'CODELIT',

    # ( ) { } < > , ; : :: *
    # (square brackets used to be tokens too; see the note below)
    'LPAREN', 'RPAREN',
# not used any more... commented out to suppress PLY warning
#    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'LESS', 'GREATER',
    'COMMA', 'SEMI', 'COLON', 'DBLCOLON',
    'ASTERISK',

    # C preprocessor directives
    'CPPDIRECTIVE'
)
  96
  97 # Regular expressions for token matching
# Punctuation tokens.  Each is given as a plain string regexp (the
# 't_FOO' variable form described above), so no action is run when
# one of them matches.
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
# not used any more... commented out to suppress PLY warning
# t_LBRACKET         = r'\['
# t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_LESS             = r'\<'
t_GREATER          = r'\>'
t_COMMA            = r','
t_SEMI             = r';'
t_COLON            = r':'
t_DBLCOLON         = r'::'
t_ASTERISK         = r'\*'
 112
 113 # Identifiers and reserved words
# Maps the lowercase keyword text (as written in the ISA description)
# to its token type name, e.g. 'decode' -> 'DECODE'.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r
 117
 118 def t_ID(t):
 119     r'[A-Za-z_]\w*'
 120     t.type = reserved_map.get(t.value,'ID')
 121     return t
 122
 123 # Integer literal
 124 def t_INTLIT(t):
 125     r'(0x[\da-fA-F]+)|\d+'
 126     try:
 127         t.value = int(t.value,0)
 128     except ValueError:
 129         error(t.lineno, 'Integer value "%s" too large' % t.value)
 130         t.value = 0
 131     return t
 132
 133 # String literal.  Note that these use only single quotes, and
 134 # can span multiple lines.
 135 def t_STRLIT(t):
 136     r"(?m)'([^'])+'"
 137     # strip off quotes
 138     t.value = t.value[1:-1]
 139     t.lineno += t.value.count('\n')
 140     return t
 141
 142
 143 # "Code literal"... like a string literal, but delimiters are
 144 # '{{' and '}}' so they get formatted nicely under emacs c-mode
 145 def t_CODELIT(t):
 146     r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
 147     # strip off {{ & }}
 148     t.value = t.value[2:-2]
 149     t.lineno += t.value.count('\n')
 150     return t
 151
 152 def t_CPPDIRECTIVE(t):
 153     r'^\#.*\n'
 154     t.lineno += t.value.count('\n')
 155     return t
 156
 157 #
 158 # The functions t_NEWLINE, t_ignore, and t_error are
 159 # special for the lex module.
 160 #
 161
 162 # Newlines
 163 def t_NEWLINE(t):
 164     r'\n+'
 165     t.lineno += t.value.count('\n')
 166
 167 # Comments
def t_comment(t):
    r'//.*'
    # C++-style comment running to the end of the line.  There is no
    # action and nothing is returned, so (per the lex module's
    # convention) the comment text yields no token.
 170
 171 # Completely ignored characters
# space, tab, and form feed (\x0c)
t_ignore           = ' \t\x0c'
 173
 174 # Error handler
 175 def t_error(t):
 176     error(t.lineno, "illegal character '%s'" % t.value[0])
 177     t.skip(1)
 178
 179 # Build the lexer
# NOTE(review): called with no arguments; presumably PLY collects the
# 'tokens' list and the t_* rules from this module -- confirm against
# the PLY version in use.
lex.lex()
 181
 182 #####################################################################
 183 #
 184 #                                Parser
 185 #
 186 # Every function whose name starts with 'p_' defines a grammar rule.
 187 # The rule is encoded in the function's doc string, while the
 188 # function body provides the action taken when the rule is matched.
 189 # The argument to each function is a list of the values of the
 190 # rule's symbols: t[0] for the LHS, and t[1..n] for the symbols
 191 # on the RHS.  For tokens, the value is copied from the t.value
 192 # attribute provided by the lexer.  For non-terminals, the value
 193 # is assigned by the producing rule; i.e., the job of the grammar
 194 # rule function is to set the value for the non-terminal on the LHS
 195 # (by assigning to t[0]).
 196 #####################################################################
 197
 198 # Not sure why, but we get a handful of shift/reduce conflicts on DECLARE.
 199 # By default these get resolved as shifts, which is correct, but
 200 # warnings are printed.  Explicitly marking DECLARE as right-associative
 201 # suppresses the warnings.
# Precedence table read by yacc: a tuple of (associativity, token...)
# entries.  DECLARE is the only token listed.
precedence = (
    ('right', 'DECLARE'),
    )
 205
 206 # The LHS of the first grammar rule is used as the start symbol
 207 # (in this case, 'specification').  Note that this rule enforces
 208 # that there will be exactly one namespace declaration, with 0 or more
 209 # global defs/decls before and after it.  The defs & decls before
 210 # the namespace decl will be outside the namespace; those after
 211 # will be inside.  The decoder function is always inside the namespace.
 212 def p_specification(t):
 213     'specification : opt_defs_and_declares name_decl opt_defs_and_declares decode_block'
 214     global_decls1 = t[1]
 215     isa_name = t[2]
 216     namespace = isa_name + "Inst"
 217     global_decls2 = t[3]
 218     (inst_decls, decode_code, exec_code) = t[4]
 219     decode_code = indent(decode_code)
 220     # grab the last three path components of isa_desc_filename
 221     filename = '/'.join(isa_desc_filename.split('/')[-3:])
 222     # if the isa_desc file defines a 'rcs_id' string,
 223     # echo that into the output too
 224     try:
 225         local_rcs_id = rcs_id
 226         # strip $s out of ID so it doesn't get re-substituted
 227         local_rcs_id = re.sub(r'\$', '', local_rcs_id)
 228     except NameError:
 229         local_rcs_id = 'Id: no RCS id found'
 230     output = open(decoder_filename, 'w')
 231     # split string to keep rcs from substituting this file's RCS id in
 232     print >> output, '/* $Id' + '''$ */
 233
 234 /*
 235  * Copyright (c) 2003
 236  * The Regents of The University of Michigan
 237  * All Rights Reserved
 238  *
 239  * This code is part of the M5 simulator, developed by Nathan Binkert,
 240  * Erik Hallnor, Steve Raasch, and Steve Reinhardt, with contributions
 241  * from Ron Dreslinski, Dave Greene, and Lisa Hsu.
 242  *
 243  * Permission is granted to use, copy, create derivative works and
 244  * redistribute this software and such derivative works for any
 245  * purpose, so long as the copyright notice above, this grant of
 246  * permission, and the disclaimer below appear in all copies made; and
 247  * so long as the name of The University of Michigan is not used in
 248  * any advertising or publicity pertaining to the use or distribution
 249  * of this software without specific, written prior authorization.
 250  *
 251  * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE
 252  * UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND
 253  * WITHOUT WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER
 254  * EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
 255  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 256  * PURPOSE. THE REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE
 257  * LIABLE FOR ANY DAMAGES, INCLUDING DIRECT, SPECIAL, INDIRECT,
 258  * INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM
 259  * ARISING OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
 260  * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF SUCH
 261  * DAMAGES.
 262  */
 263
 264 /*
 265  * DO NOT EDIT THIS FILE!!!
 266  *
 267  * It was automatically generated from this ISA description:
 268  *  Filename: %(filename)s
 269  *    RCS %(local_rcs_id)s
 270  */
 271
 272 #include "base/bitfield.hh"     // required for bitfield support
 273
 274
 275 /////////////////////////////////////
 276 // Global defs (outside namespace) //
 277 /////////////////////////////////////
 278
 279 %(global_decls1)s
 280
 281 /**
 282  * Namespace for %(isa_name)s static instruction objects.
 283  */
 284 namespace %(namespace)s
 285 {
 286
 287 /////////////////////////////////////
 288 // Global defs  (within namespace) //
 289 /////////////////////////////////////
 290
 291 %(global_decls2)s
 292
 293 ////////////////////////////////////
 294 // Declares from inst definitions //
 295 ////////////////////////////////////
 296
 297 %(inst_decls)s
 298
 299 %(exec_code)s
 300
 301 } // namespace %(namespace)s
 302
 303 //////////////////////
 304 // Decoder function //
 305 //////////////////////
 306
 307 StaticInstPtr<%(isa_name)s>
 308 %(isa_name)s::decodeInst(%(isa_name)s::MachInst machInst)
 309 {
 310     using namespace %(namespace)s;
 311 %(decode_code)s
 312 } // decodeInst
 313 ''' % vars()
 314     output.close()
 315
 316 # ISA name declaration looks like "namespace <foo>;"
def p_name_decl(t):
    'name_decl : NAMESPACE ID SEMI'
    # the rule's value is the ISA name (the text of the ID token)
    t[0] = t[2]
 320
 321 # 'opt_defs_and_declares' is a possibly empty sequence of
 322 # defs and/or declares.
def p_opt_defs_and_declares_0(t):
    'opt_defs_and_declares : empty'
    # no defs or declares: contributes no output text
    t[0] = ''
 326
def p_opt_defs_and_declares_1(t):
    'opt_defs_and_declares : defs_and_declares'
    # pass the accumulated output text through unchanged
    t[0] = t[1]
 330
def p_defs_and_declares_0(t):
    'defs_and_declares : def_or_declare'
    # first item of the sequence: its output text starts the string
    t[0] = t[1]
 334
def p_defs_and_declares_1(t):
    'defs_and_declares : defs_and_declares def_or_declare'
    # append this item's output text to the text accumulated so far
    t[0] = t[1] + t[2]
 338
 339 # The list of possible definition/declaration statements.
def p_def_or_declare(t):
    '''def_or_declare : def_format
                      | def_bitfield
                      | def_template
                      | global_declare
                      | global_let
                      | cpp_directive'''
    # whichever alternative matched, its value is the text to copy
    # into the output; pass it along unchanged
    t[0] = t[1]
 348
 349 # preprocessor directives are copied directly to the output.
def p_cpp_directive(t):
    '''cpp_directive : CPPDIRECTIVE'''
    # the token value is the full directive line, newline included
    t[0] = t[1]
 353
 354 # Global declares 'declare {{...}}' (C++ code blocks) are copied
 355 # directly to the output.
def p_global_declare(t):
    'global_declare : DECLARE CODELIT SEMI'
    # the C++ code is emitted after rewriting any bitfield operators
    # (e.g. 'foo<a:b>') into bits() calls
    t[0] = substBitOps(t[2])
 359
 360 # global let blocks 'let {{...}}' (Python code blocks) are executed
 361 # directly when seen.  These are typically used to initialize global
 362 # Python variables used in later format definitions.
def p_global_let(t):
    'global_let : LET CODELIT SEMI'
    # Run the block's Python code after normalizing its indentation.
    # NOTE(review): exec is given no explicit namespace, so the code
    # runs in this function's scope; presumably let blocks use 'global'
    # to publish names -- confirm against the ISA description files.
    try:
        exec(fixPythonIndentation(t[2]))
    except:
        # any exception from the let code is reported (with a Python
        # traceback) against the line of the 'let' keyword
        error_bt(t.lineno(1), 'error in global let block "%s".' % t[2])
    t[0] = '' # contributes nothing to the output C++ file
 370
# A bitfield definition looks like:
# 'def [signed] bitfield <ID> < <first> : <last> >;'
# (the angle brackets around the bit range are literal).
# This generates a preprocessor macro in the output file.
 374 def p_def_bitfield_0(t):
 375     'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
 376     expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
 377     if (t[2] == 'signed'):
 378         expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
 379     t[0] = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
 380
# alternate form for single bit: 'def [signed] bitfield <ID> < <bit> >;'
 382 def p_def_bitfield_1(t):
 383     'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
 384     expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
 385     if (t[2] == 'signed'):
 386         expr = 'sext<%d>(%s)' % (1, expr)
 387     t[0] = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
 388
def p_opt_signed_0(t):
    'opt_signed : SIGNED'
    # value is the keyword text itself; the bitfield rules compare it
    # against 'signed'
    t[0] = t[1]
 392
def p_opt_signed_1(t):
    'opt_signed : empty'
    # keyword absent: the empty string marks the field as unsigned
    t[0] = ''
 396
 397 # Global map variable to hold templates
# keyed by template name; each value is the template's code-literal
# text (filled in by p_def_template)
templateMap = {}
 399
def p_def_template(t):
    'def_template : DEF TEMPLATE ID CODELIT SEMI'
    # record the template text under its name; a later definition
    # with the same name silently replaces the earlier one
    templateMap[t[3]] = t[4]
    # contributes nothing to the output C++ file
    t[0] = ''
 404
 405 # An instruction format definition looks like
 406 # "def format <fmt>(<params>) {{...}};"
 407 def p_def_format(t):
 408     'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
 409     (id, params, code) = (t[3], t[5], t[7])
 410     defFormat(id, params, code, t.lineno(1))
 411     # insert a comment into the output to note that the def was processed
 412     t[0] = '''
 413 //
 414 // parser: format %s defined
 415 //
 416 ''' % id
 417
 418 # The formal parameter list for an instruction format is a possibly
 419 # empty list of comma-separated parameters.
def p_param_list_0(t):
    'param_list : empty'
    # no parameters: start with a fresh empty list
    t[0] = [ ]
 423
def p_param_list_1(t):
    'param_list : param'
    # single parameter: a one-element list
    t[0] = [t[1]]
 427
 428 def p_param_list_2(t):
 429     'param_list : param_list COMMA param'
 430     t[0] = t[1]
 431     t[0].append(t[3])
 432
 433 # Each formal parameter is either an identifier or an identifier
 434 # preceded by an asterisk.  As in Python, the latter (if present) gets
 435 # a tuple containing all the excess positional arguments, allowing
 436 # varargs functions.
def p_param_0(t):
    'param : ID'
    # ordinary parameter: just its name
    t[0] = t[1]
 440
def p_param_1(t):
    'param : ASTERISK ID'
    # just concatenate them: '*ID'
    # (the result is later pasted into a Python 'def' parameter list)
    t[0] = t[1] + t[2]
 445
 446 # End of format definition-related rules.
 447 ##############
 448
 449 #
# A decode block looks like:
#       decode <field> [default <inst>] { ... }
 452 #
 453 def p_decode_block(t):
 454     'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
 455     default_defaults = defaultStack.pop()
 456     (decls, decode_code, exec_code, has_default) = t[5]
 457     # use the "default defaults" only if there was no explicit
 458     # default statement in decode_stmt_list
 459     if not has_default:
 460         (default_decls, default_decode, default_exec) = default_defaults
 461         decls += default_decls
 462         decode_code += default_decode
 463         exec_code += default_exec
 464     t[0] = (decls,  '''
 465 switch (%s) {
 466 %s
 467 }
 468 ''' % (t[2], indent(decode_code)), exec_code)
 469
 470 # The opt_default statement serves only to push the "default defaults"
 471 # onto defaultStack.  This value will be used by nested decode blocks,
 472 # and used and popped off when the current decode_block is processed
 473 # (in p_decode_block() above).
def p_opt_default_0(t):
    'opt_default : empty'
    # no default specified: reuse the one currently at the top of the stack
    # (pushing a duplicate keeps push/pop balanced with p_decode_block)
    defaultStack.push(defaultStack.top())
    # no meaningful value returned
    t[0] = None
 480
 481 def p_opt_default_1(t):
 482     'opt_default : DEFAULT inst'
 483     # push the new default
 484     (decls, decode_code, exec_code) = t[2]
 485     defaultStack.push((decls, '\ndefault:\n%sbreak;' % decode_code, exec_code))
 486     # no meaningful value returned
 487     t[0] = None
 488
def p_decode_stmt_list_0(t):
    'decode_stmt_list : decode_stmt'
    # single statement: its (decls, decode_code, exec_code,
    # has_default) tuple is the list's value
    t[0] = t[1]
 492
 493 def p_decode_stmt_list_1(t):
 494     'decode_stmt_list : decode_stmt decode_stmt_list'
 495     (decls1, decode_code1, exec_code1, has_default1) = t[1]
 496     (decls2, decode_code2, exec_code2, has_default2) = t[2]
 497     if (has_default1 and has_default2):
 498         error(t.lineno(1), 'Two default cases in decode block')
 499     t[0] = (decls1 + '\n' + decls2, decode_code1 + '\n' + decode_code2,
 500             exec_code1 + '\n' + exec_code2, has_default1 or has_default2)
 501
 502 #
 503 # Decode statement rules
 504 #
 505 # There are four types of statements allowed in a decode block:
 506 # 1. Format blocks 'format <foo> { ... }'
 507 # 2. Nested decode blocks
 508 # 3. Instruction definitions.
 509 # 4. C preprocessor directives.
 510
 511
 512 # Preprocessor directives found in a decode statement list are passed
 513 # through to the output, replicated to both the declaration and decode
 514 # streams.  This works well for ifdefs, so we can ifdef out both the
 515 # declarations and the decode cases generated by an instruction
 516 # definition.  Handling them as part of the grammar makes it easy to
 517 # keep them in the right place with respect to the code generated by
 518 # the other statements.
def p_decode_stmt_cpp(t):
    'decode_stmt : CPPDIRECTIVE'
    # The directive text goes into all three code strings of the
    # statement tuple (decls, decode_code, exec_code); the final 0
    # says this statement is not a default case.
    t[0] = (t[1], t[1], t[1], 0)
 522
 523 # A format block 'format <foo> { ... }' sets the default instruction
 524 # format used to handle instruction definitions inside the block.
 525 # This format can be overridden by using an explicit format on the
 526 # instruction definition or with a nested format block.
def p_decode_stmt_format(t):
    'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
    # The format will be pushed on the stack when 'push_format_id' is
    # processed (see below).  Once the parser has recognized the full
    # production (through the right brace), we're done with the format,
    # so now we can pop it.
    formatStack.pop()
    # the block's value is that of the statement list it encloses
    t[0] = t[4]
 535
 536 # This rule exists so we can set the current format (& push the stack)
 537 # when we recognize the format name part of the format block.
 538 def p_push_format_id(t):
 539     'push_format_id : ID'
 540     try:
 541         formatStack.push(formatMap[t[1]])
 542         t[0] = ('', '// format %s' % t[1])
 543     except KeyError:
 544         error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
 545
 546 # Nested decode block: if the value of the current field matches the
 547 # specified constant, do a nested decode on some other field.
 548 def p_decode_stmt_decode(t):
 549     'decode_stmt : case_label COLON decode_block'
 550     (label, is_default) = t[1]
 551     (decls, decode_code, exec_code) = t[3]
 552     # just wrap the decoding code from the block as a case in the
 553     # outer switch statement.
 554     t[0] = (decls, '\n%s:\n%s' % (label, indent(decode_code)),
 555             exec_code, is_default)
 556
 557 # Instruction definition (finally!).
 558 def p_decode_stmt_inst(t):
 559     'decode_stmt : case_label COLON inst SEMI'
 560     (label, is_default) = t[1]
 561     (decls, decode_code, exec_code) = t[3]
 562     t[0] = (decls, '\n%s:%sbreak;' % (label, indent(decode_code)),
 563             exec_code, is_default)
 564
 565 # The case label is either a list of one or more constants or 'default'
 566 def p_case_label_0(t):
 567     'case_label : intlit_list'
 568     t[0] = (': '.join(map(lambda a: 'case %#x' % a, t[1])), 0)
 569
def p_case_label_1(t):
    'case_label : DEFAULT'
    # (label text, is_default flag)
    t[0] = ('default', 1)
 573
 574 #
 575 # The constant list for a decode case label must be non-empty, but may have
 576 # one or more comma-separated integer literals in it.
 577 #
def p_intlit_list_0(t):
    'intlit_list : INTLIT'
    # single constant: a one-element list
    t[0] = [t[1]]
 581
 582 def p_intlit_list_1(t):
 583     'intlit_list : intlit_list COMMA INTLIT'
 584     t[0] = t[1]
 585     t[0].append(t[3])
 586
 587 # Define an instruction using the current instruction format (specified
 588 # by an enclosing format block).
 589 # "<mnemonic>(<args>)"
 590 def p_inst_0(t):
 591     'inst : ID LPAREN arg_list RPAREN'
 592     # Pass the ID and arg list to the current format class to deal with.
 593     currentFormat = formatStack.top()
 594     (decls, decode_code, exec_code) = \
 595             currentFormat.defineInst(t[1], t[3], t.lineno(1))
 596     args = ','.join(map(str, t[3]))
 597     args = re.sub('(?m)^', '//', args)
 598     args = re.sub('^//', '', args)
 599     comment = '// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
 600     t[0] = (comment + decls, comment + decode_code, comment + exec_code)
 601
 602 # Define an instruction using an explicitly specified format:
 603 # "<fmt>::<mnemonic>(<args>)"
 604 def p_inst_1(t):
 605     'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
 606     try:
 607         format = formatMap[t[1]]
 608     except KeyError:
 609         error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
 610     (decls, decode_code, exec_code) = \
 611             format.defineInst(t[3], t[5], t.lineno(1))
 612     comment = '// %s::%s(%s)\n' % (t[1], t[3], t[5])
 613     t[0] = (comment + decls, comment + decode_code, comment + exec_code)
 614
def p_arg_list_0(t):
    'arg_list : empty'
    # no arguments: start with a fresh empty list
    t[0] = [ ]
 618
def p_arg_list_1(t):
    'arg_list : arg'
    # single argument: a one-element list
    t[0] = [t[1]]
 622
 623 def p_arg_list_2(t):
 624     'arg_list : arg_list COMMA arg'
 625     t[0] = t[1]
 626     t[0].append(t[3])
 627
def p_arg(t):
    '''arg : ID
           | INTLIT
           | STRLIT
           | CODELIT'''
    # The argument's value is the token's value as set by the lexer:
    # an int for INTLIT, and for STRLIT/CODELIT the text with its
    # delimiters already stripped.
    t[0] = t[1]
 634
 635 #
 636 # Empty production... use in other rules for readability.
 637 #
def p_empty(t):
    'empty :'
    # matches nothing and assigns no value to t[0]
    pass
 641
 642 # Parse error handler.  Note that the argument here is the offending
 643 # *token*, not a grammar symbol (hence the need to use t.value)
 644 def p_error(t):
 645     if t:
 646         error(t.lineno, "syntax error at '%s'" % t.value)
 647     else:
 648         error_bt(0, "unknown syntax error")
 649
 650 # END OF GRAMMAR RULES
 651 #
 652 # Now build the parser.
# NOTE(review): called with no arguments; presumably PLY collects the
# p_* rules, 'tokens' and 'precedence' from this module -- confirm
# against the PLY version in use.
yacc.yacc()
 654
 655 ################
 656 # Format object.
 657 #
 658 # A format object encapsulates an instruction format.  It must provide
 659 # a defineInst() method that generates the code for an instruction
 660 # definition.
 661
 662 class Format:
 663     def __init__(self, id, params, code):
 664         # constructor: just save away arguments
 665         self.id = id
 666         self.params = params
 667         # strip blank lines from code (ones at the end are troublesome)
 668         code = re.sub(r'(?m)^\s*$', '', code);
 669         if code == '':
 670             code = '    pass\n'
 671         param_list = string.join(params, ", ")
 672         f = 'def defInst(name, Name, ' + param_list + '):\n' + code
 673         c = compile(f, 'def format ' + id, 'exec')
 674         exec(c)
 675         self.func = defInst
 676
 677     def defineInst(self, name, args, lineno):
 678         # automatically provide a capitalized version of mnemonic
 679         Name = string.capitalize(name)
 680         try:
 681             retval = self.func(name, Name, *args)
 682         except:
 683             error_bt(lineno, 'error defining "%s".' % name)
 684         return retval
 685
 686 # Special null format to catch an implicit-format instruction
 687 # definition outside of any format block.
 688 class NoFormat:
 689     def __init__(self):
 690         self.defaultInst = ''
 691
 692     def defineInst(self, name, args, lineno):
 693         error(lineno,
 694               'instruction definition "%s" with no active format!' % name)
 695
 696 # This dictionary maps format name strings to Format objects.
# filled in by defFormat(); looked up by the format-block and
# explicit-format instruction rules
formatMap = {}
 698
 699 # Define a new format
 700 def defFormat(id, params, code, lineno):
 701     # make sure we haven't already defined this one
 702     if formatMap.get(id, None) != None:
 703         error(lineno, 'format %s redefined.' % id)
 704     # create new object and store in global map
 705     formatMap[id] = Format(id, params, code)
 706
 707
 708 ##############
 709 # Stack: a simple stack object.  Used for both formats (formatStack)
 710 # and default cases (defaultStack).
 711
 712 class Stack:
 713     def __init__(self, initItem):
 714         self.stack = [ initItem ]
 715
 716     def push(self, item):
 717         self.stack.append(item);
 718
 719     def pop(self):
 720         return self.stack.pop()
 721
 722     def top(self):
 723         return self.stack[-1]
 724
 725 # The global format stack.
# Starts out holding a NoFormat object, so an instruction defined
# outside any format block is reported as an error.
formatStack = Stack(NoFormat())

# The global default case stack.
# Starts out holding None: no default case is in effect until a
# decode block supplies one.
defaultStack = Stack( None )
 730
 731 ###################
 732 # Utility functions
 733
 734 #
 735 # Indent every line in string 's' by two spaces
 736 # (except preprocessor directives).
 737 # Used to make nested code blocks look pretty.
 738 #
 739 def indent(s):
 740     return re.sub(r'(?m)^(?!\#)', '  ', s)
 741
 742 #
 743 # Munge a somewhat arbitrarily formatted piece of Python code
 744 # (e.g. from a format 'let' block) into something whose indentation
 745 # will get by the Python parser.
 746 #
 747 # The two keys here are that Python will give a syntax error if
 748 # there's any whitespace at the beginning of the first line, and that
 749 # all lines at the same lexical nesting level must have identical
 750 # indentation.  Unfortunately the way code literals work, an entire
 751 # let block tends to have some initial indentation.  Rather than
 752 # trying to figure out what that is and strip it off, we prepend 'if
 753 # 1:' to make the let code the nested block inside the if (and have
 754 # the parser automatically deal with the indentation for us).
 755 #
 756 # We don't want to do this if (1) the code block is empty or (2) the
 757 # first line of the block doesn't have any whitespace at the front.
 758
 759 def fixPythonIndentation(s):
 760     # get rid of blank lines first
 761     s = re.sub(r'(?m)^\s*\n', '', s);
 762     if (s != '' and re.match(r'[ \t]', s[0])):
 763         s = 'if 1:\n' + s
 764     return s
 765
 766 # Error handler.  Just call exit.  Output formatted to work under
 767 # Emacs compile-mode.
def error(lineno, string):
    # Terminates the program via sys.exit() with the message
    # '<isa_desc_filename>:<lineno>: <string>'; it never returns.
    # Note that the 'string' parameter hides the string module inside
    # this function.
    sys.exit("%s:%d: %s" % (isa_desc_filename, lineno, string))
 770
 771 # Like error(), but include a Python stack backtrace (for processing
 772 # Python exceptions).
 773 def error_bt(lineno, string):
 774     traceback.print_exc()
 775     print >> sys.stderr, "%s:%d: %s" % (isa_desc_filename, lineno, string)
 776     sys.exit(1)
 777
 778
 779 #####################################################################
 780 #
 781 #                      Bitfield Operator Support
 782 #
 783 #####################################################################
 784
 785 bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')
 786
 787 bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
 788 bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')
 789
 790 def substBitOps(code):
 791     # first convert single-bit selectors to two-index form
 792     # i.e., <n> --> <n:n>
 793     code = bitOp1ArgRE.sub(r'<\1:\1>', code)
 794     # simple case: selector applied to ID (name)
 795     # i.e., foo<a:b> --> bits(foo, a, b)
 796     code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
 797     # if selector is applied to expression (ending in ')'),
 798     # we need to search backward for matching '('
 799     match = bitOpExprRE.search(code)
 800     while match:
 801         exprEnd = match.start()
 802         here = exprEnd - 1
 803         nestLevel = 1
 804         while nestLevel > 0:
 805             if code[here] == '(':
 806                 nestLevel -= 1
 807             elif code[here] == ')':
 808                 nestLevel += 1
 809             here -= 1
 810             if here < 0:
 811                 sys.exit("Didn't find '('!")
 812         exprStart = here+1
 813         newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
 814                                          match.group(1), match.group(2))
 815         code = code[:exprStart] + newExpr + code[match.end():]
 816         match = bitOpExprRE.search(code)
 817     return code
 818
 819
 820 #####################################################################
 821 #
 822 #                             Code Parser
 823 #
 824 # The remaining code is the support for automatically extracting
 825 # instruction characteristics from pseudocode.
 826 #
 827 #####################################################################
 828
 829 # Force the argument to be a list
 830 def makeList(list_or_item):
 831     if not list_or_item:
 832         return []
 833     elif type(list_or_item) == ListType:
 834         return list_or_item
 835     else:
 836         return [ list_or_item ]
 837
 838 # generate operandSizeMap based on provided operandTypeMap:
 839 # basically generate equiv. C++ type and make is_signed flag
 840 def buildOperandSizeMap():
 841     global operandSizeMap
 842     operandSizeMap = {}
 843     for ext in operandTypeMap.keys():
 844         (desc, size) = operandTypeMap[ext]
 845         if desc == 'signed int':
 846             type = 'int%d_t' % size
 847             is_signed = 1
 848         elif desc == 'unsigned int':
 849             type = 'uint%d_t' % size
 850             is_signed = 0
 851         elif desc == 'float':
 852             is_signed = 1       # shouldn't really matter
 853             if size == 32:
 854                 type = 'float'
 855             elif size == 64:
 856                 type = 'double'
 857         if type == '':
 858             error(0, 'Unrecognized type description "%s" in operandTypeMap')
 859         operandSizeMap[ext] = (size, type, is_signed)
 860
 861 #
 862 # Base class for operand traits.  An instance of this class (or actually
 863 # a class derived from this one) encapsulates the traits of a particular
 864 # operand type (e.g., "32-bit integer register").
 865 #
 866 class OperandTraits:
 867     def __init__(self, dflt_ext, reg_spec, flags, sort_pri):
 868         # Force construction of operandSizeMap from operandTypeMap
 869         # if it hasn't happened yet
 870         if not globals().has_key('operandSizeMap'):
 871             buildOperandSizeMap()
 872         self.dflt_ext = dflt_ext
 873         (self.dflt_size, self.dflt_type, self.dflt_is_signed) = \
 874                          operandSizeMap[dflt_ext]
 875         self.reg_spec = reg_spec
 876         # Canonical flag structure is a triple of lists, where each list
 877         # indicates the set of flags implied by this operand always, when
 878         # used as a source, and when used as a dest, respectively.
 879         # For simplicity this can be initialized using a variety of fairly
 880         # obvious shortcuts; we convert these to canonical form here.
 881         if not flags:
 882             # no flags specified (e.g., 'None')
 883             self.flags = ( [], [], [] )
 884         elif type(flags) == StringType:
 885             # a single flag: assumed to be unconditional
 886             self.flags = ( [ flags ], [], [] )
 887         elif type(flags) == ListType:
 888             # a list of flags: also assumed to be unconditional
 889             self.flags = ( flags, [], [] )
 890         elif type(flags) == TupleType:
 891             # it's a tuple: it should be a triple,
 892             # but each item could be a single string or a list
 893             (uncond_flags, src_flags, dest_flags) = flags
 894             self.flags = (makeList(uncond_flags),
 895                           makeList(src_flags), makeList(dest_flags))
 896         self.sort_pri = sort_pri
 897
 898     def isMem(self):
 899         return 0
 900
 901     def isReg(self):
 902         return 0
 903
 904     def isFloatReg(self):
 905         return 0
 906
 907     def isIntReg(self):
 908         return 0
 909
 910     def isControlReg(self):
 911         return 0
 912
 913     def getFlags(self, op_desc):
 914         # note the empty slice '[:]' gives us a copy of self.flags[0]
 915         # instead of a reference to it
 916         my_flags = self.flags[0][:]
 917         if op_desc.is_src:
 918             my_flags += self.flags[1]
 919         if op_desc.is_dest:
 920             my_flags += self.flags[2]
 921         return my_flags
 922
 923     def makeDecl(self, op_desc):
 924         (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
 925         # Note that initializations in the declarations are solely
 926         # to avoid 'uninitialized variable' errors from the compiler.
 927         return type + ' ' + op_desc.munged_name + ' = 0;\n';
 928
 929 class IntRegOperandTraits(OperandTraits):
 930     def isReg(self):
 931         return 1
 932
 933     def isIntReg(self):
 934         return 1
 935
 936     def makeConstructor(self, op_desc):
 937         c = ''
 938         if op_desc.is_src:
 939             c += '\n\t_srcRegIdx[%d] = %s;' % \
 940                  (op_desc.src_reg_idx, self.reg_spec)
 941         if op_desc.is_dest:
 942             c += '\n\t_destRegIdx[%d] = %s;' % \
 943                  (op_desc.dest_reg_idx, self.reg_spec)
 944         return c
 945
 946     def makeRead(self, op_desc):
 947         (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
 948         if (type == 'float' or type == 'double'):
 949             error(0, 'Attempt to read integer register as FP')
 950         if (size == self.dflt_size):
 951             return '%s = xc->readIntReg(_srcRegIdx[%d]);\n' % \
 952                    (op_desc.munged_name, op_desc.src_reg_idx)
 953         else:
 954             return '%s = bits(xc->readIntReg(_srcRegIdx[%d]), %d, 0);\n' % \
 955                    (op_desc.munged_name, op_desc.src_reg_idx, size-1)
 956
 957     def makeWrite(self, op_desc):
 958         (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
 959         if (type == 'float' or type == 'double'):
 960             error(0, 'Attempt to write integer register as FP')
 961         if (size != self.dflt_size and is_signed):
 962             final_val = 'sext<%d>(%s)' % (size, op_desc.munged_name)
 963         else:
 964             final_val = op_desc.munged_name
 965         wb = '''
 966         {
 967             %s final_val = %s;
 968             xc->setIntReg(_destRegIdx[%d], final_val);\n
 969             if (traceData) { traceData->setData(final_val); }
 970         }''' % (self.dflt_type, final_val, op_desc.dest_reg_idx)
 971         return wb
 972
 973 class FloatRegOperandTraits(OperandTraits):
 974     def isReg(self):
 975         return 1
 976
 977     def isFloatReg(self):
 978         return 1
 979
 980     def makeConstructor(self, op_desc):
 981         c = ''
 982         if op_desc.is_src:
 983             c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
 984                  (op_desc.src_reg_idx, self.reg_spec)
 985         if op_desc.is_dest:
 986             c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
 987                  (op_desc.dest_reg_idx, self.reg_spec)
 988         return c
 989
 990     def makeRead(self, op_desc):
 991         (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
 992         bit_select = 0
 993         if (type == 'float'):
 994             func = 'readFloatRegSingle'
 995         elif (type == 'double'):
 996             func = 'readFloatRegDouble'
 997         else:
 998             func = 'readFloatRegInt'
 999             if (size != self.dflt_size):
1000                 bit_select = 1
1001         base = 'xc->%s(_srcRegIdx[%d] - FP_Base_DepTag)' % \
1002                (func, op_desc.src_reg_idx)
1003         if bit_select:
1004             return '%s = bits(%s, %d, 0);\n' % \
1005                    (op_desc.munged_name, base, size-1)
1006         else:
1007             return '%s = %s;\n' % (op_desc.munged_name, base)
1008
1009     def makeWrite(self, op_desc):
1010         (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
1011         final_val = op_desc.munged_name
1012         if (type == 'float'):
1013             func = 'setFloatRegSingle'
1014         elif (type == 'double'):
1015             func = 'setFloatRegDouble'
1016         else:
1017             func = 'setFloatRegInt'
1018             type = 'uint%d_t' % self.dflt_size
1019             if (size != self.dflt_size and is_signed):
1020                 final_val = 'sext<%d>(%s)' % (size, op_desc.munged_name)
1021         wb = '''
1022         {
1023             %s final_val = %s;
1024             xc->%s(_destRegIdx[%d] - FP_Base_DepTag, final_val);\n
1025             if (traceData) { traceData->setData(final_val); }
1026         }''' % (type, final_val, func, op_desc.dest_reg_idx)
1027         return wb
1028
# Traits of control register operands.  reg_spec is used both to build
# the '<reg_spec>_DepTag' dependence tag and to name the generated
# xc->read<reg_spec>() / xc->set<reg_spec>() accessor calls.
class ControlRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    # Constructor code: one index assignment per role (source and/or
    # dest) in which the operand is used.
    def makeConstructor(self, op_desc):
        pieces = []
        if op_desc.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s_DepTag;' %
                          (op_desc.src_reg_idx, self.reg_spec))
        if op_desc.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s_DepTag;' %
                          (op_desc.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    # Read code: a plain accessor call at the default size, otherwise
    # only the low 'size' bits of the accessor result are kept.
    def makeRead(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        if ctype in ('float', 'double'):
            error(0, 'Attempt to read control register as FP')
        base = 'xc->read%s()' % self.reg_spec
        if size != self.dflt_size:
            return '%s = bits(%s, %d, 0);\n' % \
                   (op_desc.munged_name, base, size-1)
        return '%s = %s;\n' % (op_desc.munged_name, base)

    # Write-back code: the setter call followed by the trace hook.
    def makeWrite(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        if ctype in ('float', 'double'):
            error(0, 'Attempt to write control register as FP')
        setter = 'xc->set%s(%s);\n' % (self.reg_spec, op_desc.munged_name)
        tracer = 'if (traceData) { traceData->setData(%s); }' % \
                 op_desc.munged_name
        return setter + tracer
1066
# Traits of memory operands: accesses go through xc->read() and
# xc->write() at address EA, cast to an unsigned integer reference of
# the operand's size.
class MemOperandTraits(OperandTraits):
    def isMem(self):
        return 1

    # Memory operands contribute nothing to the constructor.
    def makeConstructor(self, op_desc):
        return ''

    def makeDecl(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        decls = [
            # Memory data variable.  The initializations here are solely
            # to avoid 'uninitialized variable' errors from the compiler.
            '%s %s = 0;\n' % (ctype, op_desc.munged_name),
            # Variable holding the memory access flags.
            'unsigned %s_flags = memAccessFlags;\n' % op_desc.base_name,
        ]
        if op_desc.is_dest:
            # A dest operand means a store, which additionally needs a
            # variable for the write result code.
            decls.append('uint64_t %s_write_result = 0;\n'
                         % op_desc.base_name)
        return ''.join(decls)

    def makeRead(self, op_desc):
        size = operandSizeMap[op_desc.eff_ext][0]
        eff_type = 'uint%d_t' % size
        read_stmt = 'fault = xc->read(EA, (%s&)%s, %s_flags);\n'
        return read_stmt % (eff_type, op_desc.munged_name, op_desc.base_name)

    def makeWrite(self, op_desc):
        size = operandSizeMap[op_desc.eff_ext][0]
        eff_type = 'uint%d_t' % size
        write_stmt = ('fault = xc->write((%s&)%s, EA, %s_flags,'
                      ' &%s_write_result);\n')
        return write_stmt % (eff_type, op_desc.munged_name,
                             op_desc.base_name, op_desc.base_name)
1102
# Traits of the next-PC operand: it is read as xc->readPC() + 4 and
# written through xc->setNextPC().
class NPCOperandTraits(OperandTraits):
    # The next PC contributes nothing to the constructor.
    def makeConstructor(self, op_desc):
        return ''

    def makeRead(self, op_desc):
        var_name = op_desc.munged_name
        return '%s = xc->readPC() + 4;\n' % var_name

    def makeWrite(self, op_desc):
        var_name = op_desc.munged_name
        return 'xc->setNextPC(%s);\n' % var_name
1112
1113
#
# Define operand variables that get derived from the basic declaration
# of ISA-specific operands in operandTraitsMap.  This function must be
# called by the ISA description file explicitly after defining
# operandTraitsMap (in a 'let' block).
#
# Sets three globals:
#   operands          - list of the operand names (operandTraitsMap keys)
#   operandsRE        - matches an operand with an optional '.ext';
#                       groups are (full name, base name, extension)
#   operandsWithExtRE - matches only operands that carry an extension;
#                       groups are (base name, extension)
#
def defineDerivedOperandVars():
    global operands
    operands = list(operandTraitsMap.keys())

    # Escape the names so that each one is matched literally: whatever
    # the regexps match is later used as a key into operandTraitsMap.
    operand_alternatives = '|'.join([re.escape(name) for name in operands])

    operandsREString = (r'''
    (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
    ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
    (?![\w\.])       # neg. lookahead assertion: prevent partial matches
    '''
                        % operand_alternatives)

    global operandsRE
    operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

    # Same as operandsREString, but extension is mandatory, and only two
    # groups are returned (base and ext, not full name as above).
    # Used for substituting '_' for '.' to make C++ identifiers.
    operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                               % operand_alternatives)

    global operandsWithExtRE
    operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)
1142
1143
#
# Operand descriptor class.  Each instance describes one specific
# operand as it is used in one code block: its names, its extension,
# whether it is read and/or written, and its traits object (looked up
# in operandTraitsMap by base name).
#
class OperandDescriptor:
    def __init__(self, full_name, base_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.base_name = base_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        traits = operandTraitsMap[base_name]
        self.traits = traits
        # eff_ext ('effective extension') is the explicit extension when
        # there is one and the traits' default otherwise.  munged_name
        # is a legal C++ variable name: the base name, joined to the
        # explicit extension (if any) by '_' instead of '.'.
        if not ext:
            self.eff_ext = traits.dflt_ext
            self.munged_name = base_name
        else:
            self.eff_ext = ext
            self.munged_name = base_name + '_' + ext

    # Fill in the remaining (mostly code) fields.  Kept separate from
    # __init__() because some of them may depend on the register index
    # enumeration, which has not been performed yet at construction
    # time.
    def finalize(self):
        traits = self.traits
        self.flags = traits.getFlags(self)
        self.constructor = traits.makeConstructor(self)
        self.op_decl = traits.makeDecl(self)

        # Read code exists only for sources, write-back code only for
        # destinations; the other role gets an empty string.
        read_code = ''
        if self.is_src:
            read_code = traits.makeRead(self)
        self.op_rd = read_code

        write_code = ''
        if self.is_dest:
            write_code = traits.makeWrite(self)
        self.op_wb = write_code
1185
# Ordered collection of operand descriptors with a by-base-name index.
#   items - the descriptors, in insertion order until sort() is called
#   bases - maps base_name to the descriptor most recently appended
#           under that name
class OperandDescriptorList:
    def __init__(self):
        self.items = []
        self.bases = {}

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}:
    # accumulates, in item order, the attr_name attribute of every
    # operand accepted by 'filter' onto 'result' ('' or a fresh list)
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    # Sort the operands in place by ascending traits.sort_pri.  A key
    # function is used instead of a comparison function: the result is
    # the same (the sort is stable, so equal priorities keep their
    # relative order), but it does not require integer priorities and
    # it also works on interpreters where list.sort() no longer accepts
    # a comparison function.
    def sort(self):
        self.items.sort(key=lambda op_desc: op_desc.traits.sort_pri)
1235
# Regular expression object to match C++ '//' comments
# (used in findOperands()).  The pattern stops short of the line
# terminator on purpose: removing the newline along with the comment
# would glue the last token before the comment to the first token of
# the next line, and a comment on a final line that lacks a trailing
# newline would not be matched at all.
commentRE = re.compile(r'//.*')

# Regular expression object to match assignment statements
# (used in findOperands()): optional whitespace followed by an '='
# that is not the first character of '=='.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1243
#
# Scan a code block for operands and return them as an
# OperandDescriptorList.  Besides the descriptors themselves, the
# returned list carries the register counts numSrcRegs, numDestRegs,
# numFPDestRegs and numIntDestRegs.
#
def findOperands(code):
    found = OperandDescriptorList()
    # Strip '//' comments first so that register specifiers mentioned
    # inside them are not mistaken for operands.
    stripped = commentRE.sub('', code)
    for match in operandsRE.finditer(stripped):
        # regexp groups are operand full name, base, and extension
        (op_full, op_base, op_ext) = match.groups()
        # An operand directly followed by an assignment is a destination
        # (LHS); any other occurrence is a source (RHS).
        is_dest = assignRE.match(stripped, match.end()) is not None
        is_src = not is_dest
        known = found.find_base(op_base)
        if not known:
            # first occurrence of this base name: create its descriptor
            found.append(OperandDescriptor(op_full, op_base, op_ext,
                                           is_src, is_dest))
            continue
        # repeated occurrence: the extension must agree, and the
        # source/dest roles accumulate
        if known.ext != op_ext:
            error(0, 'Inconsistent extensions for operand %s' % op_base)
        known.is_src = known.is_src or is_src
        known.is_dest = known.is_dest or is_dest
    found.sort()
    # Number the source and destination register operands (the indices
    # are used when building the constructor later) and count the
    # FP and integer destinations.
    src_count = 0
    dest_count = 0
    found.numFPDestRegs = 0
    found.numIntDestRegs = 0
    for op_desc in found:
        traits = op_desc.traits
        if not traits.isReg():
            continue
        if op_desc.is_src:
            op_desc.src_reg_idx = src_count
            src_count += 1
        if op_desc.is_dest:
            op_desc.dest_reg_idx = dest_count
            dest_count += 1
            if traits.isFloatReg():
                found.numFPDestRegs += 1
            elif traits.isIntReg():
                found.numIntDestRegs += 1
    found.numSrcRegs = src_count
    found.numDestRegs = dest_count
    # Final pass: fill in the descriptor fields that may depend on the
    # register enumeration above.
    for op_desc in found:
        op_desc.finalize()
    return found
1306
# Rewrite every 'base.ext' operand reference in the code string as
# 'base_ext', which is a legal C++ variable name (and equals the
# munged_name attribute of the matching OperandDescriptor object).
# Operands without an extension are left untouched.
def substMungedOpNames(code):
    munged_code = operandsWithExtRE.sub(r'\1_\2', code)
    return munged_code
1311
# Given a sequence of string sequences, return a list in which each
# inner sequence has been joined into one space-separated string.
def joinLists(t):
    # str.join() in a list comprehension replaces map(string.join, ...):
    # same result, but always a real list and no reliance on the
    # deprecated function in the 'string' module.
    return [' '.join(words) for words in t]
1314
# Return the C++ constructor code that sets 'flags[<flag>] = true;' for
# every distinct flag in flag_list, or '' when there are no flags.
#
# NOTE: flag_list is modified in place: on return it is sorted and
# free of duplicates.
def makeFlagConstructor(flag_list):
    if not flag_list:
        return ''
    # filter out repeated flags: after sorting, duplicates are adjacent,
    # so one linear pass that compares against the last kept flag is
    # enough
    flag_list.sort()
    unique_flags = []
    for flag in flag_list:
        if not unique_flags or unique_flags[-1] != flag:
            unique_flags.append(flag)
    # slice assignment keeps the caller's list object and updates it
    flag_list[:] = unique_flags
    pre = '\n\tflags['
    post = '] = true;'
    return ''.join([pre + flag + post for flag in flag_list])
1330
# A block of instruction pseudocode together with everything derived
# from it: the operand list, the code with bit operators and operand
# names rewritten, and the concatenated per-operand fragments
# (constructor, declarations, reads, write-backs, flags), plus a first
# guess at the operand class.
class CodeBlock:
    def __init__(self, code):
        self.orig_code = code
        op_list = findOperands(code)
        self.operands = op_list
        self.code = substMungedOpNames(substBitOps(code))

        # Per-operand constructor code followed by the register counts.
        ctor_pieces = [
            op_list.concatAttrStrings('constructor'),
            '\n\t_numSrcRegs = %d;' % op_list.numSrcRegs,
            '\n\t_numDestRegs = %d;' % op_list.numDestRegs,
            '\n\t_numFPDestRegs = %d;' % op_list.numFPDestRegs,
            '\n\t_numIntDestRegs = %d;' % op_list.numIntDestRegs,
        ]
        self.constructor = ''.join(ctor_pieces)

        self.op_decl = op_list.concatAttrStrings('op_decl')

        def touches_memory(op):
            return op.traits.isMem()

        def avoids_memory(op):
            return not op.traits.isMem()

        # Read / write-back code: for all operands, for memory operands
        # only, and for non-memory operands only.
        self.op_rd = op_list.concatAttrStrings('op_rd')
        self.op_wb = op_list.concatAttrStrings('op_wb')
        self.op_mem_rd = op_list.concatSomeAttrStrings(touches_memory,
                                                       'op_rd')
        self.op_mem_wb = op_list.concatSomeAttrStrings(touches_memory,
                                                       'op_wb')
        self.op_nonmem_rd = op_list.concatSomeAttrStrings(avoids_memory,
                                                          'op_rd')
        self.op_nonmem_wb = op_list.concatSomeAttrStrings(avoids_memory,
                                                          'op_wb')

        flags = op_list.concatAttrLists('flags')
        self.flags = flags

        # First guess at the operand class (function unit type), based
        # on the flags alone.  Good enough for most cases; it gets
        # overridden later otherwise.
        reads_memory = 'IsLoad' in flags or 'IsPrefetch' in flags
        if 'IsStore' in flags:
            self.op_class = 'WrPort'
        elif reads_memory:
            self.op_class = 'RdPort'
        elif 'IsFloating' in flags:
            self.op_class = 'FloatADD'
        else:
            self.op_class = 'IntALU'
1375
# Assume all instruction flags are of the form 'IsFoo'.  Applied with
# match() to each optional argument in InstObjParams.__init__(), so any
# string that merely starts with 'Is' is taken to be a flag.
instFlagRE = re.compile(r'Is.*')

# OpClass constants are just a little more complicated: a name starting
# with 'Int' or 'Float', a name containing 'Port', or 'No_OpClass'.
# Only consulted for arguments that did not match instFlagRE.
opClassRE = re.compile(r'Int.*|Float.*|.*Port|No_OpClass')
1381
# Parameter set used to instantiate code templates for one instruction
# object.  The instance attributes are the substitution variables:
# templates are expanded with 'template % self.__dict__'.
#
# Constructor arguments:
#   mnem       - stored as 'mnemonic'
#   class_name - stored as 'class_name'
#   base_class - stored as 'base_class' (default '')
#   code_block - optional object (e.g. a CodeBlock) whose instance
#                attributes are all copied onto this object
#   opt_args   - optional sequence of strings, each either a flag name
#                (matching instFlagRE) or an OpClass name (matching
#                opClassRE)
class InstObjParams:
    def __init__(self, mnem, class_name, base_class = '',
                 code_block = None, opt_args = None):
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        self.exec_func_declarations = '''
    Fault execute(SimpleCPUExecContext *, Trace::InstRecord *);
    Fault execute(FullCPUExecContext *, Trace::InstRecord *);
'''
        if code_block:
            for code_attr in code_block.__dict__.keys():
                setattr(self, code_attr, getattr(code_block, code_attr))
            # Work on a private copy of the flag list: the appends below
            # and the in-place sort/dedup done by makeFlagConstructor()
            # must not leak back into the code block, which may be used
            # to build more than one InstObjParams.
            self.flags = list(self.flags)
        else:
            self.constructor = ''
            self.flags = []
        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        if opt_args is None:
            opt_args = []
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error(0, 'InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''

    # Expand one template string against this object's attributes.
    # Raises KeyError (with a descriptive message) if the template
    # refers to a name that is not an attribute of this object.
    def _subst(self, template):
        try:
            return template % self.__dict__
        except KeyError as key:
            raise KeyError('InstObjParams.subst: no definition for %s' % key)

    # Expand the named templates (keys of templateMap).  A template
    # that mentions %(cpu_model) is expanded once per CPU model and the
    # expansions are concatenated.  Returns a single string when called
    # with exactly one template name, otherwise a list of strings in
    # argument order.
    def subst(self, *args):
        result = []
        for t in args:
            try:
                template = templateMap[t]
            except KeyError:
                error(0, 'InstObjParams::subst: undefined template "%s"' % t)
            if template.find('%(cpu_model)') != -1:
                tmp = ''
                for cpu_model in ('SimpleCPUExecContext', 'FullCPUExecContext'):
                    # cpu_model is made an attribute so that the
                    # template can substitute it like any other name
                    self.cpu_model = cpu_model
                    tmp += self._subst(template)
                result.append(tmp)
            else:
                result.append(self._subst(template))
        if len(args) == 1:
            result = result[0]
        return result
1445
#
# Read in and parse the ISA description.
#
# Arguments are the name of the ISA description (input) file and the
# name of the C++ decoder (output) file.  Both names are recorded in
# the globals isa_desc_filename and decoder_filename; the contents of
# the input file are then handed to yacc.parse().
#
def parse_isa_desc(isa_desc_file, decoder_file):
    global isa_desc_filename, decoder_filename
    isa_desc_filename = isa_desc_file
    decoder_filename = decoder_file

    # Suck the ISA description file in.  The 'with' block closes the
    # file even if reading it fails.
    with open(isa_desc_filename) as isa_desc_input:
        isa_desc = isa_desc_input.read()

    # Parse it.
    yacc.parse(isa_desc)
1463
# Called as script: get args from command line.  Expects the ISA
# description (input) file name and the decoder (output) file name;
# any further arguments are ignored.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit('usage: %s <isa_desc_file> <decoder_file>' % sys.argv[0])
    parse_isa_desc(sys.argv[1], sys.argv[2])