src/arch/isa_parser.py

   1 # Copyright (c) 2003-2005 The Regents of The University of Michigan
   2 # Copyright (c) 2013 Advanced Micro Devices, Inc.
   3 # All rights reserved.
   4 #
   5 # Redistribution and use in source and binary forms, with or without
   6 # modification, are permitted provided that the following conditions are
   7 # met: redistributions of source code must retain the above copyright
   8 # notice, this list of conditions and the following disclaimer;
   9 # redistributions in binary form must reproduce the above copyright
  10 # notice, this list of conditions and the following disclaimer in the
  11 # documentation and/or other materials provided with the distribution;
  12 # neither the name of the copyright holders nor the names of its
  13 # contributors may be used to endorse or promote products derived from
  14 # this software without specific prior written permission.
  15 #
  16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27 #
  28 # Authors: Steve Reinhardt
  29
  30 import os
  31 import sys
  32 import re
  33 import string
  34 import inspect, traceback
  35 # get type names
  36 from types import *
  37
  38 from m5.util.grammar import Grammar
  39
# Module-wide debugging switch (off by default).  It is consulted as the
# default for the 'print_traceback' argument of ISAParserError.display()
# and exit(), and Format.defineInst() re-raises the original exception
# instead of converting it into a parser error when it is set.
debug=False
  41
  42 ###################
  43 # Utility functions
  44
  45 #
  46 # Indent every line in string 's' by two spaces
  47 # (except preprocessor directives).
  48 # Used to make nested code blocks look pretty.
  49 #
  50 def indent(s):
  51     return re.sub(r'(?m)^(?!#)', '  ', s)
  52
  53 #
  54 # Munge a somewhat arbitrarily formatted piece of Python code
  55 # (e.g. from a format 'let' block) into something whose indentation
  56 # will get by the Python parser.
  57 #
  58 # The two keys here are that Python will give a syntax error if
  59 # there's any whitespace at the beginning of the first line, and that
  60 # all lines at the same lexical nesting level must have identical
  61 # indentation.  Unfortunately the way code literals work, an entire
  62 # let block tends to have some initial indentation.  Rather than
  63 # trying to figure out what that is and strip it off, we prepend 'if
  64 # 1:' to make the let code the nested block inside the if (and have
  65 # the parser automatically deal with the indentation for us).
  66 #
  67 # We don't want to do this if (1) the code block is empty or (2) the
  68 # first line of the block doesn't have any whitespace at the front.
  69
  70 def fixPythonIndentation(s):
  71     # get rid of blank lines first
  72     s = re.sub(r'(?m)^\s*\n', '', s);
  73     if (s != '' and re.match(r'[ \t]', s[0])):
  74         s = 'if 1:\n' + s
  75     return s
  76
  77 class ISAParserError(Exception):
  78     """Error handler for parser errors"""
  79     def __init__(self, first, second=None):
  80         if second is None:
  81             self.lineno = 0
  82             self.string = first
  83         else:
  84             if hasattr(first, 'lexer'):
  85                 first = first.lexer.lineno
  86             self.lineno = first
  87             self.string = second
  88
  89     def display(self, filename_stack, print_traceback=debug):
  90         # Output formatted to work under Emacs compile-mode.  Optional
  91         # 'print_traceback' arg, if set to True, prints a Python stack
  92         # backtrace too (can be handy when trying to debug the parser
  93         # itself).
  94
  95         spaces = ""
  96         for (filename, line) in filename_stack[:-1]:
  97             print "%sIn file included from %s:" % (spaces, filename)
  98             spaces += "  "
  99
 100         # Print a Python stack backtrace if requested.
 101         if print_traceback or not self.lineno:
 102             traceback.print_exc()
 103
 104         line_str = "%s:" % (filename_stack[-1][0], )
 105         if self.lineno:
 106             line_str += "%d:" % (self.lineno, )
 107
 108         return "%s%s %s" % (spaces, line_str, self.string)
 109
 110     def exit(self, filename_stack, print_traceback=debug):
 111         # Just call exit.
 112
 113         sys.exit(self.display(filename_stack, print_traceback))
 114
 115 def error(*args):
 116     raise ISAParserError(*args)
 117
 118 ####################
 119 # Template objects.
 120 #
 121 # Template objects are format strings that allow substitution from
 122 # the attribute spaces of other objects (e.g. InstObjParams instances).
 123
# Matches a dictionary-style substitution label, i.e. '%(name)s' or
# '%(name)d', that is not immediately preceded by another '%'.  Group 1
# captures the name between the parentheses.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
 125
 126 class Template(object):
 127     def __init__(self, parser, t):
 128         self.parser = parser
 129         self.template = t
 130
 131     def subst(self, d):
 132         myDict = None
 133
 134         # Protect non-Python-dict substitutions (e.g. if there's a printf
 135         # in the templated C++ code)
 136         template = self.parser.protectNonSubstPercents(self.template)
 137         # CPU-model-specific substitutions are handled later (in GenCode).
 138         template = self.parser.protectCpuSymbols(template)
 139
 140         # Build a dict ('myDict') to use for the template substitution.
 141         # Start with the template namespace.  Make a copy since we're
 142         # going to modify it.
 143         myDict = self.parser.templateMap.copy()
 144
 145         if isinstance(d, InstObjParams):
 146             # If we're dealing with an InstObjParams object, we need
 147             # to be a little more sophisticated.  The instruction-wide
 148             # parameters are already formed, but the parameters which
 149             # are only function wide still need to be generated.
 150             compositeCode = ''
 151
 152             myDict.update(d.__dict__)
 153             # The "operands" and "snippets" attributes of the InstObjParams
 154             # objects are for internal use and not substitution.
 155             del myDict['operands']
 156             del myDict['snippets']
 157
 158             snippetLabels = [l for l in labelRE.findall(template)
 159                              if d.snippets.has_key(l)]
 160
 161             snippets = dict([(s, self.parser.mungeSnippet(d.snippets[s]))
 162                              for s in snippetLabels])
 163
 164             myDict.update(snippets)
 165
 166             compositeCode = ' '.join(map(str, snippets.values()))
 167
 168             # Add in template itself in case it references any
 169             # operands explicitly (like Mem)
 170             compositeCode += ' ' + template
 171
 172             operands = SubOperandList(self.parser, compositeCode, d.operands)
 173
 174             myDict['op_decl'] = operands.concatAttrStrings('op_decl')
 175             if operands.readPC or operands.setPC:
 176                 myDict['op_decl'] += 'TheISA::PCState __parserAutoPCState;\n'
 177
 178             # In case there are predicated register reads and write, declare
 179             # the variables for register indicies. It is being assumed that
 180             # all the operands in the OperandList are also in the
 181             # SubOperandList and in the same order. Otherwise, it is
 182             # expected that predication would not be used for the operands.
 183             if operands.predRead:
 184                 myDict['op_decl'] += 'uint8_t _sourceIndex = 0;\n'
 185             if operands.predWrite:
 186                 myDict['op_decl'] += 'uint8_t M5_VAR_USED _destIndex = 0;\n'
 187
 188             is_src = lambda op: op.is_src
 189             is_dest = lambda op: op.is_dest
 190
 191             myDict['op_src_decl'] = \
 192                       operands.concatSomeAttrStrings(is_src, 'op_src_decl')
 193             myDict['op_dest_decl'] = \
 194                       operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
 195             if operands.readPC:
 196                 myDict['op_src_decl'] += \
 197                     'TheISA::PCState __parserAutoPCState;\n'
 198             if operands.setPC:
 199                 myDict['op_dest_decl'] += \
 200                     'TheISA::PCState __parserAutoPCState;\n'
 201
 202             myDict['op_rd'] = operands.concatAttrStrings('op_rd')
 203             if operands.readPC:
 204                 myDict['op_rd'] = '__parserAutoPCState = xc->pcState();\n' + \
 205                                   myDict['op_rd']
 206
 207             # Compose the op_wb string. If we're going to write back the
 208             # PC state because we changed some of its elements, we'll need to
 209             # do that as early as possible. That allows later uncoordinated
 210             # modifications to the PC to layer appropriately.
 211             reordered = list(operands.items)
 212             reordered.reverse()
 213             op_wb_str = ''
 214             pcWbStr = 'xc->pcState(__parserAutoPCState);\n'
 215             for op_desc in reordered:
 216                 if op_desc.isPCPart() and op_desc.is_dest:
 217                     op_wb_str = op_desc.op_wb + pcWbStr + op_wb_str
 218                     pcWbStr = ''
 219                 else:
 220                     op_wb_str = op_desc.op_wb + op_wb_str
 221             myDict['op_wb'] = op_wb_str
 222
 223         elif isinstance(d, dict):
 224             # if the argument is a dictionary, we just use it.
 225             myDict.update(d)
 226         elif hasattr(d, '__dict__'):
 227             # if the argument is an object, we use its attribute map.
 228             myDict.update(d.__dict__)
 229         else:
 230             raise TypeError, "Template.subst() arg must be or have dictionary"
 231         return template % myDict
 232
 233     # Convert to string.  This handles the case when a template with a
 234     # CPU-specific term gets interpolated into another template or into
 235     # an output block.
 236     def __str__(self):
 237         return self.parser.expandCpuSymbolsToString(self.template)
 238
 239 ################
 240 # Format object.
 241 #
 242 # A format object encapsulates an instruction format.  It must provide
 243 # a defineInst() method that generates the code for an instruction
 244 # definition.
 245
 246 class Format(object):
 247     def __init__(self, id, params, code):
 248         self.id = id
 249         self.params = params
 250         label = 'def format ' + id
 251         self.user_code = compile(fixPythonIndentation(code), label, 'exec')
 252         param_list = string.join(params, ", ")
 253         f = '''def defInst(_code, _context, %s):
 254                 my_locals = vars().copy()
 255                 exec _code in _context, my_locals
 256                 return my_locals\n''' % param_list
 257         c = compile(f, label + ' wrapper', 'exec')
 258         exec c
 259         self.func = defInst
 260
 261     def defineInst(self, parser, name, args, lineno):
 262         parser.updateExportContext()
 263         context = parser.exportContext.copy()
 264         if len(name):
 265             Name = name[0].upper()
 266             if len(name) > 1:
 267                 Name += name[1:]
 268         context.update({ 'name' : name, 'Name' : Name })
 269         try:
 270             vars = self.func(self.user_code, context, *args[0], **args[1])
 271         except Exception, exc:
 272             if debug:
 273                 raise
 274             error(lineno, 'error defining "%s": %s.' % (name, exc))
 275         for k in vars.keys():
 276             if k not in ('header_output', 'decoder_output',
 277                          'exec_output', 'decode_block'):
 278                 del vars[k]
 279         return GenCode(parser, **vars)
 280
 281 # Special null format to catch an implicit-format instruction
 282 # definition outside of any format block.
 283 class NoFormat(object):
 284     def __init__(self):
 285         self.defaultInst = ''
 286
 287     def defineInst(self, parser, name, args, lineno):
 288         error(lineno,
 289               'instruction definition "%s" with no active format!' % name)
 290
 291 ###############
 292 # GenCode class
 293 #
 294 # The GenCode class encapsulates generated code destined for various
 295 # output files.  The header_output and decoder_output attributes are
 296 # strings containing code destined for decoder.hh and decoder.cc
 297 # respectively.  The decode_block attribute contains code to be
 298 # incorporated in the decode function itself (that will also end up in
 299 # decoder.cc).  The exec_output attribute is a dictionary with a key
 300 # for each CPU model name; the value associated with a particular key
 301 # is the string of code for that CPU model's exec.cc file.  The
 302 # has_decode_default attribute is used in the decode block to allow
 303 # explicit default clauses to override default default clauses.
 304
 305 class GenCode(object):
 306     # Constructor.  At this point we substitute out all CPU-specific
 307     # symbols.  For the exec output, these go into the per-model
 308     # dictionary.  For all other output types they get collapsed into
 309     # a single string.
 310     def __init__(self, parser,
 311                  header_output = '', decoder_output = '', exec_output = '',
 312                  decode_block = '', has_decode_default = False):
 313         self.parser = parser
 314         self.header_output = parser.expandCpuSymbolsToString(header_output)
 315         self.decoder_output = parser.expandCpuSymbolsToString(decoder_output)
 316         if isinstance(exec_output, dict):
 317             self.exec_output = exec_output
 318         elif isinstance(exec_output, str):
 319             # If the exec_output arg is a single string, we replicate
 320             # it for each of the CPU models, substituting and
 321             # %(CPU_foo)s params appropriately.
 322             self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
 323         self.decode_block = parser.expandCpuSymbolsToString(decode_block)
 324         self.has_decode_default = has_decode_default
 325
 326     # Override '+' operator: generate a new GenCode object that
 327     # concatenates all the individual strings in the operands.
 328     def __add__(self, other):
 329         exec_output = {}
 330         for cpu in self.parser.cpuModels:
 331             n = cpu.name
 332             exec_output[n] = self.exec_output[n] + other.exec_output[n]
 333         return GenCode(self.parser,
 334                        self.header_output + other.header_output,
 335                        self.decoder_output + other.decoder_output,
 336                        exec_output,
 337                        self.decode_block + other.decode_block,
 338                        self.has_decode_default or other.has_decode_default)
 339
 340     # Prepend a string (typically a comment) to all the strings.
 341     def prepend_all(self, pre):
 342         self.header_output = pre + self.header_output
 343         self.decoder_output  = pre + self.decoder_output
 344         self.decode_block = pre + self.decode_block
 345         for cpu in self.parser.cpuModels:
 346             self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]
 347
 348     # Wrap the decode block in a pair of strings (e.g., 'case foo:'
 349     # and 'break;').  Used to build the big nested switch statement.
 350     def wrap_decode_block(self, pre, post = ''):
 351         self.decode_block = pre + indent(self.decode_block) + post
 352
 353 #####################################################################
 354 #
 355 #                      Bitfield Operator Support
 356 #
 357 #####################################################################
 358
 359 bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')
 360
 361 bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
 362 bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')
 363
 364 def substBitOps(code):
 365     # first convert single-bit selectors to two-index form
 366     # i.e., <n> --> <n:n>
 367     code = bitOp1ArgRE.sub(r'<\1:\1>', code)
 368     # simple case: selector applied to ID (name)
 369     # i.e., foo<a:b> --> bits(foo, a, b)
 370     code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
 371     # if selector is applied to expression (ending in ')'),
 372     # we need to search backward for matching '('
 373     match = bitOpExprRE.search(code)
 374     while match:
 375         exprEnd = match.start()
 376         here = exprEnd - 1
 377         nestLevel = 1
 378         while nestLevel > 0:
 379             if code[here] == '(':
 380                 nestLevel -= 1
 381             elif code[here] == ')':
 382                 nestLevel += 1
 383             here -= 1
 384             if here < 0:
 385                 sys.exit("Didn't find '('!")
 386         exprStart = here+1
 387         newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
 388                                          match.group(1), match.group(2))
 389         code = code[:exprStart] + newExpr + code[match.end():]
 390         match = bitOpExprRE.search(code)
 391     return code
 392
 393
 394 #####################################################################
 395 #
 396 #                             Code Parser
 397 #
 398 # The remaining code is the support for automatically extracting
 399 # instruction characteristics from pseudocode.
 400 #
 401 #####################################################################
 402
 403 # Force the argument to be a list.  Useful for flags, where a caller
 404 # can specify a singleton flag or a list of flags.  Also usful for
 405 # converting tuples to lists so they can be modified.
 406 def makeList(arg):
 407     if isinstance(arg, list):
 408         return arg
 409     elif isinstance(arg, tuple):
 410         return list(arg)
 411     elif not arg:
 412         return []
 413     else:
 414         return [ arg ]
 415
 416 class Operand(object):
 417     '''Base class for operand descriptors.  An instance of this class
 418     (or actually a class derived from this one) represents a specific
 419     operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
 420     derived classes encapsulates the traits of a particular operand
 421     type (e.g., "32-bit integer register").'''
 422
 423     def buildReadCode(self, func = None):
 424         subst_dict = {"name": self.base_name,
 425                       "func": func,
 426                       "reg_idx": self.reg_spec,
 427                       "ctype": self.ctype}
 428         if hasattr(self, 'src_reg_idx'):
 429             subst_dict['op_idx'] = self.src_reg_idx
 430         code = self.read_code % subst_dict
 431         return '%s = %s;\n' % (self.base_name, code)
 432
 433     def buildWriteCode(self, func = None):
 434         subst_dict = {"name": self.base_name,
 435                       "func": func,
 436                       "reg_idx": self.reg_spec,
 437                       "ctype": self.ctype,
 438                       "final_val": self.base_name}
 439         if hasattr(self, 'dest_reg_idx'):
 440             subst_dict['op_idx'] = self.dest_reg_idx
 441         code = self.write_code % subst_dict
 442         return '''
 443         {
 444             %s final_val = %s;
 445             %s;
 446             if (traceData) { traceData->setData(final_val); }
 447         }''' % (self.dflt_ctype, self.base_name, code)
 448
 449     def __init__(self, parser, full_name, ext, is_src, is_dest):
 450         self.full_name = full_name
 451         self.ext = ext
 452         self.is_src = is_src
 453         self.is_dest = is_dest
 454         # The 'effective extension' (eff_ext) is either the actual
 455         # extension, if one was explicitly provided, or the default.
 456         if ext:
 457             self.eff_ext = ext
 458         elif hasattr(self, 'dflt_ext'):
 459             self.eff_ext = self.dflt_ext
 460
 461         if hasattr(self, 'eff_ext'):
 462             self.ctype = parser.operandTypeMap[self.eff_ext]
 463
 464     # Finalize additional fields (primarily code fields).  This step
 465     # is done separately since some of these fields may depend on the
 466     # register index enumeration that hasn't been performed yet at the
 467     # time of __init__(). The register index enumeration is affected
 468     # by predicated register reads/writes. Hence, we forward the flags
 469     # that indicate whether or not predication is in use.
 470     def finalize(self, predRead, predWrite):
 471         self.flags = self.getFlags()
 472         self.constructor = self.makeConstructor(predRead, predWrite)
 473         self.op_decl = self.makeDecl()
 474
 475         if self.is_src:
 476             self.op_rd = self.makeRead(predRead)
 477             self.op_src_decl = self.makeDecl()
 478         else:
 479             self.op_rd = ''
 480             self.op_src_decl = ''
 481
 482         if self.is_dest:
 483             self.op_wb = self.makeWrite(predWrite)
 484             self.op_dest_decl = self.makeDecl()
 485         else:
 486             self.op_wb = ''
 487             self.op_dest_decl = ''
 488
 489     def isMem(self):
 490         return 0
 491
 492     def isReg(self):
 493         return 0
 494
 495     def isFloatReg(self):
 496         return 0
 497
 498     def isIntReg(self):
 499         return 0
 500
 501     def isCCReg(self):
 502         return 0
 503
 504     def isControlReg(self):
 505         return 0
 506
 507     def isPCState(self):
 508         return 0
 509
 510     def isPCPart(self):
 511         return self.isPCState() and self.reg_spec
 512
 513     def hasReadPred(self):
 514         return self.read_predicate != None
 515
 516     def hasWritePred(self):
 517         return self.write_predicate != None
 518
 519     def getFlags(self):
 520         # note the empty slice '[:]' gives us a copy of self.flags[0]
 521         # instead of a reference to it
 522         my_flags = self.flags[0][:]
 523         if self.is_src:
 524             my_flags += self.flags[1]
 525         if self.is_dest:
 526             my_flags += self.flags[2]
 527         return my_flags
 528
 529     def makeDecl(self):
 530         # Note that initializations in the declarations are solely
 531         # to avoid 'uninitialized variable' errors from the compiler.
 532         return self.ctype + ' ' + self.base_name + ' = 0;\n';
 533
 534 class IntRegOperand(Operand):
 535     def isReg(self):
 536         return 1
 537
 538     def isIntReg(self):
 539         return 1
 540
 541     def makeConstructor(self, predRead, predWrite):
 542         c_src = ''
 543         c_dest = ''
 544
 545         if self.is_src:
 546             c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s;' % (self.reg_spec)
 547             if self.hasReadPred():
 548                 c_src = '\n\tif (%s) {%s\n\t}' % \
 549                         (self.read_predicate, c_src)
 550
 551         if self.is_dest:
 552             c_dest = '\n\t_destRegIdx[_numDestRegs++] = %s;' % \
 553                     (self.reg_spec)
 554             c_dest += '\n\t_numIntDestRegs++;'
 555             if self.hasWritePred():
 556                 c_dest = '\n\tif (%s) {%s\n\t}' % \
 557                          (self.write_predicate, c_dest)
 558
 559         return c_src + c_dest
 560
 561     def makeRead(self, predRead):
 562         if (self.ctype == 'float' or self.ctype == 'double'):
 563             error('Attempt to read integer register as FP')
 564         if self.read_code != None:
 565             return self.buildReadCode('readIntRegOperand')
 566
 567         int_reg_val = ''
 568         if predRead:
 569             int_reg_val = 'xc->readIntRegOperand(this, _sourceIndex++)'
 570             if self.hasReadPred():
 571                 int_reg_val = '(%s) ? %s : 0' % \
 572                               (self.read_predicate, int_reg_val)
 573         else:
 574             int_reg_val = 'xc->readIntRegOperand(this, %d)' % self.src_reg_idx
 575
 576         return '%s = %s;\n' % (self.base_name, int_reg_val)
 577
 578     def makeWrite(self, predWrite):
 579         if (self.ctype == 'float' or self.ctype == 'double'):
 580             error('Attempt to write integer register as FP')
 581         if self.write_code != None:
 582             return self.buildWriteCode('setIntRegOperand')
 583
 584         if predWrite:
 585             wp = 'true'
 586             if self.hasWritePred():
 587                 wp = self.write_predicate
 588
 589             wcond = 'if (%s)' % (wp)
 590             windex = '_destIndex++'
 591         else:
 592             wcond = ''
 593             windex = '%d' % self.dest_reg_idx
 594
 595         wb = '''
 596         %s
 597         {
 598             %s final_val = %s;
 599             xc->setIntRegOperand(this, %s, final_val);\n
 600             if (traceData) { traceData->setData(final_val); }
 601         }''' % (wcond, self.ctype, self.base_name, windex)
 602
 603         return wb
 604
 605 class FloatRegOperand(Operand):
 606     def isReg(self):
 607         return 1
 608
 609     def isFloatReg(self):
 610         return 1
 611
 612     def makeConstructor(self, predRead, predWrite):
 613         c_src = ''
 614         c_dest = ''
 615
 616         if self.is_src:
 617             c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + FP_Reg_Base;' % \
 618                     (self.reg_spec)
 619
 620         if self.is_dest:
 621             c_dest = \
 622               '\n\t_destRegIdx[_numDestRegs++] = %s + FP_Reg_Base;' % \
 623               (self.reg_spec)
 624             c_dest += '\n\t_numFPDestRegs++;'
 625
 626         return c_src + c_dest
 627
 628     def makeRead(self, predRead):
 629         bit_select = 0
 630         if (self.ctype == 'float' or self.ctype == 'double'):
 631             func = 'readFloatRegOperand'
 632         else:
 633             func = 'readFloatRegOperandBits'
 634         if self.read_code != None:
 635             return self.buildReadCode(func)
 636
 637         if predRead:
 638             rindex = '_sourceIndex++'
 639         else:
 640             rindex = '%d' % self.src_reg_idx
 641
 642         return '%s = xc->%s(this, %s);\n' % \
 643             (self.base_name, func, rindex)
 644
 645     def makeWrite(self, predWrite):
 646         if (self.ctype == 'float' or self.ctype == 'double'):
 647             func = 'setFloatRegOperand'
 648         else:
 649             func = 'setFloatRegOperandBits'
 650         if self.write_code != None:
 651             return self.buildWriteCode(func)
 652
 653         if predWrite:
 654             wp = '_destIndex++'
 655         else:
 656             wp = '%d' % self.dest_reg_idx
 657         wp = 'xc->%s(this, %s, final_val);' % (func, wp)
 658
 659         wb = '''
 660         {
 661             %s final_val = %s;
 662             %s\n
 663             if (traceData) { traceData->setData(final_val); }
 664         }''' % (self.ctype, self.base_name, wp)
 665         return wb
 666
 667 class CCRegOperand(Operand):
 668     def isReg(self):
 669         return 1
 670
 671     def isCCReg(self):
 672         return 1
 673
 674     def makeConstructor(self, predRead, predWrite):
 675         c_src = ''
 676         c_dest = ''
 677
 678         if self.is_src:
 679             c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + CC_Reg_Base;' % \
 680                      (self.reg_spec)
 681             if self.hasReadPred():
 682                 c_src = '\n\tif (%s) {%s\n\t}' % \
 683                         (self.read_predicate, c_src)
 684
 685         if self.is_dest:
 686             c_dest = \
 687               '\n\t_destRegIdx[_numDestRegs++] = %s + CC_Reg_Base;' % \
 688               (self.reg_spec)
 689             c_dest += '\n\t_numCCDestRegs++;'
 690             if self.hasWritePred():
 691                 c_dest = '\n\tif (%s) {%s\n\t}' % \
 692                          (self.write_predicate, c_dest)
 693
 694         return c_src + c_dest
 695
 696     def makeRead(self, predRead):
 697         if (self.ctype == 'float' or self.ctype == 'double'):
 698             error('Attempt to read condition-code register as FP')
 699         if self.read_code != None:
 700             return self.buildReadCode('readCCRegOperand')
 701
 702         int_reg_val = ''
 703         if predRead:
 704             int_reg_val = 'xc->readCCRegOperand(this, _sourceIndex++)'
 705             if self.hasReadPred():
 706                 int_reg_val = '(%s) ? %s : 0' % \
 707                               (self.read_predicate, int_reg_val)
 708         else:
 709             int_reg_val = 'xc->readCCRegOperand(this, %d)' % self.src_reg_idx
 710
 711         return '%s = %s;\n' % (self.base_name, int_reg_val)
 712
 713     def makeWrite(self, predWrite):
 714         if (self.ctype == 'float' or self.ctype == 'double'):
 715             error('Attempt to write condition-code register as FP')
 716         if self.write_code != None:
 717             return self.buildWriteCode('setCCRegOperand')
 718
 719         if predWrite:
 720             wp = 'true'
 721             if self.hasWritePred():
 722                 wp = self.write_predicate
 723
 724             wcond = 'if (%s)' % (wp)
 725             windex = '_destIndex++'
 726         else:
 727             wcond = ''
 728             windex = '%d' % self.dest_reg_idx
 729
 730         wb = '''
 731         %s
 732         {
 733             %s final_val = %s;
 734             xc->setCCRegOperand(this, %s, final_val);\n
 735             if (traceData) { traceData->setData(final_val); }
 736         }''' % (wcond, self.ctype, self.base_name, windex)
 737
 738         return wb
 739
 740 class ControlRegOperand(Operand):
 741     def isReg(self):
 742         return 1
 743
 744     def isControlReg(self):
 745         return 1
 746
 747     def makeConstructor(self, predRead, predWrite):
 748         c_src = ''
 749         c_dest = ''
 750
 751         if self.is_src:
 752             c_src = \
 753               '\n\t_srcRegIdx[_numSrcRegs++] = %s + Misc_Reg_Base;' % \
 754               (self.reg_spec)
 755
 756         if self.is_dest:
 757             c_dest = \
 758               '\n\t_destRegIdx[_numDestRegs++] = %s + Misc_Reg_Base;' % \
 759               (self.reg_spec)
 760
 761         return c_src + c_dest
 762
 763     def makeRead(self, predRead):
 764         bit_select = 0
 765         if (self.ctype == 'float' or self.ctype == 'double'):
 766             error('Attempt to read control register as FP')
 767         if self.read_code != None:
 768             return self.buildReadCode('readMiscRegOperand')
 769
 770         if predRead:
 771             rindex = '_sourceIndex++'
 772         else:
 773             rindex = '%d' % self.src_reg_idx
 774
 775         return '%s = xc->readMiscRegOperand(this, %s);\n' % \
 776             (self.base_name, rindex)
 777
 778     def makeWrite(self, predWrite):
 779         if (self.ctype == 'float' or self.ctype == 'double'):
 780             error('Attempt to write control register as FP')
 781         if self.write_code != None:
 782             return self.buildWriteCode('setMiscRegOperand')
 783
 784         if predWrite:
 785             windex = '_destIndex++'
 786         else:
 787             windex = '%d' % self.dest_reg_idx
 788
 789         wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
 790              (windex, self.base_name)
 791         wb += 'if (traceData) { traceData->setData(%s); }' % \
 792               self.base_name
 793
 794         return wb
 795
 796 class MemOperand(Operand):
 797     def isMem(self):
 798         return 1
 799
 800     def makeConstructor(self, predRead, predWrite):
 801         return ''
 802
 803     def makeDecl(self):
 804         # Note that initializations in the declarations are solely
 805         # to avoid 'uninitialized variable' errors from the compiler.
 806         # Declare memory data variable.
 807         return '%s %s = 0;\n' % (self.ctype, self.base_name)
 808
 809     def makeRead(self, predRead):
 810         if self.read_code != None:
 811             return self.buildReadCode()
 812         return ''
 813
 814     def makeWrite(self, predWrite):
 815         if self.write_code != None:
 816             return self.buildWriteCode()
 817         return ''
 818
 819 class PCStateOperand(Operand):
 820     def makeConstructor(self, predRead, predWrite):
 821         return ''
 822
 823     def makeRead(self, predRead):
 824         if self.reg_spec:
 825             # A component of the PC state.
 826             return '%s = __parserAutoPCState.%s();\n' % \
 827                 (self.base_name, self.reg_spec)
 828         else:
 829             # The whole PC state itself.
 830             return '%s = xc->pcState();\n' % self.base_name
 831
 832     def makeWrite(self, predWrite):
 833         if self.reg_spec:
 834             # A component of the PC state.
 835             return '__parserAutoPCState.%s(%s);\n' % \
 836                 (self.reg_spec, self.base_name)
 837         else:
 838             # The whole PC state itself.
 839             return 'xc->pcState(%s);\n' % self.base_name
 840
 841     def makeDecl(self):
 842         ctype = 'TheISA::PCState'
 843         if self.isPCPart():
 844             ctype = self.ctype
 845         return "%s %s;\n" % (ctype, self.base_name)
 846
 847     def isPCState(self):
 848         return 1
 849
 850 class OperandList(object):
 851     '''Find all the operands in the given code block.  Returns an operand
 852     descriptor list (instance of class OperandList).'''
 853     def __init__(self, parser, code):
 854         self.items = []
 855         self.bases = {}
 856         # delete strings and comments so we don't match on operands inside
 857         for regEx in (stringRE, commentRE):
 858             code = regEx.sub('', code)
 859         # search for operands
 860         next_pos = 0
 861         while 1:
 862             match = parser.operandsRE.search(code, next_pos)
 863             if not match:
 864                 # no more matches: we're done
 865                 break
 866             op = match.groups()
 867             # regexp groups are operand full name, base, and extension
 868             (op_full, op_base, op_ext) = op
 869             # if the token following the operand is an assignment, this is
 870             # a destination (LHS), else it's a source (RHS)
 871             is_dest = (assignRE.match(code, match.end()) != None)
 872             is_src = not is_dest
 873             # see if we've already seen this one
 874             op_desc = self.find_base(op_base)
 875             if op_desc:
 876                 if op_desc.ext != op_ext:
 877                     error('Inconsistent extensions for operand %s' % \
 878                           op_base)
 879                 op_desc.is_src = op_desc.is_src or is_src
 880                 op_desc.is_dest = op_desc.is_dest or is_dest
 881             else:
 882                 # new operand: create new descriptor
 883                 op_desc = parser.operandNameMap[op_base](parser,
 884                     op_full, op_ext, is_src, is_dest)
 885                 self.append(op_desc)
 886             # start next search after end of current match
 887             next_pos = match.end()
 888         self.sort()
 889         # enumerate source & dest register operands... used in building
 890         # constructor later
 891         self.numSrcRegs = 0
 892         self.numDestRegs = 0
 893         self.numFPDestRegs = 0
 894         self.numIntDestRegs = 0
 895         self.numCCDestRegs = 0
 896         self.numMiscDestRegs = 0
 897         self.memOperand = None
 898
 899         # Flags to keep track if one or more operands are to be read/written
 900         # conditionally.
 901         self.predRead = False
 902         self.predWrite = False
 903
 904         for op_desc in self.items:
 905             if op_desc.isReg():
 906                 if op_desc.is_src:
 907                     op_desc.src_reg_idx = self.numSrcRegs
 908                     self.numSrcRegs += 1
 909                 if op_desc.is_dest:
 910                     op_desc.dest_reg_idx = self.numDestRegs
 911                     self.numDestRegs += 1
 912                     if op_desc.isFloatReg():
 913                         self.numFPDestRegs += 1
 914                     elif op_desc.isIntReg():
 915                         self.numIntDestRegs += 1
 916                     elif op_desc.isCCReg():
 917                         self.numCCDestRegs += 1
 918                     elif op_desc.isControlReg():
 919                         self.numMiscDestRegs += 1
 920             elif op_desc.isMem():
 921                 if self.memOperand:
 922                     error("Code block has more than one memory operand.")
 923                 self.memOperand = op_desc
 924
 925             # Check if this operand has read/write predication. If true, then
 926             # the microop will dynamically index source/dest registers.
 927             self.predRead = self.predRead or op_desc.hasReadPred()
 928             self.predWrite = self.predWrite or op_desc.hasWritePred()
 929
 930         if parser.maxInstSrcRegs < self.numSrcRegs:
 931             parser.maxInstSrcRegs = self.numSrcRegs
 932         if parser.maxInstDestRegs < self.numDestRegs:
 933             parser.maxInstDestRegs = self.numDestRegs
 934         if parser.maxMiscDestRegs < self.numMiscDestRegs:
 935             parser.maxMiscDestRegs = self.numMiscDestRegs
 936
 937         # now make a final pass to finalize op_desc fields that may depend
 938         # on the register enumeration
 939         for op_desc in self.items:
 940             op_desc.finalize(self.predRead, self.predWrite)
 941
 942     def __len__(self):
 943         return len(self.items)
 944
 945     def __getitem__(self, index):
 946         return self.items[index]
 947
 948     def append(self, op_desc):
 949         self.items.append(op_desc)
 950         self.bases[op_desc.base_name] = op_desc
 951
 952     def find_base(self, base_name):
 953         # like self.bases[base_name], but returns None if not found
 954         # (rather than raising exception)
 955         return self.bases.get(base_name)
 956
 957     # internal helper function for concat[Some]Attr{Strings|Lists}
 958     def __internalConcatAttrs(self, attr_name, filter, result):
 959         for op_desc in self.items:
 960             if filter(op_desc):
 961                 result += getattr(op_desc, attr_name)
 962         return result
 963
 964     # return a single string that is the concatenation of the (string)
 965     # values of the specified attribute for all operands
 966     def concatAttrStrings(self, attr_name):
 967         return self.__internalConcatAttrs(attr_name, lambda x: 1, '')
 968
 969     # like concatAttrStrings, but only include the values for the operands
 970     # for which the provided filter function returns true
 971     def concatSomeAttrStrings(self, filter, attr_name):
 972         return self.__internalConcatAttrs(attr_name, filter, '')
 973
 974     # return a single list that is the concatenation of the (list)
 975     # values of the specified attribute for all operands
 976     def concatAttrLists(self, attr_name):
 977         return self.__internalConcatAttrs(attr_name, lambda x: 1, [])
 978
 979     # like concatAttrLists, but only include the values for the operands
 980     # for which the provided filter function returns true
 981     def concatSomeAttrLists(self, filter, attr_name):
 982         return self.__internalConcatAttrs(attr_name, filter, [])
 983
 984     def sort(self):
 985         self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
 986
 987 class SubOperandList(OperandList):
 988     '''Find all the operands in the given code block.  Returns an operand
 989     descriptor list (instance of class OperandList).'''
 990     def __init__(self, parser, code, master_list):
 991         self.items = []
 992         self.bases = {}
 993         # delete strings and comments so we don't match on operands inside
 994         for regEx in (stringRE, commentRE):
 995             code = regEx.sub('', code)
 996         # search for operands
 997         next_pos = 0
 998         while 1:
 999             match = parser.operandsRE.search(code, next_pos)
1000             if not match:
1001                 # no more matches: we're done
1002                 break
1003             op = match.groups()
1004             # regexp groups are operand full name, base, and extension
1005             (op_full, op_base, op_ext) = op
1006             # find this op in the master list
1007             op_desc = master_list.find_base(op_base)
1008             if not op_desc:
1009                 error('Found operand %s which is not in the master list!' \
1010                       ' This is an internal error' % op_base)
1011             else:
1012                 # See if we've already found this operand
1013                 op_desc = self.find_base(op_base)
1014                 if not op_desc:
1015                     # if not, add a reference to it to this sub list
1016                     self.append(master_list.bases[op_base])
1017
1018             # start next search after end of current match
1019             next_pos = match.end()
1020         self.sort()
1021         self.memOperand = None
1022         # Whether the whole PC needs to be read so parts of it can be accessed
1023         self.readPC = False
1024         # Whether the whole PC needs to be written after parts of it were
1025         # changed
1026         self.setPC = False
1027         # Whether this instruction manipulates the whole PC or parts of it.
1028         # Mixing the two is a bad idea and flagged as an error.
1029         self.pcPart = None
1030
1031         # Flags to keep track if one or more operands are to be read/written
1032         # conditionally.
1033         self.predRead = False
1034         self.predWrite = False
1035
1036         for op_desc in self.items:
1037             if op_desc.isPCPart():
1038                 self.readPC = True
1039                 if op_desc.is_dest:
1040                     self.setPC = True
1041
1042             if op_desc.isPCState():
1043                 if self.pcPart is not None:
1044                     if self.pcPart and not op_desc.isPCPart() or \
1045                             not self.pcPart and op_desc.isPCPart():
1046                         error("Mixed whole and partial PC state operands.")
1047                 self.pcPart = op_desc.isPCPart()
1048
1049             if op_desc.isMem():
1050                 if self.memOperand:
1051                     error("Code block has more than one memory operand.")
1052                 self.memOperand = op_desc
1053
1054             # Check if this operand has read/write predication. If true, then
1055             # the microop will dynamically index source/dest registers.
1056             self.predRead = self.predRead or op_desc.hasReadPred()
1057             self.predWrite = self.predWrite or op_desc.hasWritePred()
1058
# Regular expression object to match C++ string literals (double-quoted,
# backslash escapes allowed).  OperandList and SubOperandList delete these
# from a code block before scanning it for operands.
stringRE = re.compile(r'"([^"\\]|\\.)*"')

# Regular expression object to match C++ comments, both the /* ... */ and
# the // form, together with adjacent horizontal whitespace.
# (deleted along with strings before scanning for operands)
commentRE = re.compile(r'(^)?[^\S\n]*/(?:\*(.*?)\*/[^\S\n]*|/[^\n]*)($)?',
        re.DOTALL | re.MULTILINE)

# Regular expression object to match the start of an assignment: optional
# whitespace followed by an '=' that is not the first half of '=='.
# (OperandList applies it right after an operand to tell a destination
# from a source)
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1070
def makeFlagConstructor(flag_list):
    '''Return the C++ statements that set each flag in flag_list to true.

    One '\\n\\tflags[<flag>] = true;' statement is produced per distinct
    flag, in sorted order.  An empty list yields ''.

    As a side effect flag_list itself is sorted and stripped of
    duplicates in place.
    '''
    if not flag_list:
        return ''
    # filter out repeated flags; slice assignment keeps the caller's list
    # object and updates it in place
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post
1086
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# tries this first on each optional argument.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.  InstObjParams applies
# this with match(), which anchors only at the start of the string.
opClassRE = re.compile(r'.*Op|No_OpClass')
1092
class InstObjParams(object):
    '''Parameters describing one instruction object.

    Attributes set by the constructor:
      mnemonic, class_name, base_class -- copied from the arguments
      snippets        -- dict of code snippets
      operands        -- OperandList built from all snippets joined
      constructor     -- C++ constructor body text
      flags           -- list of instruction flags
      op_class        -- operation class name
      fp_enable_check -- FP-enable check statement, or ''
    '''
    def __init__(self, parser, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        '''
        parser     -- parser object, handed on to OperandList
        mnem       -- instruction mnemonic
        class_name -- name stored as class_name
        base_class -- name stored as base_class
        snippets   -- dict of code snippets; any other value is treated
                      as a single snippet stored under the key 'code'.
                      Omitted (None) means no snippets.
        opt_args   -- iterable of strings, each either an instruction
                      flag ('Is...') or an OpClass name.  Omitted (None)
                      means none.
        '''
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        # A fresh dict per instance: a shared default dict would be
        # visible through self.snippets of every instance created
        # without snippets.
        if snippets is None:
            snippets = {}
        elif not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        if opt_args is None:
            opt_args = []
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(parser, compositeCode)

        # The header of the constructor declares the variables to be used
        # in the body of the constructor.
        header = ''
        header += '\n\t_numSrcRegs = 0;'
        header += '\n\t_numDestRegs = 0;'
        header += '\n\t_numFPDestRegs = 0;'
        header += '\n\t_numIntDestRegs = 0;'
        header += '\n\t_numCCDestRegs = 0;'

        self.constructor = header + \
                           self.operands.concatAttrStrings('constructor')

        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
1155
1156 ##############
1157 # Stack: a simple stack object.  Used for both formats (formatStack)
1158 # and default cases (defaultStack).  Simply wraps a list to give more
1159 # stack-like syntax and enable initialization with an argument list
1160 # (as opposed to an argument that's a list).
1161
class Stack(list):
    '''A list with stack-style helpers.

    Unlike list(), the constructor takes the initial elements as separate
    positional arguments rather than as a single iterable.
    '''
    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        # The top of the stack is the end of the list.
        self.append(item)

    def top(self):
        # Look at the top element without removing it.
        return self[-1]
1171
1172 #######################
1173 #
1174 # Output file template
1175 #
1176
# Filled in with the % operator and a mapping that supplies the keys
# filename, includes, global_output, namespace, namespace_output and
# decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1196
# Template for the C++ constants MaxInstSrcRegs, MaxInstDestRegs and
# MaxMiscDestRegs.  Filled in with the % operator and a mapping that
# supplies filename, namespace and the three integer values.
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;
    const int MaxMiscDestRegs = %(MaxMiscDestRegs)d;

} // namespace %(namespace)s

'''
1213
1214 class ISAParser(Grammar):
1215     def __init__(self, output_dir, cpu_models):
1216         super(ISAParser, self).__init__()
1217         self.output_dir = output_dir
1218
1219         self.cpuModels = cpu_models
1220
1221         # variable to hold templates
1222         self.templateMap = {}
1223
1224         # This dictionary maps format name strings to Format objects.
1225         self.formatMap = {}
1226
1227         # The format stack.
1228         self.formatStack = Stack(NoFormat())
1229
1230         # The default case stack.
1231         self.defaultStack = Stack(None)
1232
1233         # Stack that tracks current file and line number.  Each
1234         # element is a tuple (filename, lineno) that records the
1235         # *current* filename and the line number in the *previous*
1236         # file where it was included.
1237         self.fileNameStack = Stack()
1238
1239         symbols = ('makeList', 're', 'string')
1240         self.exportContext = dict([(s, eval(s)) for s in symbols])
1241
1242         self.maxInstSrcRegs = 0
1243         self.maxInstDestRegs = 0
1244         self.maxMiscDestRegs = 0
1245
1246     #####################################################################
1247     #
1248     #                                Lexer
1249     #
1250     # The PLY lexer module takes two things as input:
1251     # - A list of token names (the string list 'tokens')
1252     # - A regular expression describing a match for each token.  The
1253     #   regexp for token FOO can be provided in two ways:
1254     #   - as a string variable named t_FOO
1255     #   - as the doc string for a function named t_FOO.  In this case,
1256     #     the function is also executed, allowing an action to be
1257     #     associated with each token match.
1258     #
1259     #####################################################################
1260
1261     # Reserved words.  These are listed separately as they are matched
1262     # using the same regexp as generic IDs, but distinguished in the
1263     # t_ID() function.  The PLY documentation suggests this approach.
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )

    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched (by t_NEWFILE and t_ENDFILE) but never
    # returned, so they are left commented out to suppress a PLY warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1303
1304     # Regular expressions for token matching
    # Punctuation tokens, each defined by a plain regexp string.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'

    # Identifiers and reserved words
    # reserved_map maps each reserved word as written in the input (lower
    # case) to its token name (upper case); t_ID() consults it.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1325
1326     def t_ID(self, t):
1327         r'[A-Za-z_]\w*'
1328         t.type = self.reserved_map.get(t.value, 'ID')
1329         return t
1330
1331     # Integer literal
1332     def t_INTLIT(self, t):
1333         r'-?(0x[\da-fA-F]+)|\d+'
1334         try:
1335             t.value = int(t.value,0)
1336         except ValueError:
1337             error(t, 'Integer value "%s" too large' % t.value)
1338             t.value = 0
1339         return t
1340
1341     # String literal.  Note that these use only single quotes, and
1342     # can span multiple lines.
1343     def t_STRLIT(self, t):
1344         r"(?m)'([^'])+'"
1345         # strip off quotes
1346         t.value = t.value[1:-1]
1347         t.lexer.lineno += t.value.count('\n')
1348         return t
1349
1350
1351     # "Code literal"... like a string literal, but delimiters are
1352     # '{{' and '}}' so they get formatted nicely under emacs c-mode
1353     def t_CODELIT(self, t):
1354         r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1355         # strip off {{ & }}
1356         t.value = t.value[2:-2]
1357         t.lexer.lineno += t.value.count('\n')
1358         return t
1359
1360     def t_CPPDIRECTIVE(self, t):
1361         r'^\#[^\#].*\n'
1362         t.lexer.lineno += t.value.count('\n')
1363         return t
1364
1365     def t_NEWFILE(self, t):
1366         r'^\#\#newfile\s+"[^"]*"'
1367         self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1368         t.lexer.lineno = 0
1369
1370     def t_ENDFILE(self, t):
1371         r'^\#\#endfile'
1372         (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1373
1374     #
1375     # The functions t_NEWLINE, t_ignore, and t_error are
1376     # special for the lex module.
1377     #
1378
1379     # Newlines
1380     def t_NEWLINE(self, t):
1381         r'\n+'
1382         t.lexer.lineno += t.value.count('\n')
1383
1384     # Comments
    def t_comment(self, t):
        r'//.*'
        # No body: a '//' comment up to the end of the line is matched
        # and nothing is returned for it.
1387
    # Completely ignored characters: space, tab and form feed
    t_ignore = ' \t\x0c'
1390
1391     # Error handler
1392     def t_error(self, t):
1393         error(t, "illegal character '%s'" % t.value[0])
1394         t.skip(1)
1395
1396     #####################################################################
1397     #
1398     #                                Parser
1399     #
1400     # Every function whose name starts with 'p_' defines a grammar
1401     # rule.  The rule is encoded in the function's doc string, while
1402     # the function body provides the action taken when the rule is
1403     # matched.  The argument to each function is a list of the values
1404     # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1405     # symbols on the RHS.  For tokens, the value is copied from the
1406     # t.value attribute provided by the lexer.  For non-terminals, the
1407     # value is assigned by the producing rule; i.e., the job of the
1408     # grammar rule function is to set the value for the non-terminal
1409     # on the LHS (by assigning to t[0]).
1410     #####################################################################
1411
1412     # The LHS of the first grammar rule is used as the start symbol
1413     # (in this case, 'specification').  Note that this rule enforces
1414     # that there will be exactly one namespace declaration, with 0 or
1415     # more global defs/decls before and after it.  The defs & decls
1416     # before the namespace decl will be outside the namespace; those
1417     # after will be inside.  The decoder function is always inside the
1418     # namespace.
1419     def p_specification(self, t):
1420         'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1421         global_code = t[1]
1422         isa_name = t[2]
1423         namespace = isa_name + "Inst"
1424         # wrap the decode block as a function definition
1425         t[4].wrap_decode_block('''
1426 StaticInstPtr
1427 %(isa_name)s::Decoder::decodeInst(%(isa_name)s::ExtMachInst machInst)
1428 {
1429     using namespace %(namespace)s;
1430 ''' % vars(), '}')
1431         # both the latter output blocks and the decode block are in
1432         # the namespace
1433         namespace_code = t[3] + t[4]
1434         # pass it all back to the caller of yacc.parse()
1435         t[0] = (isa_name, namespace, global_code, namespace_code)
1436
1437     # ISA name declaration looks like "namespace <foo>;"
    def p_name_decl(self, t):
        'name_decl : NAMESPACE ID SEMI'
        # The value of the declaration is the ID, i.e. the ISA name.
        t[0] = t[2]
1441
1442     # 'opt_defs_and_outputs' is a possibly empty sequence of
1443     # def and/or output statements.
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        # Nothing present: the value is a GenCode built with no output
        # arguments.
        t[0] = GenCode(self)
1447
    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        # Pass the value of the non-empty sequence through unchanged.
        t[0] = t[1]
1451
    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        # A sequence of one statement: its value is that statement's.
        t[0] = t[1]
1455
    def p_defs_and_outputs_1(self, t):
        'defs_and_outputs : defs_and_outputs def_or_output'
        # Combine the sequence so far with the next statement using '+'.
        t[0] = t[1] + t[2]
1459
1460     # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        # Whichever alternative matched, pass its value through unchanged.
        t[0] = t[1]
1473
1474     # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1475     # directly to the appropriate output section.
1476
1477     # Massage output block by substituting in template definitions and
1478     # bit operators.  We handle '%'s embedded in the string that don't
1479     # indicate template substitutions (or CPU-specific symbols, which
1480     # get handled in GenCode) by doubling them first so that the
1481     # format operation will reduce them back to single '%'s.
1482     def process_output(self, s):
1483         s = self.protectNonSubstPercents(s)
1484         # protects cpu-specific symbols too
1485         s = self.protectCpuSymbols(s)
1486         return substBitOps(s % self.templateMap)
1487
1488     def p_output_header(self, t):
1489         'output_header : OUTPUT HEADER CODELIT SEMI'
1490         t[0] = GenCode(self, header_output = self.process_output(t[3]))
1491
1492     def p_output_decoder(self, t):
1493         'output_decoder : OUTPUT DECODER CODELIT SEMI'
1494         t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1495
1496     def p_output_exec(self, t):
1497         'output_exec : OUTPUT EXEC CODELIT SEMI'
1498         t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1499
1500     # global let blocks 'let {{...}}' (Python code blocks) are
1501     # executed directly when seen.  Note that these execute in a
1502     # special variable context 'exportContext' to prevent the code
1503     # from polluting this script's namespace.
1504     def p_global_let(self, t):
1505         'global_let : LET CODELIT SEMI'
1506         self.updateExportContext()
1507         self.exportContext["header_output"] = ''
1508         self.exportContext["decoder_output"] = ''
1509         self.exportContext["exec_output"] = ''
1510         self.exportContext["decode_block"] = ''
1511         try:
1512             exec fixPythonIndentation(t[2]) in self.exportContext
1513         except Exception, exc:
1514             if debug:
1515                 raise
1516             error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1517         t[0] = GenCode(self,
1518                        header_output=self.exportContext["header_output"],
1519                        decoder_output=self.exportContext["decoder_output"],
1520                        exec_output=self.exportContext["exec_output"],
1521                        decode_block=self.exportContext["decode_block"])
1522
1523     # Define the mapping from operand type extensions to C++ types and
1524     # bit widths (stored in operandTypeMap).
1525     def p_def_operand_types(self, t):
1526         'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
1527         try:
1528             self.operandTypeMap = eval('{' + t[3] + '}')
1529         except Exception, exc:
1530             if debug:
1531                 raise
1532             error(t,
1533                   'error: %s in def operand_types block "%s".' % (exc, t[3]))
1534         t[0] = GenCode(self) # contributes nothing to the output C++ file
1535
1536     # Define the mapping from operand names to operand classes and
1537     # other traits.  Stored in operandNameMap.
1538     def p_def_operands(self, t):
1539         'def_operands : DEF OPERANDS CODELIT SEMI'
1540         if not hasattr(self, 'operandTypeMap'):
1541             error(t, 'error: operand types must be defined before operands')
1542         try:
1543             user_dict = eval('{' + t[3] + '}', self.exportContext)
1544         except Exception, exc:
1545             if debug:
1546                 raise
1547             error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1548         self.buildOperandNameMap(user_dict, t.lexer.lineno)
1549         t[0] = GenCode(self) # contributes nothing to the output C++ file
1550
1551     # A bitfield definition looks like:
1552     # 'def [signed] bitfield <ID> [<first>:<last>]'
1553     # This generates a preprocessor macro in the output file.
1554     def p_def_bitfield_0(self, t):
1555         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1556         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1557         if (t[2] == 'signed'):
1558             expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1559         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1560         t[0] = GenCode(self, header_output=hash_define)
1561
1562     # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1563     def p_def_bitfield_1(self, t):
1564         'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1565         expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1566         if (t[2] == 'signed'):
1567             expr = 'sext<%d>(%s)' % (1, expr)
1568         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1569         t[0] = GenCode(self, header_output=hash_define)
1570
1571     # alternate form for structure member: 'def bitfield <ID> <ID>'
1572     def p_def_bitfield_struct(self, t):
1573         'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1574         if (t[2] != ''):
1575             error(t, 'error: structure bitfields are always unsigned.')
1576         expr = 'machInst.%s' % t[5]
1577         hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1578         t[0] = GenCode(self, header_output=hash_define)
1579
1580     def p_id_with_dot_0(self, t):
1581         'id_with_dot : ID'
1582         t[0] = t[1]
1583
1584     def p_id_with_dot_1(self, t):
1585         'id_with_dot : ID DOT id_with_dot'
1586         t[0] = t[1] + t[2] + t[3]
1587
1588     def p_opt_signed_0(self, t):
1589         'opt_signed : SIGNED'
1590         t[0] = t[1]
1591
1592     def p_opt_signed_1(self, t):
1593         'opt_signed : empty'
1594         t[0] = ''
1595
1596     def p_def_template(self, t):
1597         'def_template : DEF TEMPLATE ID CODELIT SEMI'
1598         self.templateMap[t[3]] = Template(self, t[4])
1599         t[0] = GenCode(self)
1600
1601     # An instruction format definition looks like
1602     # "def format <fmt>(<params>) {{...}};"
1603     def p_def_format(self, t):
1604         'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1605         (id, params, code) = (t[3], t[5], t[7])
1606         self.defFormat(id, params, code, t.lexer.lineno)
1607         t[0] = GenCode(self)
1608
1609     # The formal parameter list for an instruction format is a
1610     # possibly empty list of comma-separated parameters.  Positional
1611     # (standard, non-keyword) parameters must come first, followed by
1612     # keyword parameters, followed by a '*foo' parameter that gets
1613     # excess positional arguments (as in Python).  Each of these three
1614     # parameter categories is optional.
1615     #
1616     # Note that we do not support the '**foo' parameter for collecting
1617     # otherwise undefined keyword args.  Otherwise the parameter list
1618     # is (I believe) identical to what is supported in Python.
1619     #
    # The param list generates a flat list of strings: the positional
    # parameter names first, then the 'name = default' keyword params,
    # and finally the '*name' excess-args param, if present.
1623     def p_param_list_0(self, t):
1624         'param_list : positional_param_list COMMA nonpositional_param_list'
1625         t[0] = t[1] + t[3]
1626
1627     def p_param_list_1(self, t):
1628         '''param_list : positional_param_list
1629                       | nonpositional_param_list'''
1630         t[0] = t[1]
1631
1632     def p_positional_param_list_0(self, t):
1633         'positional_param_list : empty'
1634         t[0] = []
1635
1636     def p_positional_param_list_1(self, t):
1637         'positional_param_list : ID'
1638         t[0] = [t[1]]
1639
1640     def p_positional_param_list_2(self, t):
1641         'positional_param_list : positional_param_list COMMA ID'
1642         t[0] = t[1] + [t[3]]
1643
1644     def p_nonpositional_param_list_0(self, t):
1645         'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1646         t[0] = t[1] + t[3]
1647
1648     def p_nonpositional_param_list_1(self, t):
1649         '''nonpositional_param_list : keyword_param_list
1650                                     | excess_args_param'''
1651         t[0] = t[1]
1652
1653     def p_keyword_param_list_0(self, t):
1654         'keyword_param_list : keyword_param'
1655         t[0] = [t[1]]
1656
1657     def p_keyword_param_list_1(self, t):
1658         'keyword_param_list : keyword_param_list COMMA keyword_param'
1659         t[0] = t[1] + [t[3]]
1660
1661     def p_keyword_param(self, t):
1662         'keyword_param : ID EQUALS expr'
1663         t[0] = t[1] + ' = ' + t[3].__repr__()
1664
1665     def p_excess_args_param(self, t):
1666         'excess_args_param : ASTERISK ID'
1667         # Just concatenate them: '*ID'.  Wrap in list to be consistent
1668         # with positional_param_list and keyword_param_list.
1669         t[0] = [t[1] + t[2]]
1670
1671     # End of format definition-related rules.
1672     ##############
1673
1674     #
1675     # A decode block looks like:
1676     #       decode <field1> [, <field2>]* [default <inst>] { ... }
1677     #
1678     def p_decode_block(self, t):
1679         'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1680         default_defaults = self.defaultStack.pop()
1681         codeObj = t[5]
1682         # use the "default defaults" only if there was no explicit
1683         # default statement in decode_stmt_list
1684         if not codeObj.has_decode_default:
1685             codeObj += default_defaults
1686         codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1687         t[0] = codeObj
1688
1689     # The opt_default statement serves only to push the "default
1690     # defaults" onto defaultStack.  This value will be used by nested
1691     # decode blocks, and used and popped off when the current
1692     # decode_block is processed (in p_decode_block() above).
1693     def p_opt_default_0(self, t):
1694         'opt_default : empty'
1695         # no default specified: reuse the one currently at the top of
1696         # the stack
1697         self.defaultStack.push(self.defaultStack.top())
1698         # no meaningful value returned
1699         t[0] = None
1700
1701     def p_opt_default_1(self, t):
1702         'opt_default : DEFAULT inst'
1703         # push the new default
1704         codeObj = t[2]
1705         codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1706         self.defaultStack.push(codeObj)
1707         # no meaningful value returned
1708         t[0] = None
1709
1710     def p_decode_stmt_list_0(self, t):
1711         'decode_stmt_list : decode_stmt'
1712         t[0] = t[1]
1713
1714     def p_decode_stmt_list_1(self, t):
1715         'decode_stmt_list : decode_stmt decode_stmt_list'
1716         if (t[1].has_decode_default and t[2].has_decode_default):
1717             error(t, 'Two default cases in decode block')
1718         t[0] = t[1] + t[2]
1719
1720     #
1721     # Decode statement rules
1722     #
1723     # There are four types of statements allowed in a decode block:
1724     # 1. Format blocks 'format <foo> { ... }'
1725     # 2. Nested decode blocks
1726     # 3. Instruction definitions.
1727     # 4. C preprocessor directives.
1728
1729
1730     # Preprocessor directives found in a decode statement list are
1731     # passed through to the output, replicated to all of the output
1732     # code streams.  This works well for ifdefs, so we can ifdef out
1733     # both the declarations and the decode cases generated by an
1734     # instruction definition.  Handling them as part of the grammar
1735     # makes it easy to keep them in the right place with respect to
1736     # the code generated by the other statements.
1737     def p_decode_stmt_cpp(self, t):
1738         'decode_stmt : CPPDIRECTIVE'
1739         t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1740
1741     # A format block 'format <foo> { ... }' sets the default
1742     # instruction format used to handle instruction definitions inside
1743     # the block.  This format can be overridden by using an explicit
1744     # format on the instruction definition or with a nested format
1745     # block.
1746     def p_decode_stmt_format(self, t):
1747         'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1748         # The format will be pushed on the stack when 'push_format_id'
1749         # is processed (see below).  Once the parser has recognized
1750         # the full production (though the right brace), we're done
1751         # with the format, so now we can pop it.
1752         self.formatStack.pop()
1753         t[0] = t[4]
1754
1755     # This rule exists so we can set the current format (& push the
1756     # stack) when we recognize the format name part of the format
1757     # block.
1758     def p_push_format_id(self, t):
1759         'push_format_id : ID'
1760         try:
1761             self.formatStack.push(self.formatMap[t[1]])
1762             t[0] = ('', '// format %s' % t[1])
1763         except KeyError:
1764             error(t, 'instruction format "%s" not defined.' % t[1])
1765
1766     # Nested decode block: if the value of the current field matches
1767     # the specified constant, do a nested decode on some other field.
1768     def p_decode_stmt_decode(self, t):
1769         'decode_stmt : case_label COLON decode_block'
1770         label = t[1]
1771         codeObj = t[3]
1772         # just wrap the decoding code from the block as a case in the
1773         # outer switch statement.
1774         codeObj.wrap_decode_block('\n%s:\n' % label)
1775         codeObj.has_decode_default = (label == 'default')
1776         t[0] = codeObj
1777
1778     # Instruction definition (finally!).
1779     def p_decode_stmt_inst(self, t):
1780         'decode_stmt : case_label COLON inst SEMI'
1781         label = t[1]
1782         codeObj = t[3]
1783         codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1784         codeObj.has_decode_default = (label == 'default')
1785         t[0] = codeObj
1786
1787     # The case label is either a list of one or more constants or
1788     # 'default'
1789     def p_case_label_0(self, t):
1790         'case_label : intlit_list'
1791         def make_case(intlit):
1792             if intlit >= 2**32:
1793                 return 'case ULL(%#x)' % intlit
1794             else:
1795                 return 'case %#x' % intlit
1796         t[0] = ': '.join(map(make_case, t[1]))
1797
1798     def p_case_label_1(self, t):
1799         'case_label : DEFAULT'
1800         t[0] = 'default'
1801
1802     #
1803     # The constant list for a decode case label must be non-empty, but
1804     # may have one or more comma-separated integer literals in it.
1805     #
1806     def p_intlit_list_0(self, t):
1807         'intlit_list : INTLIT'
1808         t[0] = [t[1]]
1809
1810     def p_intlit_list_1(self, t):
1811         'intlit_list : intlit_list COMMA INTLIT'
1812         t[0] = t[1]
1813         t[0].append(t[3])
1814
1815     # Define an instruction using the current instruction format
1816     # (specified by an enclosing format block).
1817     # "<mnemonic>(<args>)"
1818     def p_inst_0(self, t):
1819         'inst : ID LPAREN arg_list RPAREN'
1820         # Pass the ID and arg list to the current format class to deal with.
1821         currentFormat = self.formatStack.top()
1822         codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1823         args = ','.join(map(str, t[3]))
1824         args = re.sub('(?m)^', '//', args)
1825         args = re.sub('^//', '', args)
1826         comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1827         codeObj.prepend_all(comment)
1828         t[0] = codeObj
1829
1830     # Define an instruction using an explicitly specified format:
1831     # "<fmt>::<mnemonic>(<args>)"
1832     def p_inst_1(self, t):
1833         'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1834         try:
1835             format = self.formatMap[t[1]]
1836         except KeyError:
1837             error(t, 'instruction format "%s" not defined.' % t[1])
1838
1839         codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1840         comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1841         codeObj.prepend_all(comment)
1842         t[0] = codeObj
1843
1844     # The arg list generates a tuple, where the first element is a
1845     # list of the positional args and the second element is a dict
1846     # containing the keyword args.
1847     def p_arg_list_0(self, t):
1848         'arg_list : positional_arg_list COMMA keyword_arg_list'
1849         t[0] = ( t[1], t[3] )
1850
1851     def p_arg_list_1(self, t):
1852         'arg_list : positional_arg_list'
1853         t[0] = ( t[1], {} )
1854
1855     def p_arg_list_2(self, t):
1856         'arg_list : keyword_arg_list'
1857         t[0] = ( [], t[1] )
1858
1859     def p_positional_arg_list_0(self, t):
1860         'positional_arg_list : empty'
1861         t[0] = []
1862
1863     def p_positional_arg_list_1(self, t):
1864         'positional_arg_list : expr'
1865         t[0] = [t[1]]
1866
1867     def p_positional_arg_list_2(self, t):
1868         'positional_arg_list : positional_arg_list COMMA expr'
1869         t[0] = t[1] + [t[3]]
1870
1871     def p_keyword_arg_list_0(self, t):
1872         'keyword_arg_list : keyword_arg'
1873         t[0] = t[1]
1874
1875     def p_keyword_arg_list_1(self, t):
1876         'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1877         t[0] = t[1]
1878         t[0].update(t[3])
1879
1880     def p_keyword_arg(self, t):
1881         'keyword_arg : ID EQUALS expr'
1882         t[0] = { t[1] : t[3] }
1883
1884     #
1885     # Basic expressions.  These constitute the argument values of
1886     # "function calls" (i.e. instruction definitions in the decode
1887     # block) and default values for formal parameters of format
1888     # functions.
1889     #
1890     # Right now, these are either strings, integers, or (recursively)
1891     # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1893     # there isn't really a variable namespace to refer to).
1894     #
1895     def p_expr_0(self, t):
1896         '''expr : ID
1897                 | INTLIT
1898                 | STRLIT
1899                 | CODELIT'''
1900         t[0] = t[1]
1901
1902     def p_expr_1(self, t):
1903         '''expr : LBRACKET list_expr RBRACKET'''
1904         t[0] = t[2]
1905
1906     def p_list_expr_0(self, t):
1907         'list_expr : expr'
1908         t[0] = [t[1]]
1909
1910     def p_list_expr_1(self, t):
1911         'list_expr : list_expr COMMA expr'
1912         t[0] = t[1] + [t[3]]
1913
1914     def p_list_expr_2(self, t):
1915         'list_expr : empty'
1916         t[0] = []
1917
1918     #
1919     # Empty production... use in other rules for readability.
1920     #
1921     def p_empty(self, t):
1922         'empty :'
1923         pass
1924
1925     # Parse error handler.  Note that the argument here is the
1926     # offending *token*, not a grammar symbol (hence the need to use
1927     # t.value)
1928     def p_error(self, t):
1929         if t:
1930             error(t, "syntax error at '%s'" % t.value)
1931         else:
1932             error("unknown syntax error")
1933
1934     # END OF GRAMMAR RULES
1935
1936     def updateExportContext(self):
1937
1938         # create a continuation that allows us to grab the current parser
1939         def wrapInstObjParams(*args):
1940             return InstObjParams(self, *args)
1941         self.exportContext['InstObjParams'] = wrapInstObjParams
1942         self.exportContext.update(self.templateMap)
1943
1944     def defFormat(self, id, params, code, lineno):
1945         '''Define a new format'''
1946
1947         # make sure we haven't already defined this one
1948         if id in self.formatMap:
1949             error(lineno, 'format %s redefined.' % id)
1950
1951         # create new object and store in global map
1952         self.formatMap[id] = Format(id, params, code)
1953
1954     def expandCpuSymbolsToDict(self, template):
1955         '''Expand template with CPU-specific references into a
1956         dictionary with an entry for each CPU model name.  The entry
1957         key is the model name and the corresponding value is the
1958         template with the CPU-specific refs substituted for that
1959         model.'''
1960
1961         # Protect '%'s that don't go with CPU-specific terms
1962         t = re.sub(r'%(?!\(CPU_)', '%%', template)
1963         result = {}
1964         for cpu in self.cpuModels:
1965             result[cpu.name] = t % cpu.strings
1966         return result
1967
1968     def expandCpuSymbolsToString(self, template):
1969         '''*If* the template has CPU-specific references, return a
1970         single string containing a copy of the template for each CPU
1971         model with the corresponding values substituted in.  If the
1972         template has no CPU-specific references, it is returned
1973         unmodified.'''
1974
1975         if template.find('%(CPU_') != -1:
1976             return reduce(lambda x,y: x+y,
1977                           self.expandCpuSymbolsToDict(template).values())
1978         else:
1979             return template
1980
1981     def protectCpuSymbols(self, template):
1982         '''Protect CPU-specific references by doubling the
1983         corresponding '%'s (in preparation for substituting a different
1984         set of references into the template).'''
1985
1986         return re.sub(r'%(?=\(CPU_)', '%%', template)
1987
1988     def protectNonSubstPercents(self, s):
1989         '''Protect any non-dict-substitution '%'s in a format string
1990         (i.e. those not followed by '(')'''
1991
1992         return re.sub(r'%(?!\()', '%%', s)
1993
1994     def buildOperandNameMap(self, user_dict, lineno):
1995         operand_name = {}
1996         for op_name, val in user_dict.iteritems():
1997
1998             # Check if extra attributes have been specified.
1999             if len(val) > 9:
2000                 error(lineno, 'error: too many attributes for operand "%s"' %
2001                       base_cls_name)
2002
2003             # Pad val with None in case optional args are missing
2004             val += (None, None, None, None)
2005             base_cls_name, dflt_ext, reg_spec, flags, sort_pri, \
2006             read_code, write_code, read_predicate, write_predicate = val[:9]
2007
2008             # Canonical flag structure is a triple of lists, where each list
2009             # indicates the set of flags implied by this operand always, when
2010             # used as a source, and when used as a dest, respectively.
2011             # For simplicity this can be initialized using a variety of fairly
2012             # obvious shortcuts; we convert these to canonical form here.
2013             if not flags:
2014                 # no flags specified (e.g., 'None')
2015                 flags = ( [], [], [] )
2016             elif isinstance(flags, str):
2017                 # a single flag: assumed to be unconditional
2018                 flags = ( [ flags ], [], [] )
2019             elif isinstance(flags, list):
2020                 # a list of flags: also assumed to be unconditional
2021                 flags = ( flags, [], [] )
2022             elif isinstance(flags, tuple):
2023                 # it's a tuple: it should be a triple,
2024                 # but each item could be a single string or a list
2025                 (uncond_flags, src_flags, dest_flags) = flags
2026                 flags = (makeList(uncond_flags),
2027                          makeList(src_flags), makeList(dest_flags))
2028
2029             # Accumulate attributes of new operand class in tmp_dict
2030             tmp_dict = {}
2031             attrList = ['reg_spec', 'flags', 'sort_pri',
2032                         'read_code', 'write_code',
2033                         'read_predicate', 'write_predicate']
2034             if dflt_ext:
2035                 dflt_ctype = self.operandTypeMap[dflt_ext]
2036                 attrList.extend(['dflt_ctype', 'dflt_ext'])
2037             for attr in attrList:
2038                 tmp_dict[attr] = eval(attr)
2039             tmp_dict['base_name'] = op_name
2040
2041             # New class name will be e.g. "IntReg_Ra"
2042             cls_name = base_cls_name + '_' + op_name
2043             # Evaluate string arg to get class object.  Note that the
2044             # actual base class for "IntReg" is "IntRegOperand", i.e. we
2045             # have to append "Operand".
2046             try:
2047                 base_cls = eval(base_cls_name + 'Operand')
2048             except NameError:
2049                 error(lineno,
2050                       'error: unknown operand base class "%s"' % base_cls_name)
2051             # The following statement creates a new class called
2052             # <cls_name> as a subclass of <base_cls> with the attributes
2053             # in tmp_dict, just as if we evaluated a class declaration.
2054             operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)
2055
2056         self.operandNameMap = operand_name
2057
2058         # Define operand variables.
2059         operands = user_dict.keys()
2060         extensions = self.operandTypeMap.keys()
2061
2062         operandsREString = r'''
2063         (?<!\w)      # neg. lookbehind assertion: prevent partial matches
2064         ((%s)(?:_(%s))?)   # match: operand with optional '_' then suffix
2065         (?!\w)       # neg. lookahead assertion: prevent partial matches
2066         ''' % (string.join(operands, '|'), string.join(extensions, '|'))
2067
2068         self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
2069
2070         # Same as operandsREString, but extension is mandatory, and only two
2071         # groups are returned (base and ext, not full name as above).
2072         # Used for subtituting '_' for '.' to make C++ identifiers.
2073         operandsWithExtREString = r'(?<!\w)(%s)_(%s)(?!\w)' \
2074             % (string.join(operands, '|'), string.join(extensions, '|'))
2075
2076         self.operandsWithExtRE = \
2077             re.compile(operandsWithExtREString, re.MULTILINE)
2078
2079     def substMungedOpNames(self, code):
2080         '''Munge operand names in code string to make legal C++
2081         variable names.  This means getting rid of the type extension
2082         if any.  Will match base_name attribute of Operand object.)'''
2083         return self.operandsWithExtRE.sub(r'\1', code)
2084
2085     def mungeSnippet(self, s):
2086         '''Fix up code snippets for final substitution in templates.'''
2087         if isinstance(s, str):
2088             return self.substMungedOpNames(substBitOps(s))
2089         else:
2090             return s
2091
2092     def update_if_needed(self, file, contents):
2093         '''Update the output file only if the new contents are
2094         different from the current contents.  Minimizes the files that
2095         need to be rebuilt after minor changes.'''
2096
2097         file = os.path.join(self.output_dir, file)
2098         update = False
2099         if os.access(file, os.R_OK):
2100             f = open(file, 'r')
2101             old_contents = f.read()
2102             f.close()
2103             if contents != old_contents:
2104                 os.remove(file) # in case it's write-protected
2105                 update = True
2106             else:
2107                 print 'File', file, 'is unchanged'
2108         else:
2109             update = True
2110         if update:
2111             f = open(file, 'w')
2112             f.write(contents)
2113             f.close()
2114
    # This regular expression matches '##include' directives: a line
    # consisting of optional leading whitespace, '##include', and a
    # double-quoted file name, which is captured in the group named
    # 'filename'.  Anything after the closing quote up to the end of
    # the line is part of the match as well.  re.MULTILINE lets '^'
    # and '$' match at every line of a multi-line string.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[^"]*)".*$',
                           re.MULTILINE)
2118
2119     def replace_include(self, matchobj, dirname):
2120         """Function to replace a matched '##include' directive with the
2121         contents of the specified file (with nested ##includes
2122         replaced recursively).  'matchobj' is an re match object
2123         (from a match of includeRE) and 'dirname' is the directory
2124         relative to which the file path should be resolved."""
2125
2126         fname = matchobj.group('filename')
2127         full_fname = os.path.normpath(os.path.join(dirname, fname))
2128         contents = '##newfile "%s"\n%s\n##endfile\n' % \
2129                    (full_fname, self.read_and_flatten(full_fname))
2130         return contents
2131
2132     def read_and_flatten(self, filename):
2133         """Read a file and recursively flatten nested '##include' files."""
2134
2135         current_dir = os.path.dirname(filename)
2136         try:
2137             contents = open(filename).read()
2138         except IOError:
2139             error('Error including file "%s"' % filename)
2140
2141         self.fileNameStack.push((filename, 0))
2142
2143         # Find any includes and include them
2144         def replace(matchobj):
2145             return self.replace_include(matchobj, current_dir)
2146         contents = self.includeRE.sub(replace, contents)
2147
2148         self.fileNameStack.pop()
2149         return contents
2150
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated output files (decoder.hh, decoder.cc, one exec file
        per CPU model and max_inst_regs.hh) via update_if_needed().

        NOTE: the output templates are filled in with vars(), so the
        names of the local variables in this method are significant.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse_string(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        # (the locals set below are picked up by 'file_template % vars()')
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            # exec_output is indexed by CPU model name
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        MaxMiscDestRegs = self.maxMiscDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2203
2204     def parse_isa_desc(self, *args, **kwargs):
2205         try:
2206             self._parse_isa_desc(*args, **kwargs)
2207         except ISAParserError, e:
2208             e.exit(self.fileNameStack)
2209
2210 # Called as script: get args from command line.
2211 # Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # sys.argv[1] is the path to cpu_models.py; executing it here is
    # expected to make CpuModel available for the lookup below.
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Every argument from the fourth on names a CPU model.
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    # sys.argv[3] is the output directory, sys.argv[2] the ISA desc file.
    ISAParser(sys.argv[3], cpu_models).parse_isa_desc(sys.argv[2])