1 #-----------------------------------------------------------------------------
4 # Author(s): David M. Beazley (dave@dabeaz.com)
6 # Copyright (C) 2001-2007, David M. Beazley
8 # This library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU Lesser General Public
10 # License as published by the Free Software Foundation; either
11 # version 2.1 of the License, or (at your option) any later version.
13 # This library is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # Lesser General Public License for more details.
18 # You should have received a copy of the GNU Lesser General Public
19 # License along with this library; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 # See the file COPYING for a complete copy of the LGPL.
25 # This implements an LR parser that is constructed from grammar rules defined
26 # as Python functions. The grammer is specified by supplying the BNF inside
27 # Python documentation strings. The inspiration for this technique was borrowed
28 # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
29 # Spark and the GNU bison utility.
31 # The current implementation is only somewhat object-oriented. The
32 # LR parser itself is defined in terms of an object (which allows multiple
33 # parsers to co-exist). However, most of the variables used during table
34 # construction are defined in terms of global variables. Users shouldn't
35 # notice unless they are trying to define multiple parsers at the same
36 # time using threads (in which case they should have their head examined).
38 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
39 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
40 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
41 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
42 # by the more efficient DeRemer and Pennello algorithm.
44 # :::::::: WARNING :::::::
46 # Construction of LR parsing tables is fairly complicated and expensive.
47 # To make this module run fast, a *LOT* of work has been put into
48 # optimization---often at the expense of readability and what one might
49 # consider to be good Python "coding style."  Modify the code at your own risk!
51 # ----------------------------------------------------------------------------
55 #-----------------------------------------------------------------------------
56 # === User configurable parameters ===
58 # Change these to modify the default behavior of yacc (if you wish)
59 #-----------------------------------------------------------------------------
# Debugging mode.  If set, yacc generates a 'parser.out' file
# in the current directory.
yaccdebug = 1

# Default name of the debugging file.
debug_file = 'parser.out'

# Default name of the generated table module.
tab_module = 'parsetab'

# Default LR table generation method ('LALR' or 'SLR').
default_lr = 'LALR'

# Number of symbols that must be shifted to leave error recovery mode.
error_count = 3
70 import re
, types
, sys
, cStringIO
, md5
, os
.path
72 # Exception raised for yacc-related errors
class YaccError(Exception):
    """Exception raised for yacc-related errors."""
    pass
75 # Available instance types. This is used when parsers are defined by a class.
76 # it's a little funky because I want to preserve backwards compatibility
77 # with Python 2.0 where types.ObjectType is undefined.
# Determine the tuple of types recognized as "instances" when parsers
# are defined by a class.  Python 2.0 has no types.ObjectType (no
# new-style classes), so probing it raises AttributeError and we fall
# back to classic instances only.
# NOTE(review): the original `try:` line was lost in this copy; restored
# here — the dangling `except AttributeError:` makes it unambiguous.
try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = types.InstanceType
    class object: pass     # Note: needed if no new-style classes present
85 #-----------------------------------------------------------------------------
86 # === LR Parsing Engine ===
88 # The following classes are used for the LR parser itself. These are not
89 # used during table construction and are independent of the actual LR
90 # table generation algorithm
91 #-----------------------------------------------------------------------------
93 # This class is used to hold non-terminal grammar symbols during parsing.
94 # It normally has the following attributes set:
95 # .type = Grammar symbol type
96 # .value = Symbol value
97 # .lineno = Starting line number
98 # .endlineno = Ending line number (optional, set automatically)
99 # .lexpos = Starting lex position
100 # .endlexpos = Ending lex position (optional, set automatically)
class YaccSymbol(object):
    """Container for a non-terminal grammar symbol during parsing.

    Attributes set by the parser include .type, .value, .lineno,
    .endlineno (optional), .lexpos, and .endlexpos (optional).
    """

    def __str__(self):
        # A symbol displays as its grammar type name.
        return self.type

    def __repr__(self):
        # repr() mirrors str() for readable debugging output.
        return str(self)
106 # This class is a wrapper around the objects actually passed to each
107 # grammar rule. Index lookup and assignment actually assign the
108 # .value attribute of the underlying YaccSymbol object.
109 # The lineno() method returns the line number of a given
110 # item (or 0 if not defined). The linespan() method returns
111 # a tuple of (startline,endline) representing the range of lines
112 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
113 # representing the range of positional information for a symbol.
115 class YaccProduction
:
116 def __init__(self
,s
,stack
=None):
120 def __getitem__(self
,n
):
121 if n
>= 0: return self
.slice[n
].value
122 else: return self
.stack
[n
].value
124 def __setitem__(self
,n
,v
):
125 self
.slice[n
].value
= v
127 def __getslice__(self
,i
,j
):
128 return [s
.value
for s
in self
.slice[i
:j
]]
131 return len(self
.slice)
134 return getattr(self
.slice[n
],"lineno",0)
136 def linespan(self
,n
):
137 startline
= getattr(self
.slice[n
],"lineno",0)
138 endline
= getattr(self
.slice[n
],"endlineno",startline
)
139 return startline
,endline
142 return getattr(self
.slice[n
],"lexpos",0)
145 startpos
= getattr(self
.slice[n
],"lexpos",0)
146 endpos
= getattr(self
.slice[n
],"endlexpos",startpos
)
147 return startpos
,endpos
149 def pushback(self
,n
):
151 raise ValueError, "Expected a positive value"
152 if n
> (len(self
.slice)-1):
153 raise ValueError, "Can't push %d tokens. Only %d are available." % (n
,len(self
.slice)-1)
155 self
.pbstack
.append(self
.slice[-i
-1])
157 # The LR Parsing engine. This is defined as a class so that multiple parsers
158 # can exist in the same process. A user never instantiates this directly.
159 # Instead, the global yacc() function should be used to create a suitable Parser
163 def __init__(self
,magic
=None):
165 # This is a hack to keep users from trying to instantiate a Parser
169 raise YaccError
, "Can't instantiate Parser. Use yacc() instead."
171 # Reset internal state
172 self
.productions
= None # List of productions
173 self
.errorfunc
= None # Error handling function
174 self
.action
= { } # LR Action table
175 self
.goto
= { } # LR goto table
176 self
.require
= { } # Attribute require table
177 self
.method
= "Unknown LR" # Table construction method used
183 del self
.statestack
[:]
187 self
.symstack
.append(sym
)
188 self
.statestack
.append(0)
190 def parse(self
,input=None,lexer
=None,debug
=0,tracking
=0):
191 lookahead
= None # Current lookahead symbol
192 lookaheadstack
= [ ] # Stack of lookahead symbols
193 actions
= self
.action
# Local reference to action table
194 goto
= self
.goto
# Local reference to goto table
195 prod
= self
.productions
# Local reference to production list
196 pslice
= YaccProduction(None) # Production object passed to grammar rules
197 errorcount
= 0 # Used during error recovery
199 # If no lexer was given, we will try to use the lex module
207 # If input was supplied, pass to lexer
212 get_token
= lexer
.token
214 statestack
= [ ] # Stack of parsing states
215 self
.statestack
= statestack
216 symstack
= [ ] # Stack of grammar symbols
217 self
.symstack
= symstack
219 pslice
.stack
= symstack
# Put in the production
220 errtoken
= None # Err token
222 # The start state is assumed to be (0,$end)
230 # Get the next symbol on the input. If a lookahead symbol
231 # is already set, we just use that. Otherwise, we'll pull
232 # the next token off of the lookaheadstack or from the lexer
236 if not lookaheadstack
:
237 lookahead
= get_token() # Get the next token
239 lookahead
= lookaheadstack
.pop()
241 lookahead
= YaccSymbol()
242 lookahead
.type = '$end'
244 errorlead
= ("%s . %s" % (" ".join([xx
.type for xx
in symstack
][1:]), str(lookahead
))).lstrip()
246 # Check the action table
247 ltype
= lookahead
.type
248 t
= actions
[state
].get(ltype
)
254 # shift a symbol on the stack
256 # Error, end of input
257 sys
.stderr
.write("yacc: Parse error. EOF\n")
262 sys
.stderr
.write("%-60s shift state %s\n" % (errorlead
, t
))
263 symstack
.append(lookahead
)
266 # Decrease error count on successful shift
267 if errorcount
: errorcount
-=1
271 # reduce a symbol on the stack, emit a production
276 # Get production function
278 sym
.type = pname
# Production name
281 sys
.stderr
.write("%-60s reduce %d\n" % (errorlead
, -t
))
284 targ
= symstack
[-plen
-1:]
288 sym
.lineno
= t1
.lineno
289 sym
.lexpos
= t1
.lexpos
291 sym
.endlineno
= getattr(t1
,"endlineno",t1
.lineno
)
292 sym
.endlexpos
= getattr(t1
,"endlexpos",t1
.lexpos
)
294 del statestack
[-plen
:]
297 sym
.lineno
= lexer
.lineno
298 sym
.lexpos
= lexer
.lexpos
302 # Call the grammar rule with our special slice object
305 # If there was a pushback, put that on the stack
307 lookaheadstack
.append(lookahead
)
308 for _t
in pslice
.pbstack
:
309 lookaheadstack
.append(_t
)
314 state
= goto
[statestack
[-1]][pname
]
315 statestack
.append(state
)
320 return getattr(n
,"value",None)
324 sys
.stderr
.write(errorlead
+ "\n")
325 # We have some kind of parsing error here. To handle
326 # this, we are going to push the current token onto
327 # the tokenstack and replace it with an 'error' token.
328 # If there are any synchronization rules, they may
331 # In addition to pushing the error token, we call
332 # the user defined p_error() function if this is the
333 # first syntax error. This function is only called if
335 if errorcount
== 0 or self
.errorok
:
336 errorcount
= error_count
339 if errtoken
.type == '$end':
340 errtoken
= None # End of file!
342 global errok
,token
,restart
343 errok
= self
.errok
# Set some special functions available in error recovery
345 restart
= self
.restart
346 tok
= self
.errorfunc(errtoken
)
347 del errok
, token
, restart
# Delete special functions
350 # User must have done some kind of panic
351 # mode recovery on their own. The
352 # returned token is the next lookahead
358 if hasattr(errtoken
,"lineno"): lineno
= lookahead
.lineno
361 sys
.stderr
.write("yacc: Syntax error at line %d, token=%s\n" % (lineno
, errtoken
.type))
363 sys
.stderr
.write("yacc: Syntax error, token=%s" % errtoken
.type)
365 sys
.stderr
.write("yacc: Parse error in input. EOF\n")
369 errorcount
= error_count
371 # case 1: the statestack only has 1 entry on it. If we're in this state, the
372 # entire parse has been rolled back and we're completely hosed. The token is
373 # discarded and we just keep going.
375 if len(statestack
) <= 1 and lookahead
.type != '$end':
378 # Nuke the pushback stack
379 del lookaheadstack
[:]
382 # case 2: the statestack has a couple of entries on it, but we're
383 # at the end of the file. nuke the top entry and generate an error token
385 # Start nuking entries on the stack
386 if lookahead
.type == '$end':
387 # Whoa. We're really hosed here. Bail out
390 if lookahead
.type != 'error':
392 if sym
.type == 'error':
393 # Hmmm. Error is on top of stack, we'll just nuke input
394 # symbol and continue
399 if hasattr(lookahead
,"lineno"):
400 t
.lineno
= lookahead
.lineno
402 lookaheadstack
.append(lookahead
)
410 # Call an error function here
411 raise RuntimeError, "yacc: internal parser error!!!\n"
413 # -----------------------------------------------------------------------------
414 # === Parser Construction ===
416 # The following functions and variables are used to implement the yacc() function
417 # itself. This is pretty hairy stuff involving lots of error checking,
418 # construction of LR items, kernels, and so forth. Although a lot of
419 # this work is done using global variables, the resulting Parser object
420 # is completely self contained--meaning that it is safe to repeatedly
421 # call yacc() with different grammars in the same application.
422 # -----------------------------------------------------------------------------
424 # -----------------------------------------------------------------------------
427 # This function checks to see if there are duplicated p_rulename() functions
428 # in the parser module file. Without this function, it is really easy for
429 # users to make mistakes by cutting and pasting code fragments (and it's a real
430 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
431 # we just do a little regular expression pattern matching of def statements
432 # to try and detect duplicates.
433 # -----------------------------------------------------------------------------
435 def validate_file(filename
):
436 base
,ext
= os
.path
.splitext(filename
)
437 if ext
!= '.py': return 1 # No idea. Assume it's okay.
441 lines
= f
.readlines()
446 # Match def p_funcname(
447 fre
= re
.compile(r
'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
455 prev
= counthash
.get(name
)
457 counthash
[name
] = linen
459 sys
.stderr
.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename
,linen
,name
,prev
))
464 # This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
465 def validate_dict(d
):
466 for n
,v
in d
.items():
467 if n
[0:2] == 'p_' and type(v
) in (types
.FunctionType
, types
.MethodType
): continue
468 if n
[0:2] == 't_': continue
471 sys
.stderr
.write("yacc: Warning. '%s' not defined as a function\n" % n
)
472 if 1 and isinstance(v
,types
.FunctionType
) and v
.func_code
.co_argcount
== 1:
474 doc
= v
.__doc
__.split(" ")
476 sys
.stderr
.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v
.func_code
.co_filename
, v
.func_code
.co_firstlineno
,n
))
477 except StandardError:
480 # -----------------------------------------------------------------------------
481 # === GRAMMAR FUNCTIONS ===
483 # The following global variables and functions are used to store, manipulate,
484 # and verify the grammar rules specified by the user.
485 # -----------------------------------------------------------------------------
487 # Initialize all of the global variables used during grammar construction
def initialize_vars():
    """Reset the module-level tables used during grammar construction.

    yacc() may be called repeatedly with different grammars in the same
    application; this wipes all global construction state so each run
    starts from a clean slate.
    """
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    Productions  = [None]  # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar.

    Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.

    Prodmap      = { }     # A dictionary that is only used to detect duplicate
                           # productions.

    Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.

    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.

    First        = { }     # A dictionary of precomputed FIRST(x) symbols.

    Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols.

    Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level).

    LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E

    Errorfunc    = None    # User defined error handler

    Signature    = md5.new()  # Digital signature of the grammar rules, precedence
                              # and other information.  Used to determine when a
                              # parsing table needs to be regenerated.

    Requires     = { }     # Requires list

    # File objects used when creating the parser.out debugging file.
    # NOTE(review): the `global _vf,_vfc` declaration was missing from this
    # copy; restored — without it these assignments would create function
    # locals and the module-level debug buffers would never be initialized.
    global _vf, _vfc
    _vf  = cStringIO.StringIO()
    _vfc = cStringIO.StringIO()
532 # -----------------------------------------------------------------------------
535 # This class stores the raw information about a single production or grammar rule.
536 # It has a few required attributes:
538 # name - Name of the production (nonterminal)
539 # prod - A list of symbols making up its production
540 # number - Production number.
542 # In addition, a few additional attributes are used to help with debugging or
543 # optimization of table generation.
545 # file - File where production action is defined.
546 # lineno - Line number where action is defined
547 # func - Action function
548 # prec - Precedence level
549 # lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
550 # then lr_next refers to 'E -> E PLUS . E'
551 # lr_index - LR item index (location of the ".") in the prod list.
552 # lookaheads - LALR lookahead symbols for this item
553 # len - Length of the production (number of symbols on right hand side)
554 # -----------------------------------------------------------------------------
557 def __init__(self
,**kw
):
558 for k
,v
in kw
.items():
561 self
.lr0_added
= 0 # Flag indicating whether or not added to LR0 closure
562 self
.lr1_added
= 0 # Flag indicating whether or not added to LR1
564 self
.lookaheads
= { }
566 self
.setnumbers
= [ ]
570 s
= "%s -> %s" % (self
.name
," ".join(self
.prod
))
572 s
= "%s -> <empty>" % self
.name
578 # Compute lr_items from the production
580 if n
> len(self
.prod
): return None
583 p
.prod
= list(self
.prod
)
584 p
.number
= self
.number
587 p
.setnumbers
= self
.setnumbers
589 p
.prod
= tuple(p
.prod
)
593 # Precompute list of productions immediately following
595 p
.lrafter
= Prodnames
[p
.prod
[n
+1]]
596 except (IndexError,KeyError),e
:
599 p
.lrbefore
= p
.prod
[n
-1]
605 class MiniProduction
:
# Regular expression matching a legal rule/symbol identifier:
# letters, digits, underscore, and hyphen (full-string match).
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
611 # -----------------------------------------------------------------------------
614 # Given an action function, this function assembles a production rule.
615 # The production rule is assumed to be found in the function's docstring.
616 # This rule has the general syntax:
618 # name1 ::= production1
623 # name2 ::= production1
626 # -----------------------------------------------------------------------------
628 def add_production(f
,file,line
,prodname
,syms
):
630 if Terminals
.has_key(prodname
):
631 sys
.stderr
.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line
,prodname
))
633 if prodname
== 'error':
634 sys
.stderr
.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line
,prodname
))
637 if not _is_identifier
.match(prodname
):
638 sys
.stderr
.write("%s:%d: Illegal rule name '%s'\n" % (file,line
,prodname
))
641 for x
in range(len(syms
)):
647 sys
.stderr
.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line
,s
, prodname
))
649 if not Terminals
.has_key(c
):
655 if not _is_identifier
.match(s
) and s
!= '%prec':
656 sys
.stderr
.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line
,s
, prodname
))
659 # See if the rule is already in the rulemap
660 map = "%s -> %s" % (prodname
,syms
)
661 if Prodmap
.has_key(map):
663 sys
.stderr
.write("%s:%d: Duplicate rule %s.\n" % (file,line
, m
))
664 sys
.stderr
.write("%s:%d: Previous definition at %s:%d\n" % (file,line
, m
.file, m
.line
))
673 p
.number
= len(Productions
)
676 Productions
.append(p
)
678 if not Nonterminals
.has_key(prodname
):
679 Nonterminals
[prodname
] = [ ]
681 # Add all terminals to Terminals
683 while i
< len(p
.prod
):
687 precname
= p
.prod
[i
+1]
689 sys
.stderr
.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p
.file,p
.line
))
692 prec
= Precedence
.get(precname
,None)
694 sys
.stderr
.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p
.file,p
.line
,precname
))
702 if Terminals
.has_key(t
):
703 Terminals
[t
].append(p
.number
)
704 # Is a terminal. We'll assign a precedence to p based on this
705 if not hasattr(p
,"prec"):
706 p
.prec
= Precedence
.get(t
,('right',0))
708 if not Nonterminals
.has_key(t
):
709 Nonterminals
[t
] = [ ]
710 Nonterminals
[t
].append(p
.number
)
713 if not hasattr(p
,"prec"):
716 # Set final length of productions
718 p
.prod
= tuple(p
.prod
)
720 # Calculate unique syms in the production
726 # Add to the global productions list
728 Prodnames
[p
.name
].append(p
)
730 Prodnames
[p
.name
] = [ p
]
733 # Given a raw rule function, this function rips out its doc string
734 # and adds rules to the grammar
737 line
= f
.func_code
.co_firstlineno
738 file = f
.func_code
.co_filename
741 if isinstance(f
,types
.MethodType
):
746 if f
.func_code
.co_argcount
> reqdargs
:
747 sys
.stderr
.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line
,f
.__name
__))
750 if f
.func_code
.co_argcount
< reqdargs
:
751 sys
.stderr
.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line
,f
.__name
__))
755 # Split the doc string into lines
756 pstrings
= f
.__doc
__.splitlines()
765 # This is a continuation of a previous rule
767 sys
.stderr
.write("%s:%d: Misplaced '|'.\n" % (file,dline
))
782 if assign
!= ':' and assign
!= '::=':
783 sys
.stderr
.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline
))
787 e
= add_production(f
,file,dline
,prodname
,syms
)
791 except StandardError:
792 sys
.stderr
.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline
,ps
))
795 sys
.stderr
.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line
,f
.__name
__))
799 # Cycle checking code (Michael Dyck)
801 def compute_reachable():
803 Find each symbol that can be reached from the start symbol.
804 Print a warning for any nonterminals that can't be reached.
805 (Unused terminals have already had their warning.)
808 for s
in Terminals
.keys() + Nonterminals
.keys():
811 mark_reachable_from( Productions
[0].prod
[0], Reachable
)
813 for s
in Nonterminals
.keys():
815 sys
.stderr
.write("yacc: Symbol '%s' is unreachable.\n" % s
)
817 def mark_reachable_from(s
, Reachable
):
819 Mark all symbols that are reachable from symbol s.
822 # We've already reached symbol s.
825 for p
in Prodnames
.get(s
,[]):
827 mark_reachable_from(r
, Reachable
)
829 # -----------------------------------------------------------------------------
830 # compute_terminates()
832 # This function looks at the various parsing rules and tries to detect
833 # infinite recursion cycles (grammar rules where there is no possible way
834 # to derive a string of only terminals).
835 # -----------------------------------------------------------------------------
836 def compute_terminates():
838 Raise an error for any symbols that don't terminate.
843 for t
in Terminals
.keys():
846 Terminates
['$end'] = 1
850 # Initialize to false:
851 for n
in Nonterminals
.keys():
854 # Then propagate termination until no change:
857 for (n
,pl
) in Prodnames
.items():
858 # Nonterminal n terminates iff any of its productions terminates.
860 # Production p terminates iff all of its rhs symbols terminate.
862 if not Terminates
[s
]:
863 # The symbol s does not terminate,
864 # so production p does not terminate.
868 # didn't break from the loop,
869 # so every symbol s terminates
870 # so production p terminates.
874 # symbol n terminates!
875 if not Terminates
[n
]:
878 # Don't need to consider any more productions for this n.
885 for (s
,terminates
) in Terminates
.items():
887 if not Prodnames
.has_key(s
) and not Terminals
.has_key(s
) and s
!= 'error':
888 # s is used-but-not-defined, and we've already warned of that,
889 # so it would be overkill to say that it's also non-terminating.
892 sys
.stderr
.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s
)
897 # -----------------------------------------------------------------------------
898 # verify_productions()
900 # This function examines all of the supplied rules to see if they seem valid.
901 # -----------------------------------------------------------------------------
902 def verify_productions(cycle_check
=1):
904 for p
in Productions
:
908 if not Prodnames
.has_key(s
) and not Terminals
.has_key(s
) and s
!= 'error':
909 sys
.stderr
.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p
.file,p
.line
,s
))
914 # Now verify all of the tokens
916 _vf
.write("Unused terminals:\n\n")
917 for s
,v
in Terminals
.items():
918 if s
!= 'error' and not v
:
919 sys
.stderr
.write("yacc: Warning. Token '%s' defined, but not used.\n" % s
)
920 if yaccdebug
: _vf
.write(" %s\n"% s
)
923 # Print out all of the productions
925 _vf
.write("\nGrammar\n\n")
926 for i
in range(1,len(Productions
)):
927 _vf
.write("Rule %-5d %s\n" % (i
, Productions
[i
]))
930 # Verify the use of all productions
931 for s
,v
in Nonterminals
.items():
934 sys
.stderr
.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p
.file,p
.line
, s
))
939 sys
.stderr
.write("yacc: Warning. There is 1 unused token.\n")
941 sys
.stderr
.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok
)
944 sys
.stderr
.write("yacc: Warning. There is 1 unused rule.\n")
946 sys
.stderr
.write("yacc: Warning. There are %d unused rules.\n" % unused_prod
)
949 _vf
.write("\nTerminals, with rules where they appear\n\n")
950 ks
= Terminals
.keys()
953 _vf
.write("%-20s : %s\n" % (k
, " ".join([str(s
) for s
in Terminals
[k
]])))
954 _vf
.write("\nNonterminals, with rules where they appear\n\n")
955 ks
= Nonterminals
.keys()
958 _vf
.write("%-20s : %s\n" % (k
, " ".join([str(s
) for s
in Nonterminals
[k
]])))
962 error
+= compute_terminates()
963 # error += check_cycles()
966 # -----------------------------------------------------------------------------
969 # This function walks the list of productions and builds a complete set of the
970 # LR items. The LR items are stored in two ways: First, they are uniquely
971 # numbered and placed in the list _lritems. Second, a linked list of LR items
972 # is built for each production. For example:
978 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
979 # -----------------------------------------------------------------------------
982 for p
in Productions
:
988 lastlri
.lr_next
= lri
990 lri
.lr_num
= len(LRitems
)
995 # In order for the rest of the parser generator to work, we need to
996 # guarantee that no more lritems are generated. Therefore, we nuke
997 # the p.lr_item method. (Only used in debugging)
998 # Production.lr_item = None
1000 # -----------------------------------------------------------------------------
1003 # Given a list of precedence rules, add to the precedence table.
1004 # -----------------------------------------------------------------------------
1006 def add_precedence(plist
):
1014 if prec
!= 'left' and prec
!= 'right' and prec
!= 'nonassoc':
1015 sys
.stderr
.write("yacc: Invalid precedence '%s'\n" % prec
)
1018 if Precedence
.has_key(t
):
1019 sys
.stderr
.write("yacc: Precedence already specified for terminal '%s'\n" % t
)
1022 Precedence
[t
] = (prec
,plevel
)
1024 sys
.stderr
.write("yacc: Invalid precedence table.\n")
1029 # -----------------------------------------------------------------------------
1032 # Compute the augmented grammar. This is just a rule S' -> start where start
1033 # is the starting symbol.
1034 # -----------------------------------------------------------------------------
1036 def augment_grammar(start
=None):
1038 start
= Productions
[1].name
1039 Productions
[0] = Production(name
="S'",prod
=[start
],number
=0,len=1,prec
=('right',0),func
=None)
1040 Productions
[0].usyms
= [ start
]
1041 Nonterminals
[start
].append(0)
1044 # -------------------------------------------------------------------------
1047 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1049 # During execution of compute_first1, the result may be incomplete.
1050 # Afterward (e.g., when called from compute_follow()), it will be complete.
1051 # -------------------------------------------------------------------------
1054 # We are computing First(x1,x2,x3,...,xn)
1057 x_produces_empty
= 0
1059 # Add all the non-<empty> symbols of First[x] to the result.
1062 x_produces_empty
= 1
1064 if f
not in result
: result
.append(f
)
1066 if x_produces_empty
:
1067 # We have to consider the next x in beta,
1068 # i.e. stay in the loop.
1071 # We don't have to consider any further symbols in beta.
1074 # There was no 'break' from the loop,
1075 # so x_produces_empty was true for all x in beta,
1076 # so beta produces empty as well.
1077 result
.append('<empty>')
1083 # Given a non-terminal. This function computes the set of all symbols
1084 # that might follow it. Dragon book, p. 189.
1086 def compute_follow(start
=None):
1087 # Add '$end' to the follow list of the start symbol
1088 for k
in Nonterminals
.keys():
1092 start
= Productions
[1].name
1094 Follow
[start
] = [ '$end' ]
1098 for p
in Productions
[1:]:
1099 # Here is the production set
1100 for i
in range(len(p
.prod
)):
1102 if Nonterminals
.has_key(B
):
1103 # Okay. We got a non-terminal in a production
1104 fst
= first(p
.prod
[i
+1:])
1107 if f
!= '<empty>' and f
not in Follow
[B
]:
1112 if hasempty
or i
== (len(p
.prod
)-1):
1113 # Add elements of follow(a) to follow(b)
1114 for f
in Follow
[p
.name
]:
1115 if f
not in Follow
[B
]:
1118 if not didadd
: break
1121 _vf
.write('\nFollow:\n')
1122 for k
in Nonterminals
.keys():
1123 _vf
.write("%-20s : %s\n" % (k
, " ".join([str(s
) for s
in Follow
[k
]])))
1125 # -------------------------------------------------------------------------
1128 # Compute the value of FIRST1(X) for all symbols
1129 # -------------------------------------------------------------------------
1130 def compute_first1():
1133 for t
in Terminals
.keys():
1136 First
['$end'] = ['$end']
1137 First
['#'] = ['#'] # what's this for?
1141 # Initialize to the empty set:
1142 for n
in Nonterminals
.keys():
1145 # Then propagate symbols until no change:
1148 for n
in Nonterminals
.keys():
1149 for p
in Prodnames
[n
]:
1150 for f
in first(p
.prod
):
1151 if f
not in First
[n
]:
1152 First
[n
].append( f
)
1158 _vf
.write('\nFirst:\n')
1159 for k
in Nonterminals
.keys():
1160 _vf
.write("%-20s : %s\n" %
1161 (k
, " ".join([str(s
) for s
in First
[k
]])))
1163 # -----------------------------------------------------------------------------
1164 # === SLR Generation ===
1166 # The following functions are used to construct SLR (Simple LR) parsing tables
1167 # as described on p.221-229 of the dragon book.
1168 # -----------------------------------------------------------------------------
1170 # Global variables for the LR parsing engine
1172 global _lr_action
, _lr_goto
, _lr_method
1173 global _lr_goto_cache
, _lr0_cidhash
1175 _lr_action
= { } # Action table
1176 _lr_goto
= { } # Goto table
1177 _lr_method
= "Unknown" # LR method used
1178 _lr_goto_cache
= { }
1182 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1183 # prodlist is a list of productions.
1185 _add_count
= 0 # Counter used to detect cycles
1191 prodlist
= Productions
1193 # Add everything in I to J
1200 if x
.lr0_added
== _add_count
: continue
1203 x
.lr0_added
= _add_count
1208 # Compute the LR(0) goto function goto(I,X) where I is a set
1209 # of LR(0) items and X is a grammar symbol. This function is written
1210 # in a way that guarantees uniqueness of the generated goto sets
1211 # (i.e. the same goto set will never be returned as two different Python
1212 # objects). With uniqueness, we can later do fast set comparisons using
1213 # id(obj) instead of element-wise comparison.
1216 # First we look for a previously cached entry
1217 g
= _lr_goto_cache
.get((id(I
),x
),None)
1220 # Now we generate the goto set in a way that guarantees uniqueness
1223 s
= _lr_goto_cache
.get(x
,None)
1226 _lr_goto_cache
[x
] = s
1231 if n
and n
.lrbefore
== x
:
1232 s1
= s
.get(id(n
),None)
1238 g
= s
.get('$end',None)
1245 _lr_goto_cache
[(id(I
),x
)] = g
1250 # Compute the LR(0) sets of item function
1253 C
= [ lr0_closure([Productions
[0].lr_next
]) ]
1256 _lr0_cidhash
[id(I
)] = i
1259 # Loop over the items in C and each grammar symbols
1265 # Collect all of the symbols that could possibly be in the goto(I,X) sets
1271 for x
in asyms
.keys():
1274 if _lr0_cidhash
.has_key(id(g
)): continue
1275 _lr0_cidhash
[id(g
)] = len(C
)
1280 # -----------------------------------------------------------------------------
1281 # ==== LALR(1) Parsing ====
1283 # LALR(1) parsing is almost exactly the same as SLR except that instead of
1284 # relying upon Follow() sets when performing reductions, a more selective
1285 # lookahead set that incorporates the state of the LR(0) machine is utilized.
1286 # Thus, we mainly just have to focus on calculating the lookahead sets.
1288 # The method used here is due to DeRemer and Pennello (1982).
1290 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
1291 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
1292 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
1294 # Further details can also be found in:
1296 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
1297 # McGraw-Hill Book Company, (1985).
1299 # Note: This implementation is a complete replacement of the LALR(1)
1300 # implementation in PLY-1.x releases. That version was based on
1301 # a less efficient algorithm and it had bugs in its implementation.
1302 # -----------------------------------------------------------------------------
1304 # -----------------------------------------------------------------------------
1305 # compute_nullable_nonterminals()
1307 # Creates a dictionary containing all of the non-terminals that might produce
1308 # an empty production.
1309 # -----------------------------------------------------------------------------
def compute_nullable_nonterminals():
    """Return a dict whose keys are the nonterminals that can derive the
    empty string.  Needed by the LALR(1) lookahead computation."""
    nullable = {}
    num_nullable = 0
    # Iterate to a fixed point: keep adding nonterminals until a full pass
    # discovers nothing new.
    while 1:
        for p in Productions[1:]:
            if p.len == 0:
                # Directly empty production: the name is trivially nullable
                nullable[p.name] = 1
                continue
            for t in p.prod:
                # Any non-nullable symbol on the right disqualifies this rule
                if not nullable.has_key(t): break
            else:
                # Every right-hand symbol is nullable, so the name is too
                nullable[p.name] = 1
        if len(nullable) == num_nullable: break
        num_nullable = len(nullable)
    return nullable
1327 # -----------------------------------------------------------------------------
1328 # find_nonterminal_trans(C)
1330 # Given a set of LR(0) items, this functions finds all of the non-terminal
1331 # transitions. These are transitions in which a dot appears immediately before
1332 # a non-terminal. Returns a list of tuples of the form (state,N) where state
1333 # is the state number and N is the nonterminal symbol.
1335 # The input C is the set of LR(0) items.
1336 # -----------------------------------------------------------------------------
def find_nonterminal_transitions(C):
    """Given the set of LR(0) item sets C, find all nonterminal transitions:
    positions where the dot appears immediately before a nonterminal.
    Returns a list of (state, N) tuples, state being the state number and
    N the nonterminal symbol."""
    trans = []
    for state in range(len(C)):
        for p in C[state]:
            if p.lr_index < p.len - 1:
                # t = (this state, the symbol right after the dot)
                t = (state,p.prod[p.lr_index+1])
                if Nonterminals.has_key(t[1]):
                    if t not in trans: trans.append(t)
    return trans
1349 # -----------------------------------------------------------------------------
1352 # Computes the DR(p,A) relationships for non-terminal transitions. The input
1353 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
1355 # Returns a list of terminals.
1356 # -----------------------------------------------------------------------------
def dr_relation(C,trans,nullable):
    """Compute DR(p,A) for the nonterminal transition trans = (state,N):
    the terminals that can be read directly after taking the transition.
    Returns a list of terminals."""
    state,N = trans
    terms = []

    g = lr0_goto(C[state],N)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index+1]
            if Terminals.has_key(a):
                if a not in terms: terms.append(a)

    # This extra bit is to handle the start state
    if state == 0 and N == Productions[0].prod[0]:
        terms.append('$end')

    return terms
1376 # -----------------------------------------------------------------------------
1379 # Computes the READS() relation (p,A) READS (t,C).
1380 # -----------------------------------------------------------------------------
def reads_relation(C, trans, empty):
    """Compute the READS() relation: (p,A) READS (t,C).
    Follows the transition, then collects (state, symbol) pairs for every
    nullable symbol that appears right after the dot in the resulting set."""
    # Look for empty transitions
    rel = []
    state, N = trans

    g = lr0_goto(C[state],N)
    j = _lr0_cidhash.get(id(g),-1)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index + 1]
            if empty.has_key(a):
                rel.append((j,a))

    return rel
1397 # -----------------------------------------------------------------------------
1398 # compute_lookback_includes()
1400 # Determines the lookback and includes relations
1404 # This relation is determined by running the LR(0) state machine forward.
1405 # For example, starting with a production "N : . A B C", we run it forward
1406 # to obtain "N : A B C ." We then build a relationship between this final
1407 # state and the starting state. These relationships are stored in a dictionary
1412 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
1414 # This relation is used to determine non-terminal transitions that occur
1415 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
1416 # if the following holds:
1418 # B -> LAT, where T -> epsilon and p' -L-> p
1420 # L is essentially a prefix (which may be empty), T is a suffix that must be
1421 # able to derive an empty string. State p' must lead to state p with the string L.
1423 # -----------------------------------------------------------------------------
def compute_lookback_includes(C,trans,nullable):
    """Determine the LOOKBACK and INCLUDES relations for the nonterminal
    transitions in trans, by running the LR(0) machine forward from each
    matching production.  Returns (lookdict, includedict)."""

    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions for O(1) membership tests
    dtrans = {}
    for t in trans:
        dtrans[t] = 1

    # Loop over all transitions and compute lookbacks and includes
    for state,N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N: continue

            # Okay, we have a name match.  We now follow the production all the way
            # through the state machine until we get the . on the right hand side

            lr_index = p.lr_index
            j = state
            while lr_index < p.len - 1:
                lr_index = lr_index + 1
                t = p.prod[lr_index]

                # Check to see if this symbol and state are a non-terminal transition
                if dtrans.has_key((j,t)):
                    # Yes.  Okay, there is some chance that this is an includes relation
                    # the only way to know for certain is whether the rest of the
                    # production derives empty
                    li = lr_index + 1
                    while li < p.len:
                        if Terminals.has_key(p.prod[li]): break      # No forget it
                        if not nullable.has_key(p.prod[li]): break
                        li = li + 1
                    else:
                        # Appears to be a relation between (j,t) and (state,N)
                        includes.append((j,t))

                g = lr0_goto(C[j],t)               # Go to next set
                j = _lr0_cidhash.get(id(g),-1)     # Go to next state

            # When we get here, j is the final state, now we have to locate the production
            for r in C[j]:
                if r.name != p.name: continue
                if r.len != p.len:   continue
                i = 0
                # This loop is comparing a production ". A B C" with "A B C ."
                while i < r.lr_index:
                    if r.prod[i] != p.prod[i+1]: break
                    i = i + 1
                else:
                    lookb.append((j,r))

        for i in includes:
            if not includedict.has_key(i): includedict[i] = []
            includedict[i].append((state,N))
        lookdict[(state,N)] = lookb

    return lookdict,includedict
1487 # -----------------------------------------------------------------------------
1491 # The following two functions are used to compute set valued functions
1494 # F(x) = F'(x) U U{F(y) | x R y}
1496 # This is used to compute the values of Read() sets as well as FOLLOW sets
1497 # in LALR(1) generation.
1499 # Inputs: X - An input set
1501 # FP - Set-valued function
1502 # ------------------------------------------------------------------------------
def digraph(X,R,FP):
    """Generic solver for set-valued functions (DeRemer and Pennello):
        F(x) = FP(x) U U{ F(y) | x R y }
    X  - input set;  R - relation;  FP - base set-valued function.
    Used to compute both Read() and FOLLOW sets in LALR(1) generation.
    Returns F as a dictionary."""
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        # N[x] == 0 means x has not been visited yet
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F
def traverse(x,N,stack,F,X,R,FP):
    """Depth-first traversal helper for digraph().  Detects strongly
    connected components (cycles in R) and makes all members of an SCC
    share the same F value."""
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of an SCC: pop the whole component, marking each
        # member as finished (sys.maxint) and copying the root's F value.
        N[stack[-1]] = sys.maxint
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = sys.maxint
            F[stack[-1]] = F[x]
            element = stack.pop()
1536 # -----------------------------------------------------------------------------
1537 # compute_read_sets()
1539 # Given a set of LR(0) items, this function computes the read sets.
1541 # Inputs: C = Set of LR(0) items
1542 # ntrans = Set of nonterminal transitions
1543 # nullable = Set of empty transitions
1545 # Returns a set containing the read sets
1546 # -----------------------------------------------------------------------------
def compute_read_sets(C, ntrans, nullable):
    """Compute the Read() sets.
    C        - set of LR(0) items
    ntrans   - set of nonterminal transitions
    nullable - set of empty (nullable) nonterminals
    Returns a dict mapping each transition to its read set."""
    FP = lambda x: dr_relation(C,x,nullable)
    R  = lambda x: reads_relation(C,x,nullable)
    F = digraph(ntrans,R,FP)
    return F
1554 # -----------------------------------------------------------------------------
1555 # compute_follow_sets()
1557 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
1558 # and an include set, this function computes the follow sets
1560 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
1563 # ntrans = Set of nonterminal transitions
1564 # readsets = Readset (previously computed)
1565 # inclsets = Include sets (previously computed)
1567 # Returns a set containing the follow sets
1568 # -----------------------------------------------------------------------------
def compute_follow_sets(ntrans,readsets,inclsets):
    """Compute the LALR FOLLOW sets:
        Follow(p,A) = Read(p,A) U U{ Follow(p',B) | (p,A) INCLUDES (p',B) }
    ntrans   - set of nonterminal transitions
    readsets - Read sets (previously computed)
    inclsets - INCLUDES relations (previously computed)
    Returns a dict mapping each transition to its follow set."""
    FP = lambda x: readsets[x]
    R  = lambda x: inclsets.get(x,[])
    F = digraph(ntrans,R,FP)
    return F
1576 # -----------------------------------------------------------------------------
1579 # Attaches the lookahead symbols to grammar rules.
1581 # Inputs: lookbacks - Set of lookback relations
1582 # followset - Computed follow set
1584 # This function directly attaches the lookaheads to productions contained
1585 # in the lookbacks set
1586 # -----------------------------------------------------------------------------
def add_lookaheads(lookbacks,followset):
    """Attach lookahead symbols to grammar rules.
    lookbacks - set of lookback relations
    followset - computed follow sets
    Directly updates the .lookaheads dict on each production contained
    in the lookback relations."""
    for trans,lb in lookbacks.items():
        # Loop over productions in lookback
        for state,p in lb:
            if not p.lookaheads.has_key(state):
                p.lookaheads[state] = []
            f = followset.get(trans,[])
            for a in f:
                if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
1598 # -----------------------------------------------------------------------------
1599 # add_lalr_lookaheads()
1601 # This function does all of the work of adding lookahead information for use
1603 # -----------------------------------------------------------------------------
def add_lalr_lookaheads(C):
    """Do all of the work of adding lookahead information for use by the
    LALR(1) algorithm, given the LR(0) item sets C."""
    # Determine all of the nullable nonterminals
    nullable = compute_nullable_nonterminals()

    # Find all non-terminal transitions
    trans = find_nonterminal_transitions(C)

    # Compute read sets
    readsets = compute_read_sets(C,trans,nullable)

    # Compute lookback/includes relations
    lookd, included = compute_lookback_includes(C,trans,nullable)

    # Compute LALR FOLLOW sets
    followsets = compute_follow_sets(trans,readsets,included)

    # Add all of the lookaheads
    add_lookaheads(lookd,followsets)
1624 # -----------------------------------------------------------------------------
1627 # This function constructs the parse tables for SLR or LALR
1628 # -----------------------------------------------------------------------------
1629 def lr_parse_table(method
):
1631 goto
= _lr_goto
# Goto array
1632 action
= _lr_action
# Action array
1633 actionp
= { } # Action production array (temporary)
1641 sys
.stderr
.write("yacc: Generating %s parsing table...\n" % method
)
1642 _vf
.write("\n\nParsing method: %s\n\n" % method
)
1644 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1645 # This determines the number of states
1649 if method
== 'LALR':
1650 add_lalr_lookaheads(C
)
1653 # Build the parser table, state by state
1656 # Loop over each production in I
1657 actlist
= [ ] # List of actions
1662 _vf
.write("\nstate %d\n\n" % st
)
1664 _vf
.write(" (%d) %s\n" % (p
.number
, str(p
)))
1669 if p
.len == p
.lr_index
+ 1:
1671 # Start symbol. Accept!
1672 st_action
["$end"] = 0
1673 st_actionp
["$end"] = p
1675 # We are at the end of a production. Reduce!
1676 if method
== 'LALR':
1677 laheads
= p
.lookaheads
[st
]
1679 laheads
= Follow
[p
.name
]
1681 actlist
.append((a
,p
,"reduce using rule %d (%s)" % (p
.number
,p
)))
1682 r
= st_action
.get(a
,None)
1684 # Whoa. Have a shift/reduce or reduce/reduce conflict
1686 # Need to decide on shift or reduce here
1687 # By default we favor shifting. Need to add
1688 # some precedence rules here.
1689 sprec
,slevel
= Productions
[st_actionp
[a
].number
].prec
1690 rprec
,rlevel
= Precedence
.get(a
,('right',0))
1691 if (slevel
< rlevel
) or ((slevel
== rlevel
) and (rprec
== 'left')):
1692 # We really need to reduce here.
1693 st_action
[a
] = -p
.number
1695 if not slevel
and not rlevel
:
1696 _vfc
.write("shift/reduce conflict in state %d resolved as reduce.\n" % st
)
1697 _vf
.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a
)
1699 elif (slevel
== rlevel
) and (rprec
== 'nonassoc'):
1702 # Hmmm. Guess we'll keep the shift
1704 _vfc
.write("shift/reduce conflict in state %d resolved as shift.\n" % st
)
1705 _vf
.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a
)
1708 # Reduce/reduce conflict. In this case, we favor the rule
1709 # that was defined first in the grammar file
1710 oldp
= Productions
[-r
]
1711 pp
= Productions
[p
.number
]
1712 if oldp
.line
> pp
.line
:
1713 st_action
[a
] = -p
.number
1715 # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
1717 _vfc
.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st
, st_actionp
[a
].number
, st_actionp
[a
]))
1718 _vf
.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a
,st_actionp
[a
].number
, st_actionp
[a
]))
1720 sys
.stderr
.write("Unknown conflict in state %d\n" % st
)
1722 st_action
[a
] = -p
.number
1726 a
= p
.prod
[i
+1] # Get symbol right after the "."
1727 if Terminals
.has_key(a
):
1729 j
= _lr0_cidhash
.get(id(g
),-1)
1731 # We are in a shift state
1732 actlist
.append((a
,p
,"shift and go to state %d" % j
))
1733 r
= st_action
.get(a
,None)
1735 # Whoa have a shift/reduce or shift/shift conflict
1738 sys
.stderr
.write("Shift/shift conflict in state %d\n" % st
)
1740 # Do a precedence check.
1741 # - if precedence of reduce rule is higher, we reduce.
1742 # - if precedence of reduce is same and left assoc, we reduce.
1743 # - otherwise we shift
1744 rprec
,rlevel
= Productions
[st_actionp
[a
].number
].prec
1745 sprec
,slevel
= Precedence
.get(a
,('right',0))
1746 if (slevel
> rlevel
) or ((slevel
== rlevel
) and (rprec
!= 'left')):
1747 # We decide to shift here... highest precedence to shift
1752 _vfc
.write("shift/reduce conflict in state %d resolved as shift.\n" % st
)
1753 _vf
.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a
)
1754 elif (slevel
== rlevel
) and (rprec
== 'nonassoc'):
1757 # Hmmm. Guess we'll keep the reduce
1758 if not slevel
and not rlevel
:
1760 _vfc
.write("shift/reduce conflict in state %d resolved as reduce.\n" % st
)
1761 _vf
.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a
)
1764 sys
.stderr
.write("Unknown conflict in state %d\n" % st
)
1769 except StandardError,e
:
1770 print sys
.exc_info()
1771 raise YaccError
, "Hosed in lr_parse_table"
1773 # Print the actions associated with each terminal
1776 for a
,p
,m
in actlist
:
1777 if st_action
.has_key(a
):
1778 if p
is st_actionp
[a
]:
1779 _vf
.write(" %-15s %s\n" % (a
,m
))
1780 _actprint
[(a
,m
)] = 1
1782 for a
,p
,m
in actlist
:
1783 if st_action
.has_key(a
):
1784 if p
is not st_actionp
[a
]:
1785 if not _actprint
.has_key((a
,m
)):
1786 _vf
.write(" ! %-15s [ %s ]\n" % (a
,m
))
1787 _actprint
[(a
,m
)] = 1
1789 # Construct the goto table for this state
1795 if Nonterminals
.has_key(s
):
1797 for n
in nkeys
.keys():
1799 j
= _lr0_cidhash
.get(id(g
),-1)
1803 _vf
.write(" %-30s shift and go to state %d\n" % (n
,j
))
1805 action
[st
] = st_action
1806 actionp
[st
] = st_actionp
1812 if n_srconflict
== 1:
1813 sys
.stderr
.write("yacc: %d shift/reduce conflict\n" % n_srconflict
)
1814 if n_srconflict
> 1:
1815 sys
.stderr
.write("yacc: %d shift/reduce conflicts\n" % n_srconflict
)
1816 if n_rrconflict
== 1:
1817 sys
.stderr
.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict
)
1818 if n_rrconflict
> 1:
1819 sys
.stderr
.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict
)
1821 # -----------------------------------------------------------------------------
1822 # ==== LR Utility functions ====
1823 # -----------------------------------------------------------------------------
1825 # -----------------------------------------------------------------------------
1826 # _lr_write_tables()
1828 # This function writes the LR parsing tables to a file
1829 # -----------------------------------------------------------------------------
1831 def lr_write_tables(modulename
=tab_module
,outputdir
=''):
1832 filename
= os
.path
.join(outputdir
,modulename
) + ".py"
1834 f
= open(filename
,"w")
1838 # This file is automatically generated. Do not edit.
1843 """ % (filename
, repr(_lr_method
), repr(Signature
.digest())))
1845 # Change smaller to 0 to go back to original tables
1848 # Factor out names to try and make smaller
1852 for s
,nd
in _lr_action
.items():
1853 for name
,v
in nd
.items():
1861 f
.write("\n_lr_action_items = {")
1862 for k
,v
in items
.items():
1863 f
.write("%r:([" % k
)
1875 for _k, _v in _lr_action_items.items():
1876 for _x,_y in zip(_v[0],_v[1]):
1877 if not _lr_action.has_key(_x): _lr_action[_x] = { }
1878 _lr_action[_x][_k] = _y
1879 del _lr_action_items
1883 f
.write("\n_lr_action = { ");
1884 for k
,v
in _lr_action
.items():
1885 f
.write("(%r,%r):%r," % (k
[0],k
[1],v
))
1889 # Factor out names to try and make smaller
1892 for s
,nd
in _lr_goto
.items():
1893 for name
,v
in nd
.items():
1901 f
.write("\n_lr_goto_items = {")
1902 for k
,v
in items
.items():
1903 f
.write("%r:([" % k
)
1915 for _k, _v in _lr_goto_items.items():
1916 for _x,_y in zip(_v[0],_v[1]):
1917 if not _lr_goto.has_key(_x): _lr_goto[_x] = { }
1918 _lr_goto[_x][_k] = _y
1922 f
.write("\n_lr_goto = { ");
1923 for k
,v
in _lr_goto
.items():
1924 f
.write("(%r,%r):%r," % (k
[0],k
[1],v
))
1927 # Write production table
1928 f
.write("_lr_productions = [\n")
1929 for p
in Productions
:
1932 f
.write(" (%r,%d,%r,%r,%d),\n" % (p
.name
, p
.len, p
.func
.__name
__,p
.file,p
.line
))
1934 f
.write(" (%r,%d,None,None,None),\n" % (p
.name
, p
.len))
1942 print >>sys
.stderr
, "Unable to create '%s'" % filename
1943 print >>sys
.stderr
, e
1946 def lr_read_tables(module
=tab_module
,optimize
=0):
1947 global _lr_action
, _lr_goto
, _lr_productions
, _lr_method
1949 exec "import %s as parsetab" % module
1951 if (optimize
) or (Signature
.digest() == parsetab
._lr
_signature
):
1952 _lr_action
= parsetab
._lr
_action
1953 _lr_goto
= parsetab
._lr
_goto
1954 _lr_productions
= parsetab
._lr
_productions
1955 _lr_method
= parsetab
._lr
_method
1960 except (ImportError,AttributeError):
1964 # -----------------------------------------------------------------------------
1967 # Build the parser module
1968 # -----------------------------------------------------------------------------
1970 def yacc(method
=default_lr
, debug
=yaccdebug
, module
=None, tabmodule
=tab_module
, start
=None, check_recursion
=1, optimize
=0,write_tables
=1,debugfile
=debug_file
,outputdir
=''):
1979 # Add parsing method to signature
1980 Signature
.update(method
)
1982 # If a "module" parameter was supplied, extract its dictionary.
1983 # Note: a module may in fact be an instance as well.
1986 # User supplied a module object.
1987 if isinstance(module
, types
.ModuleType
):
1988 ldict
= module
.__dict
__
1989 elif isinstance(module
, _INSTANCETYPE
):
1990 _items
= [(k
,getattr(module
,k
)) for k
in dir(module
)]
1995 raise ValueError,"Expected a module"
1998 # No module given. We might be able to get information from the caller.
1999 # Throw an exception and unwind the traceback to get the globals
2003 except RuntimeError:
2004 e
,b
,t
= sys
.exc_info()
2006 f
= f
.f_back
# Walk out to our calling function
2007 ldict
= f
.f_globals
# Grab its globals dictionary
2009 # Add starting symbol to signature
2011 start
= ldict
.get("start",None)
2013 Signature
.update(start
)
2015 # If running in optimized mode. We're going to
2017 if (optimize
and lr_read_tables(tabmodule
,1)):
2020 for p
in _lr_productions
:
2022 Productions
.append(None)
2024 m
= MiniProduction()
2030 m
.func
= ldict
[p
[2]]
2031 Productions
.append(m
)
2034 # Get the tokens map
2035 if (module
and isinstance(module
,_INSTANCETYPE
)):
2036 tokens
= getattr(module
,"tokens",None)
2038 tokens
= ldict
.get("tokens",None)
2041 raise YaccError
,"module does not define a list 'tokens'"
2042 if not (isinstance(tokens
,types
.ListType
) or isinstance(tokens
,types
.TupleType
)):
2043 raise YaccError
,"tokens must be a list or tuple."
2045 # Check to see if a requires dictionary is defined.
2046 requires
= ldict
.get("require",None)
2048 if not (isinstance(requires
,types
.DictType
)):
2049 raise YaccError
,"require must be a dictionary."
2051 for r
,v
in requires
.items():
2053 if not (isinstance(v
,types
.ListType
)):
2055 v1
= [x
.split(".") for x
in v
]
2057 except StandardError:
2058 print >>sys
.stderr
, "Invalid specification for rule '%s' in require. Expected a list of strings" % r
2061 # Build the dictionary of terminals. We a record a 0 in the
2062 # dictionary to track whether or not a terminal is actually
2063 # used in the grammar
2065 if 'error' in tokens
:
2066 print >>sys
.stderr
, "yacc: Illegal token 'error'. Is a reserved word."
2067 raise YaccError
,"Illegal token name"
2070 if Terminals
.has_key(n
):
2071 print >>sys
.stderr
, "yacc: Warning. Token '%s' multiply defined." % n
2074 Terminals
['error'] = [ ]
2076 # Get the precedence map (if any)
2077 prec
= ldict
.get("precedence",None)
2079 if not (isinstance(prec
,types
.ListType
) or isinstance(prec
,types
.TupleType
)):
2080 raise YaccError
,"precedence must be a list or tuple."
2081 add_precedence(prec
)
2082 Signature
.update(repr(prec
))
2085 if not Precedence
.has_key(n
):
2086 Precedence
[n
] = ('right',0) # Default, right associative, 0 precedence
2088 # Look for error handler
2089 ef
= ldict
.get('p_error',None)
2091 if isinstance(ef
,types
.FunctionType
):
2093 elif isinstance(ef
, types
.MethodType
):
2096 raise YaccError
,"'p_error' defined, but is not a function or method."
2097 eline
= ef
.func_code
.co_firstlineno
2098 efile
= ef
.func_code
.co_filename
2101 if (ef
.func_code
.co_argcount
!= 1+ismethod
):
2102 raise YaccError
,"%s:%d: p_error() requires 1 argument." % (efile
,eline
)
2106 print >>sys
.stderr
, "yacc: Warning. no p_error() function is defined."
2108 # Get the list of built-in functions with p_ prefix
2109 symbols
= [ldict
[f
] for f
in ldict
.keys()
2110 if (type(ldict
[f
]) in (types
.FunctionType
, types
.MethodType
) and ldict
[f
].__name
__[:2] == 'p_'
2111 and ldict
[f
].__name
__ != 'p_error')]
2113 # Check for non-empty symbols
2114 if len(symbols
) == 0:
2115 raise YaccError
,"no rules of the form p_rulename are defined."
2117 # Sort the symbols by line number
2118 symbols
.sort(lambda x
,y
: cmp(x
.func_code
.co_firstlineno
,y
.func_code
.co_firstlineno
))
2120 # Add all of the symbols to the grammar
2122 if (add_function(f
)) < 0:
2125 files
[f
.func_code
.co_filename
] = None
2127 # Make a signature of the docstrings
2130 Signature
.update(f
.__doc
__)
2135 raise YaccError
,"Unable to construct parser."
2137 if not lr_read_tables(tabmodule
):
2140 for filename
in files
.keys():
2141 if not validate_file(filename
):
2144 # Validate dictionary
2145 validate_dict(ldict
)
2147 if start
and not Prodnames
.has_key(start
):
2148 raise YaccError
,"Bad starting symbol '%s'" % start
2150 augment_grammar(start
)
2151 error
= verify_productions(cycle_check
=check_recursion
)
2152 otherfunc
= [ldict
[f
] for f
in ldict
.keys()
2153 if (type(f
) in (types
.FunctionType
,types
.MethodType
) and ldict
[f
].__name
__[:2] != 'p_')]
2156 raise YaccError
,"Unable to construct parser."
2160 compute_follow(start
)
2162 if method
in ['SLR','LALR']:
2163 lr_parse_table(method
)
2165 raise YaccError
, "Unknown parsing method '%s'" % method
2168 lr_write_tables(tabmodule
,outputdir
)
2172 f
= open(os
.path
.join(outputdir
,debugfile
),"w")
2173 f
.write(_vfc
.getvalue())
2175 f
.write(_vf
.getvalue())
2178 print >>sys
.stderr
, "yacc: can't create '%s'" % debugfile
,e
2180 # Made it here. Create a parser object and set up its internal state.
2181 # Set global parse() method to bound method of parser object.
2184 p
.productions
= Productions
2185 p
.errorfunc
= Errorfunc
2186 p
.action
= _lr_action
2188 p
.method
= _lr_method
2189 p
.require
= Requires
2197 # Clean up all of the globals we created
# yacc_cleanup function. Delete all of the global variables
# used during table construction
def yacc_cleanup():
    """Delete the module-level globals used only during table construction,
    freeing their memory once the parser object has been built."""
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache

    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    del Productions, Prodnames, Prodmap, Terminals
    del Nonterminals, First, Follow, Precedence, LRitems
    del Errorfunc, Signature, Requires
2221 # Stub that raises an error if parsing is attempted without first calling yacc()
2222 def parse(*args
,**kwargs
):
2223 raise YaccError
, "yacc: No parser built with yacc()"